diff options
author | Justin Bedo <cu@cua0.org> | 2019-10-14 09:03:05 +1100 |
---|---|---|
committer | Justin Bedo <cu@cua0.org> | 2019-12-20 10:39:47 +1100 |
commit | 7594c3bb8747efaf05bad164088c7e2d3586c891 (patch) | |
tree | f5776df1704fdef8285ba9b72f11eb4bd319ea80 | |
parent | 5cffb4a9cb2cc97eebb1513e7f78ddad17c9af16 (diff) |
kallisto: attempt at making deterministic
-rw-r--r-- | tools/kallisto-quant-empty.h5 | bin | 0 -> 6544 bytes | |||
-rw-r--r-- | tools/kallisto-quant.nix | 28 |
2 files changed, 28 insertions, 0 deletions
```diff
diff --git a/tools/kallisto-quant-empty.h5 b/tools/kallisto-quant-empty.h5
Binary files differ
new file mode 100644
index 0000000..2921292
--- /dev/null
+++ b/tools/kallisto-quant-empty.h5
diff --git a/tools/kallisto-quant.nix b/tools/kallisto-quant.nix
index 9f0ab57..c1083c8 100644
--- a/tools/kallisto-quant.nix
+++ b/tools/kallisto-quant.nix
@@ -22,6 +22,27 @@ inputs:
 let
   inherit (bionix.types) matchFiletype';
   isFastQ = matchFiletype' "kallisto-quant" {fq = _: true; gz = isFastQ; };
+
+  empty = ./kallisto-quant-empty.h5;
+
+  python = pkgs.python3Packages.python.withPackages (p: with p; [ h5py ]);
+
+  noStamp = pkgs.writeScript "nostamp.py" ''
+    #!${python}/bin/python
+    import h5py
+    import os
+    def copy(obj, out, path):
+      if type(obj) in [h5py._hl.group.Group,h5py._hl.files.File]:
+        for key in obj.keys():
+          if key != "start_time":
+            copy(obj[key], out, path + "/" + key)
+      elif type(obj)==h5py._hl.dataset.Dataset:
+        out.create_dataset(path, data=f[path], track_order=False, track_times = False)
+    with h5py.File(os.environ['out'] + "/abundance.h5", "r") as f:
+      with h5py.File("repack.h5", "a", track_order = False) as g:
+        copy(f, g, "")
+  '';
+
 in
 
 assert (all (x: isFastQ (x.filetype)) inputs);
@@ -43,6 +64,13 @@ stage {
     ${optionalString rfStranded "--rf-stranded"} \
     -t $NIX_BUILD_CORES \
     ${concatStringsSep " " inputs}
+
+    # Make deterministic by removing timestamps and using hdf5 empty template
+    cp ${empty} repack.h5
+    chmod 644 repack.h5
+    ${noStamp}
+    cp repack.h5 $out/abundance.h5
+    sed -i $out/run_info.json -e '/start_time/d'
   '';
   passthru.multicore = true;
 }
```