From 7594c3bb8747efaf05bad164088c7e2d3586c891 Mon Sep 17 00:00:00 2001
From: Justin Bedo
Date: Mon, 14 Oct 2019 09:03:05 +1100
Subject: kallisto: attempt at making deterministic

---
 tools/kallisto-quant-empty.h5 | Bin 0 -> 6544 bytes
 tools/kallisto-quant.nix      |  33 +++++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)
 create mode 100644 tools/kallisto-quant-empty.h5

diff --git a/tools/kallisto-quant-empty.h5 b/tools/kallisto-quant-empty.h5
new file mode 100644
index 0000000..2921292
Binary files /dev/null and b/tools/kallisto-quant-empty.h5 differ
diff --git a/tools/kallisto-quant.nix b/tools/kallisto-quant.nix
index 9f0ab57..c1083c8 100644
--- a/tools/kallisto-quant.nix
+++ b/tools/kallisto-quant.nix
@@ -22,6 +22,32 @@ inputs:
 let
   inherit (bionix.types) matchFiletype';
   isFastQ = matchFiletype' "kallisto-quant" {fq = _: true; gz = isFastQ; };
+
+  # HDF5 template file; the build copies it to repack.h5 before repacking.
+  empty = ./kallisto-quant-empty.h5;
+
+  python = pkgs.python3Packages.python.withPackages (p: with p; [ h5py ]);
+
+  # Copies every dataset from $out/abundance.h5 into ./repack.h5, skipping any
+  # key named "start_time", with track_order and track_times switched off.
+  noStamp = pkgs.writeScript "nostamp.py" ''
+    #!${python}/bin/python
+    import h5py
+    import os
+    def copy(obj, out, path):
+      # h5py.File is a subclass of h5py.Group, so this covers the root too.
+      if isinstance(obj, h5py.Group):
+        for key in obj.keys():
+          if key != "start_time":
+            copy(obj[key], out, path + "/" + key)
+      elif isinstance(obj, h5py.Dataset):
+        # Use the dataset passed in rather than the module-level handle f.
+        out.create_dataset(path, data=obj, track_order=False, track_times = False)
+    with h5py.File(os.environ['out'] + "/abundance.h5", "r") as f:
+      with h5py.File("repack.h5", "a", track_order = False) as g:
+        copy(f, g, "")
+  '';
+
 in
 
 assert (all (x: isFastQ (x.filetype)) inputs);
@@ -43,6 +69,13 @@ stage {
     ${optionalString rfStranded "--rf-stranded"} \
     -t $NIX_BUILD_CORES \
     ${concatStringsSep " " inputs}
+
+    # Make deterministic by removing timestamps and using hdf5 empty template
+    cp ${empty} repack.h5
+    chmod 644 repack.h5
+    ${noStamp}
+    cp repack.h5 $out/abundance.h5
+    sed -i $out/run_info.json -e '/start_time/d'
   '';
   passthru.multicore = true;
 }
-- 
cgit v1.2.3