about summary refs log tree commit diff
path: root/tools
diff options
context:
space:
mode:
author Justin Bedo <cu@cua0.org> 2019-10-14 09:03:05 +1100
committer Justin Bedo <cu@cua0.org> 2019-12-20 10:39:47 +1100
commit 7594c3bb8747efaf05bad164088c7e2d3586c891 (patch)
tree f5776df1704fdef8285ba9b72f11eb4bd319ea80 /tools
parent 5cffb4a9cb2cc97eebb1513e7f78ddad17c9af16 (diff)
kallisto: attempt at making deterministic
Diffstat (limited to 'tools')
-rw-r--r-- tools/kallisto-quant-empty.h5 bin 0 -> 6544 bytes
-rw-r--r-- tools/kallisto-quant.nix 28
2 files changed, 28 insertions, 0 deletions
diff --git a/tools/kallisto-quant-empty.h5 b/tools/kallisto-quant-empty.h5
new file mode 100644
index 0000000..2921292
--- /dev/null
+++ b/tools/kallisto-quant-empty.h5
Binary files differ
diff --git a/tools/kallisto-quant.nix b/tools/kallisto-quant.nix
index 9f0ab57..c1083c8 100644
--- a/tools/kallisto-quant.nix
+++ b/tools/kallisto-quant.nix
@@ -22,6 +22,27 @@ inputs:
let
inherit (bionix.types) matchFiletype';
isFastQ = matchFiletype' "kallisto-quant" {fq = _: true; gz = isFastQ; };
+
+ empty = ./kallisto-quant-empty.h5;
+
+ python = pkgs.python3Packages.python.withPackages (p: with p; [ h5py ]);
+
+ noStamp = pkgs.writeScript "nostamp.py" ''
+ #!${python}/bin/python
+ import h5py
+ import os
+ def copy(obj, out, path):
+ if type(obj) in [h5py._hl.group.Group,h5py._hl.files.File]:
+ for key in obj.keys():
+ if key != "start_time":
+ copy(obj[key], out, path + "/" + key)
+ elif type(obj)==h5py._hl.dataset.Dataset:
+ out.create_dataset(path, data=f[path], track_order=False, track_times = False)
+ with h5py.File(os.environ['out'] + "/abundance.h5", "r") as f:
+ with h5py.File("repack.h5", "a", track_order = False) as g:
+ copy(f, g, "")
+ '';
+
in
assert (all (x: isFastQ (x.filetype)) inputs);
@@ -43,6 +64,13 @@ stage {
${optionalString rfStranded "--rf-stranded"} \
-t $NIX_BUILD_CORES \
${concatStringsSep " " inputs}
+
+ # Make deterministic by removing timestamps and using hdf5 empty template
+ cp ${empty} repack.h5
+ chmod 644 repack.h5
+ ${noStamp}
+ cp repack.h5 $out/abundance.h5
+ sed -i $out/run_info.json -e '/start_time/d'
'';
passthru.multicore = true;
}