aboutsummaryrefslogtreecommitdiff
path: root/tools/kallisto-quant.nix
blob: 3efeaaabe1be48bff3cf10c177c3469b5f4ddf0d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# Stage wrapping `kallisto quant`.  Applied first to the option set below and
# then to `inputs`, the list of read files to quantify.
{
  bionix,
  # Flags forwarded to bionix.kallisto.index when building the index for `ref`.
  indexFlags ? {},
  # Adds --bias to the kallisto command line.
  bias ? false,
  # When greater than zero, adds `-b <n>` together with `--seed=<seed>`.
  bootstrapSamples ? 0,
  # Only placed on the command line when bootstrapSamples > 0.
  seed ? 42,
  # Adds --plaintext.
  plaintext ? false,
  # Adds --fusion.
  fusion ? false,
  # Adds `--single -l <fragmentLength> -s <fragmentSD>`.
  single ? false,
  # Adds --fr-stranded.
  frStranded ? false,
  # Adds --rf-stranded.
  rfStranded ? false,
  # Both must be non-null whenever `single` is true (enforced below).
  fragmentLength ? null,
  fragmentSD ? null,
  # Reference handed to bionix.kallisto.index.
  ref
}:

with bionix;
with lib;

# Single-end mode implies that both fragment parameters were supplied.
assert single -> (fragmentLength != null && fragmentSD != null);

inputs:

# Local helpers used by the input assertion and the build command that follow.
let
  inherit (bionix.types) matchFiletype';
  # Predicate over an input's filetype, built with matchFiletype' and labelled
  # "kallisto-quant": the `fq` case yields true and the `gz` case recurses into
  # isFastQ.  NOTE(review): presumably `gz` receives the wrapped inner filetype
  # and unlisted filetypes are rejected -- confirm against bionix.types.
  isFastQ = matchFiletype' "kallisto-quant" {fq = _: true; gz = isFastQ; };

  # HDF5 template file shipped next to this expression; the build copies it to
  # repack.h5 before running the noStamp script.
  empty = ./kallisto-quant-empty.h5;

  # Python 3 interpreter with h5py available, used as the interpreter of noStamp.
  python = pkgs.python3Packages.python.withPackages (p: with p; [ h5py ]);

  # Script that opens $out/abundance.h5 read-only and repack.h5 in append mode,
  # walks groups recursively, skips every key named "start_time", and recreates
  # each dataset in repack.h5 with track_times and track_order disabled.
  noStamp = pkgs.writeScript "nostamp.py" ''
    #!${python}/bin/python
    import h5py
    import os
    def copy(obj, out, path):
      if type(obj) in [h5py._hl.group.Group,h5py._hl.files.File]:
        for key in obj.keys():
          if key != "start_time":
            copy(obj[key], out, path + "/" + key)
      elif type(obj)==h5py._hl.dataset.Dataset:
        out.create_dataset(path, data=f[path], track_order=False, track_times = False)
    with h5py.File(os.environ['out'] + "/abundance.h5", "r") as f:
      with h5py.File("repack.h5", "a", track_order = False) as g:
        copy(f, g, "")
  '';

in

# Every input must have a filetype accepted by isFastQ.
assert (all (x: isFastQ (x.filetype)) inputs);

# Runs `kallisto quant` against the index built from `ref`, writing results to
# $out, then strips run-specific metadata so the output is reproducible.
stage {
  name = "kallisto-quant";
  buildInputs = with pkgs; [ kallisto ];
  buildCommand = ''
    mkdir $out
    kallisto quant \
      -i ${bionix.kallisto.index indexFlags ref} \
      -o $out \
      ${optionalString bias "--bias"} \
      ${optionalString (bootstrapSamples > 0) "-b ${toString bootstrapSamples} --seed=${toString seed}"} \
      ${optionalString plaintext "--plaintext"} \
      ${optionalString fusion "--fusion"} \
      ${optionalString single "--single -l ${toString fragmentLength} -s ${toString fragmentSD}"} \
      ${optionalString frStranded "--fr-stranded"} \
      ${optionalString rfStranded "--rf-stranded"} \
      -t $NIX_BUILD_CORES \
      ${concatStringsSep " " inputs}

    # Make deterministic by removing timestamps and using hdf5 empty template.
    # kallisto is documented to emit plaintext instead of HDF5 when --plaintext
    # is given, so abundance.h5 may be absent; only repack it when it exists,
    # otherwise the repack script would abort the build.
    if [ -f "$out/abundance.h5" ]; then
      cp ${empty} repack.h5
      chmod 644 repack.h5
      ${noStamp}
      cp repack.h5 $out/abundance.h5
    fi

    # Drop the run-specific "start_time" and "call" lines from run_info.json.
    sed -i -e '/"start_time"/d' -e '/"call"/d' $out/run_info.json
  '';
  passthru.multicore = true;
}