diff options
authorJustin Bedo <cu@cua0.org>2018-10-05 16:16:20 +1000
committerJustin Bedo <cu@cua0.org>2018-10-05 16:19:06 +1000
commit871ef64f3c43199dfa01216ac86db56650c2c8a2 (patch)
parentdd3666f6a069105e61f8889665cf55eed9a14e51 (diff)
implement types
19 files changed, 258 insertions, 44 deletions
diff --git a/default.nix b/default.nix
index 1a8f341..7002a57 100644
--- a/default.nix
+++ b/default.nix
@@ -7,7 +7,10 @@ let
callBionix = callBionix;
id = x: x;
+ types = callBionix ./lib/types.nix {};
bwa = callBionix ./tools/bwa.nix {};
+ compression = callBionix ./tools/compression.nix {};
crumble = callBionix ./tools/crumble.nix {};
fastqc = callBionix ./tools/fastqc.nix {};
gridss = callBionix ./tools/gridss.nix {};
@@ -22,5 +25,6 @@ let
ref = callBionix ./lib/references.nix {};
def = f: defs: attrs: f (defs // attrs);
defQsub = qsubAttrs: f: defs: qsubAttr qsubAttrs (def f defs);
in bionix
diff --git a/lib/references.nix b/lib/references.nix
index 61655cd..3614647 100755
--- a/lib/references.nix
+++ b/lib/references.nix
@@ -1,6 +1,7 @@
{ bionix, nixpkgs }:
with nixpkgs;
+with bionix.types;
rec {
grch38 = grch38-p12;
@@ -13,6 +14,7 @@ rec {
sha256 = "0ji2ggpmgnbpwbhq8mirj6h3lyy02nl2rnz7n892iq5cqpsblh4z";
buildCommand = "gunzip < $src > $out";
+ passthru.filetype = filetype.fa {};
blacklist = stdenvNoCC.mkDerivation {
name = "blacklist-grch38";
@@ -21,6 +23,7 @@ rec {
sha256 = "1lpnqq1mjidbdxj5i6x26rxa8x1rs8q3hlf0z1z49j3jsnkgffky";
buildCommand = "gunzip < $src > $out";
+ passthru.filetype = filetype.bed { ref = seq; };
@@ -34,6 +37,7 @@ rec {
sha256 = "0ryiqab5bldpzawylsk2qpjxr2j701q03ww9jqyxhkimqpn9g3mr";
buildCommand = "gunzip < $src > $out";
+ passthru.filetype = filetype.fa {};
@@ -47,6 +51,7 @@ rec {
sha256 = "1660d6d05f3aa266c6053cfd1efef1747d9e854836917241d6f47cff7a55340c";
buildCommand = "gunzip < $src > $out";
+ passthru.filetype = filetype.fa {};
diff --git a/lib/types.nix b/lib/types.nix
new file mode 100644
index 0000000..a347046
--- /dev/null
+++ b/lib/types.nix
@@ -0,0 +1,64 @@
+{bionix, nixpkgs}:
+with nixpkgs;
+ nix-adt-src = fetchFromGitHub {
+ owner = "shlevy";
+ repo = "nix-adt";
+ rev = "dd04b5d08eed65ecd73aafde56a78078e09f1c74";
+ sha256 = "0vhk1y7gilgn2pgvj4813bh2ljpw4pvrph8k8b0fsg56dbm8mqxa";
+ };
+ nix-adt = import "${nix-adt-src}";
+ inherit (nix-adt.checked) make-type match any std none;
+ inherit (std) option;
+ idft = sym: ft: _: abort "unhandled filetype (${ft}) for ${sym}";
+ idst = sym: st: _: abort "unhandled sorting (${st}) for ${sym}";
+rec {
+ option-sort = option sorting;
+ # matchFiletype sym y x: dispatch on the `filetype` attribute of x.
+ #   sym - label interpolated into abort messages
+ #   y   - attrset of handlers, one per filetype constructor, overriding the
+ #         defaults below
+ #   x   - any value carrying a `filetype` attribute
+ # Every constructor without a handler in y falls back to idft, which aborts
+ # with "unhandled filetype (...) for <sym>". A value without a `filetype`
+ # attribute aborts with "unknown filetype for <sym>".
+ matchFiletype = sym: y: x: if x ? filetype then match x.filetype ({
+ fa = idft sym "fasta";
+ fq = idft sym "fastq";
+ bam = idft sym "bam";
+ sam = idft sym "sam";
+ cram = idft sym "cram";
+ vcf = idft sym "vcf";
+ bed = idft sym "bed";
+ gz = idft sym "gz";
+ bz2 = idft sym "bz2";
+ } // y) else abort "unknown filetype for ${sym}";
+ filetype = make-type "filetype" {
+ fa = {};
+ fq = {};
+ bam = {ref = any; sorting = option-sort;};
+ sam = {ref = any; sorting = option-sort;};
+ cram = {ref = any; sorting = option-sort;};
+ vcf = {ref = any;};
+ bed = {ref = any;};
+ gz = filetype;
+ bz2 = filetype;
+ };
+ # Output-format converters for alignment files. Each accepts a value with a
+ # bam/sam/cram `filetype` and hands the matched payload to the target
+ # constructor; any other filetype aborts via matchFiletype's defaults.
+ # The first argument is the label shown in abort messages, so it names the
+ # converter itself (previously all three reported "bam2cram").
+ toCram = matchFiletype "toCram" { bam = filetype.cram; sam = filetype.cram; cram = filetype.cram; };
+ toBam = matchFiletype "toBam" { bam = filetype.bam; sam = filetype.bam; cram = filetype.bam; };
+ toSam = matchFiletype "toSam" { bam = filetype.sam; sam = filetype.sam; cram = filetype.sam; };
+ matchSorting = sym: y: let f = x: match x.sorting { some = z: match z ( { coord = idst sym "coord"; name = idst sym "name"; } // y); none = abort "unknown sort for ${sym}"; }; in matchFiletype sym { bam = f; sam = f; cram = f; };
+ sorting = make-type "sorting" {
+ coord = {};
+ name = {};
+ };
+ # coordSort f / nameSort f: take a bare filetype value f (not a tagged file)
+ # and return the same alignment filetype with its `sorting` field set to
+ # coordinate or name order. f is wrapped as {filetype = f;} because
+ # matchFiletype dispatches on that attribute.
+ # sam and cram are handled alongside bam: samtools-sort accepts all three
+ # and calls these on its output format, which previously aborted with
+ # "unhandled filetype" for anything but bam.
+ coordSort = f: let mark = con: x: con (x // {sorting = option-sort.some (sorting.coord {});}); in matchFiletype "coordSort" { bam = mark filetype.bam; sam = mark filetype.sam; cram = mark filetype.cram; } {filetype = f;};
+ nameSort = f: let mark = con: x: con (x // {sorting = option-sort.some (sorting.name {});}); in matchFiletype "nameSort" { bam = mark filetype.bam; sam = mark filetype.sam; cram = mark filetype.cram; } {filetype = f;};
+ gunzip = matchFiletype "gunzip" { gz = x: x; };
+ bunzip2 = matchFiletype "bunzip2" { bz2 = x: x; };
+ # tag attrs x: attach attrs to x. When x has type == "derivation" the attrs
+ # are merged directly onto it; otherwise they are merged into x.passthru via
+ # tagPassthru.
+ tag = attrs: x: if x ? type && x.type == "derivation" then x // attrs else tagPassthru attrs x;
+ # tagPassthru attrs x: merge attrs into x.passthru, creating passthru when x
+ # has none. Existing passthru keys are overridden by attrs.
+ tagPassthru = attrs: x: if x ? passthru then x // { passthru = x.passthru // attrs; } else x // { passthru = attrs; };
+ # tagFiletype ft x: shorthand for tagging x with { filetype = ft; }.
+ tagFiletype = ft: tag { filetype = ft; };
diff --git a/test-tnpair.nix b/test-tnpair.nix
index 3260b7b..7deaf3b 100644
--- a/test-tnpair.nix
+++ b/test-tnpair.nix
@@ -14,15 +14,31 @@ in
with bionix;
- ref = { seq = ./example/ref.fa; };
- alignWithRG = rg: bwa.align { inherit ref; flags = "-R'@RG\\tID:${rg}\\tSM:${rg}'";};
+ fetchlocal = path: stdenv.mkDerivation {
+ name = baseNameOf path;
+ buildCommand = "ln -s ${path} $out";
+ };
+ fetchfq = attrs: types.tagFiletype (types.filetype.fq {}) (fetchlocal attrs);
+ fetchfa = attrs: types.tagFiletype (types.filetype.fa {}) (fetchlocal attrs);
+ alignWithRG = rg: bwa.align { ref = fetchfa ./example/ref.fa; flags = "-R'@RG\\tID:${rg}\\tSM:${rg}'";};
sort = samtools.sort {};
flagstat = samtools.flagstat {};
check = fastqc.check {};
- callVariants = strelka.call { inherit ref; };
+ callVariants = strelka.call {};
- tnpair = { tumour = {name = "mysample1"; files = {input1 = ./example/sample1-1.fq; input2 = ./example/sample1-2.fq;};};
- normal = {name = "mysample2"; files = {input1 = ./example/sample2-1.fq; input2 = ./example/sample2-1.fq;};};};
+ # Tumour/normal pair used by the test pipeline. Each sample has a name and
+ # two fastq inputs tagged with the fq filetype via fetchfq.
+ # NOTE(review): normal.input2 previously pointed at sample2-1.fq, the same
+ # file as input1; sample2-2.fq is assumed to exist alongside it, mirroring
+ # the sample1 pair -- confirm against ./example.
+ tnpair = {
+ tumour = {name = "mysample1"; files = {
+ input1 = fetchfq ./example/sample1-1.fq;
+ input2 = fetchfq ./example/sample1-2.fq;
+ };
+ };
+ normal = {name = "mysample2"; files = {
+ input1 = fetchfq ./example/sample2-1.fq;
+ input2 = fetchfq ./example/sample2-2.fq;
+ };
+ };
+ };
processPair = { tumour, normal }: rec {
alignments = mapAttrs (_: x: sort (alignWithRG x.name x.files)) { inherit normal tumour; };
@@ -37,8 +53,9 @@ let
mkdir $out
ln -s ${tnpairResult.variants} $out/strelka
mkdir $out/alignments
- ln -s ${tnpairResult.alignments.tumour} $out/alignments/${tnpair.tumour.name}.bam
- ln -s ${tnpairResult.alignments.normal} $out/alignments/${tnpair.normal.name}.bam
+ ln -s ${gridss.callVariants {} (with tnpairResult.alignments; [tumour])} $out/gridss
+ ln -s ${samtools.view { outfmt = types.toCram; } (tnpairResult.alignments.tumour)} $out/alignments/${tnpair.tumour.name}.cram
+ ln -s ${samtools.view { outfmt = types.toCram; } (tnpairResult.alignments.normal)} $out/alignments/${tnpair.normal.name}.cram
ln -s ${flagstat tnpairResult.alignments.tumour} $out/alignments/${tnpair.tumour.name}.flagstat
ln -s ${flagstat tnpairResult.alignments.normal} $out/alignments/${tnpair.normal.name}.flagstat
mkdir $out/fastqc
diff --git a/tools/bwa-index.nix b/tools/bwa-index.nix
index 48a2556..c879bc4 100644
--- a/tools/bwa-index.nix
+++ b/tools/bwa-index.nix
@@ -7,12 +7,15 @@ ref:
with nixpkgs;
with lib;
+with bionix.types;
+assert (matchFiletype "bwa-index" { fa = _: true; } ref);
stdenv.mkDerivation {
name = "bwa-index";
buildInputs = [ bwa ];
buildCommand = ''
- ln -s ${ref.seq} ref.fa
+ ln -s ${ref} ref.fa
bwa index ${optionalString (flags != null) flags} ref.fa
mkdir $out
mv ref.fa.* $out
diff --git a/tools/bwa-mem.nix b/tools/bwa-mem.nix
index ca9e6a8..0d0f7d8 100644
--- a/tools/bwa-mem.nix
+++ b/tools/bwa-mem.nix
@@ -12,12 +12,18 @@
with nixpkgs;
with lib;
+with bionix.types;
+with bionix.compression;
-stdenv.mkDerivation {
+ fa = f: matchFiletype "bwa-ref" { fa = _: f; } f;
+ fq = f: matchFiletype "bwa-input" { fq = _: f; } f;
+in stdenv.mkDerivation {
name = "bwa-mem";
buildInputs = [ bwa bc ] ++ optional bamOutput samtools;
buildCommand = ''
- ln -s ${ref.seq} ref.fa
+ ln -s ${fa ref} ref.fa
for f in ${bionix.bwa.index indexAttrs ref}/* ; do
ln -s $f
@@ -26,9 +32,10 @@ stdenv.mkDerivation {
>&2 echo "not enough build cores"
exit 1
- bwa mem ${optionalString (flags != null) flags} -t $cores ref.fa ${input1} \
- ${optionalString (input2 != null) input2} \
+ bwa mem ${optionalString (flags != null) flags} -t $cores ref.fa ${fq input1} \
+ ${optionalString (input2 != null) (fq input2)} \
${optionalString bamOutput "| samtools view -b"} \
> $out
+ passthru.filetype = if bamOutput then filetype.bam {ref = ref; sorting = option-sort.none;} else filetype.sam {ref = ref; sorting = option-sort.none;};
diff --git a/tools/compression.nix b/tools/compression.nix
new file mode 100644
index 0000000..aea7e13
--- /dev/null
+++ b/tools/compression.nix
@@ -0,0 +1,58 @@
+{bionix, nixpkgs}:
+with nixpkgs;
+with bionix;
+ # uncompress f: return f unchanged when its filetype is already an
+ # uncompressed format; for gz/bz2 build a derivation that decompresses f
+ # and tag it with the inner (wrapped) filetype.
+ # matchFiletype, gunzip and bunzip2 all dispatch on the `filetype` attribute
+ # of their argument, so they are given the tagged file f itself; passing
+ # f.filetype hit the "unknown filetype" abort.
+ uncompress = f: types.matchFiletype "uncompress" {
+ fa = _: f;
+ fq = _: f;
+ bam = _: f;
+ sam = _: f;
+ cram = _: f;
+ vcf = _: f;
+ bed = _: f;
+ gz = _: types.tagFiletype (types.gunzip f) (stdenv.mkDerivation {
+ name = "gunzip";
+ buildCommand = "gunzip < ${f} > $out";
+ });
+ bz2 = _: types.tagFiletype (types.bunzip2 f) (stdenv.mkDerivation {
+ name = "bunzip2";
+ buildCommand = "bunzip2 < ${f} > $out";
+ });
+ } f;
+ # gzip f: compress f with gzip. The result's filetype is the gz constructor
+ # applied to f's own filetype. Only the uncompressed formats listed below
+ # are accepted; an input that is already gz or bz2 falls through to
+ # matchFiletype's defaults and aborts with "unhandled filetype".
+ gzip = f:
+ let
+ gz = (stdenv.mkDerivation {
+ name = "gzip";
+ buildCommand = "gzip < ${f} > $out";
+ passthru = { filetype = types.filetype.gz f.filetype; };
+ });
+ in types.matchFiletype "compressed" {
+ fa = _: gz;
+ fq = _: gz;
+ bam = _: gz;
+ sam = _: gz;
+ cram = _: gz;
+ vcf = _: gz;
+ bed = _: gz;
+ } f;
+ # bzip2 f: compress f with bzip2. The result's filetype is the bz2
+ # constructor applied to f's own filetype. Only the uncompressed formats
+ # listed below are accepted; gz/bz2 inputs abort via matchFiletype's
+ # defaults.
+ # The match arms return the local `bz2` derivation; they previously named
+ # `gz`, which is not bound in this let.
+ bzip2 = f:
+ let
+ bz2 = (stdenv.mkDerivation {
+ name = "bzip2";
+ buildCommand = "bzip2 < ${f} > $out";
+ passthru = { filetype = types.filetype.bz2 f.filetype; };
+ });
+ in types.matchFiletype "compressed" {
+ fa = _: bz2;
+ fq = _: bz2;
+ bam = _: bz2;
+ sam = _: bz2;
+ cram = _: bz2;
+ vcf = _: bz2;
+ bed = _: bz2;
+ } f;
diff --git a/tools/crumble-toCram.nix b/tools/crumble-toCram.nix
deleted file mode 100644
index 2544e17..0000000
--- a/tools/crumble-toCram.nix
+++ /dev/null
@@ -1,15 +0,0 @@
-{ bionix
-, nixpkgs
-, flags ? null
-with nixpkgs;
-with lib;
-stdenv.mkDerivation {
- name = "crumble";
- buildInputs = [ bionix.crumble.crumble ];
- buildCommand = "crumble ${optionalString (flags != null) flags} ${input} $out";
diff --git a/tools/crumble.nix b/tools/crumble.nix
index fb48c8f..6031fca 100644
--- a/tools/crumble.nix
+++ b/tools/crumble.nix
@@ -5,5 +5,4 @@ with bionix;
crumble = callPackage ./crumble-app.nix {};
- toCram = callBionix ./crumble-toCram.nix;
diff --git a/tools/gridss-callVariants.nix b/tools/gridss-callVariants.nix
index c313c3f..799c930 100644
--- a/tools/gridss-callVariants.nix
+++ b/tools/gridss-callVariants.nix
@@ -1,16 +1,25 @@
{ bionix
, nixpkgs
-, ref
, blacklist ? null
, bwaIndexAttrs ? {}
, faidxAttrs ? {}
, flags ? null
+with nixpkgs;
with lib;
+with bionix.types;
+ getref = matchFiletype "gridss-callVariants" { bam = x: x.ref; };
+ refs = map getref inputs;
+ ref = head refs;
+assert (length (unique refs) == 1);
stdenv.mkDerivation rec {
name = "gridss-callVariants";
buildInputs = [ jre R bwa ];
@@ -19,7 +28,7 @@ stdenv.mkDerivation rec {
sha256 = "01srl3qvv060whqg1y1fpxjc5cwga5wscs1bmf1v3z87dignra7k";
buildCommand = ''
- ln -s ${ref.seq} ref.fa
+ ln -s ${ref} ref.fa
ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai
for f in ${bionix.bwa.index bwaIndexAttrs ref}/*; do
ln -s $f
diff --git a/tools/gridss.nix b/tools/gridss.nix
index edf9f57..7a2f217 100644
--- a/tools/gridss.nix
+++ b/tools/gridss.nix
@@ -3,5 +3,5 @@
with bionix;
- callVariants = callBiolnix ./gridss-callVariants.nix;
+ callVariants = callBionix ./gridss-callVariants.nix;
diff --git a/tools/platypus-callVariants.nix b/tools/platypus-callVariants.nix
index 7b68b28..a3e3a65 100644
--- a/tools/platypus-callVariants.nix
+++ b/tools/platypus-callVariants.nix
@@ -1,6 +1,5 @@
{ bionix
, nixpkgs
-, ref
, indexAttrs ? {}
, bamIndexAttrs ? {}
, flags ? null
@@ -8,16 +7,26 @@
+with nixpkgs;
with lib;
+with bionix.types;
-let filename = path: last (splitString "/" path);
-in stdenv.mkDerivation {
+ filename = path: last (splitString "/" path);
+ # getref f: extract the reference recorded in a bam input's filetype.
+ # Returns the `ref` field, not the whole bam payload: the result is
+ # interpolated into `ln -s ${ref} ref.fa` and compared with `unique refs`.
+ # Non-bam inputs abort via matchFiletype's defaults.
+ getref = f: matchFiletype "platypus-callVariants" { bam = r: r.ref; } f;
+ refs = map getref inputs;
+ ref = head refs;
+assert (length (unique refs) == 1);
+stdenv.mkDerivation {
name = "platypus";
buildInputs = [ platypus ];
buildCommand = ''
- ln -s ${ref.seq} ref.fa
- ln -s ${bionix.samtools.faix indexAttrs ref} ref.fa.fai
+ ln -s ${ref} ref.fa
+ ln -s ${bionix.samtools.faidx indexAttrs ref} ref.fa.fai
${concatMapStringsSep "\n" (p: "ln -s ${p} ${filename p}.bam") inputs}
${concatMapStringsSep "\n" (p: "ln -s ${bionix.samtools.index bamIndexAttrs p} ${filename p}.bai") inputs}
ls -l
@@ -28,4 +37,5 @@ in stdenv.mkDerivation {
-o $out \
--bamFiles=${concatMapStringsSep "," (p: "${filename p}.bam") inputs}
+ passthru.filetype = filetype.vcf {ref = ref;};
diff --git a/tools/platypus.nix b/tools/platypus.nix
index 88d88d7..0dfe397 100644
--- a/tools/platypus.nix
+++ b/tools/platypus.nix
@@ -1,7 +1,7 @@
{ bionix, nixpkgs }:
-with nixpkgs;
+with bionix;
- call = attrs: callPackage ./platypus-callVariants.nix attrs;
+ call = callBionix ./platypus-callVariants.nix;
diff --git a/tools/samtools-faidx.nix b/tools/samtools-faidx.nix
index bf32bdf..1fde411 100644
--- a/tools/samtools-faidx.nix
+++ b/tools/samtools-faidx.nix
@@ -7,6 +7,9 @@ input:
with nixpkgs;
with lib;
+with bionix.types;
+assert (matchFiletype "samtools-faidx" { fa = _: true; } input);
stdenv.mkDerivation {
diff --git a/tools/samtools-index.nix b/tools/samtools-index.nix
index 4e09dc7..aad46a7 100644
--- a/tools/samtools-index.nix
+++ b/tools/samtools-index.nix
@@ -7,6 +7,10 @@ input:
with nixpkgs;
with lib;
+with bionix.types;
+assert (matchFiletype "samtools-index" { bam = _: true; } input);
+assert (matchSorting "samtools-index" { coord = _: true; } input);
stdenv.mkDerivation {
name = "samtools-index";
diff --git a/tools/samtools-sort.nix b/tools/samtools-sort.nix
index 2de579e..ab9d603 100644
--- a/tools/samtools-sort.nix
+++ b/tools/samtools-sort.nix
@@ -2,6 +2,7 @@
, nixpkgs
, nameSort ? false
, flags ? null
+, outfmt ? null
@@ -9,10 +10,20 @@ input:
with nixpkgs;
with lib;
-stdenv.mkDerivation {
+ inherit (bionix.types) matchFiletype coordSort;
+assert (matchFiletype "samtools-sort" { bam = _: true; sam = _: true; cram = _: true; } input);
+ outfmtR = if outfmt != null then outfmt input else input.filetype;
+ # Command-line flags selecting the output format of samtools sort. The cram
+ # handler receives the cram payload, so the reference passed to -T is its
+ # `ref` field (interpolating the payload attrset itself fails).
+ outFmtFlags = matchFiletype "samtools-sort-outfmt" { bam = _: "-O BAM"; sam = _: "-O SAM"; cram = x: "-O CRAM -T ${x.ref}"; } {filetype = outfmtR;};
+in stdenv.mkDerivation {
name = "samtools-sort";
buildInputs = [ samtools ];
buildCommand = ''
- samtools sort -@ $NIX_BUILD_CORES ${optionalString nameSort "-n"} ${optionalString (flags != null) flags} ${input} > $out
+ samtools sort -@ $NIX_BUILD_CORES ${optionalString nameSort "-n"} ${outFmtFlags} ${optionalString (flags != null) flags} ${input} > $out
+ passthru.filetype = if nameSort then bionix.types.nameSort outfmtR else coordSort outfmtR;
diff --git a/tools/samtools-view.nix b/tools/samtools-view.nix
new file mode 100644
index 0000000..e1cdac1
--- /dev/null
+++ b/tools/samtools-view.nix
@@ -0,0 +1,27 @@
+{ bionix
+, nixpkgs
+, nameSort ? false
+, flags ? null
+, outfmt ? null
+with nixpkgs;
+with lib;
+with bionix.types;
+# Input must be an alignment file (bam, sam or cram); anything else aborts.
+# Labels name samtools-view so abort messages point at this tool rather than
+# samtools-sort.
+assert (matchFiletype "samtools-view" { bam = _: true; sam = _: true; cram = _: true; } input);
+ # Resulting filetype: outfmt applied to the input when given, otherwise the
+ # input's own filetype.
+ outfmtR = if outfmt != null then outfmt input else input.filetype;
+ # fa ref: return ref after checking it is tagged as fasta.
+ fa = ref: matchFiletype "samtools-view-ref" { fa = _: ref; } ref;
+ # Flags selecting the output format; cram additionally needs the reference.
+ outfmtFlags = matchFiletype "samtools-view-outfmt" { bam = _: "-O BAM"; sam = _: "-O SAM"; cram = x: "-O CRAM -T ${fa x.ref}"; } {filetype = outfmtR;};
+in stdenv.mkDerivation {
+ name = "samtools-view";
+ buildInputs = [ samtools ];
+ buildCommand = ''
+ samtools view ${outfmtFlags} ${optionalString (flags != null) flags} ${input} > $out
+ '';
+ passthru.filetype = outfmtR;
diff --git a/tools/samtools.nix b/tools/samtools.nix
index 6b08c6e..89a96b0 100644
--- a/tools/samtools.nix
+++ b/tools/samtools.nix
@@ -3,6 +3,7 @@
with bionix;
+ view = callBionix ./samtools-view.nix;
faidx = callBionix ./samtools-faidx.nix;
flagstat = callBionix ./samtools-flagstat.nix;
index = callBionix ./samtools-index.nix;
diff --git a/tools/strelka-call.nix b/tools/strelka-call.nix
index ecdea84..bcbb6d1 100644
--- a/tools/strelka-call.nix
+++ b/tools/strelka-call.nix
@@ -1,6 +1,5 @@
{ bionix
, nixpkgs
-, ref
, indexAttrs ? {}
, bamIndexAttrs ? {}
, flags ? null
@@ -10,17 +9,25 @@
with nixpkgs;
with lib;
+with bionix.types;
filename = path: last (splitString "/" path);
+ getref = f: matchFiletype "strelka-call" { bam = x: x.ref; } f;
inputs = [ normal tumour ];
+ refs = map getref inputs;
+ ref = head refs;
-in stdenv.mkDerivation {
+assert (length (unique refs) == 1);
+stdenv.mkDerivation {
name = "strelka";
buildInputs = [ strelka ];
buildCommand = ''
- ln -s ${ref.seq} ref.fa
- ln -s ${bionix.samtools.faidx indexAttrs ref.seq} ref.fa.fai
+ ln -s ${ref} ref.fa
+ ln -s ${bionix.samtools.faidx indexAttrs ref} ref.fa.fai
${concatMapStringsSep "\n" (p: "ln -s ${p} ${filename p}.bam") inputs}
${concatMapStringsSep "\n" (p: "ln -s ${bionix.samtools.index bamIndexAttrs p} ${filename p}.bai") inputs}