From 871ef64f3c43199dfa01216ac86db56650c2c8a2 Mon Sep 17 00:00:00 2001
From: Justin Bedo
Date: Fri, 5 Oct 2018 16:16:20 +1000
Subject: implement types

---
Review notes (this section is free text and is not part of the commit):

 * Purpose of the patch: introduce lib/types.nix (filetype/sorting ADTs built
   with nix-adt), tag tool outputs through passthru.filetype, and let tools
   derive the reference from their inputs instead of taking a `ref` argument.
 * Corrections applied to added lines (hunk line counts are unchanged):
   - tools/compression.nix: bzip2 returned the unbound name `gz`; it now
     returns `bz2`.  uncompress passed `f.filetype` to matchFiletype/gunzip/
     bunzip2, which test `x ? filetype`; it now passes `f`, as gzip/bzip2 do.
   - tools/platypus-callVariants.nix: getref returned the whole bam payload;
     it now returns `x.ref`, as in gridss-callVariants and strelka-call.
   - tools/samtools-sort.nix: the cram branch interpolated the payload
     attribute set; it now interpolates `x.ref`, as in samtools-view.
   - tools/samtools-view.nix, lib/types.nix (toBam/toSam): copy-pasted error
     symbols replaced with the name of the function that raises them.
 * The blob ids on the `index` lines are those of the original commit and were
   not recomputed for the corrected lines.
 * Indentation and blank context lines were reconstructed from a
   whitespace-collapsed copy; context may need manual adjustment to apply.

 default.nix                     |  4 +++
 lib/references.nix              |  5 ++++
 lib/types.nix                   | 64 +++++++++++++++++++++++++++++++++++++++++
 test-tnpair.nix                 | 31 +++++++++++++++-----
 tools/bwa-index.nix             |  5 +++-
 tools/bwa-mem.nix               | 15 +++++++---
 tools/compression.nix           | 58 +++++++++++++++++++++++++++++++++++++
 tools/crumble-toCram.nix        | 15 ----------
 tools/crumble.nix               |  1 -
 tools/gridss-callVariants.nix   | 13 +++++++--
 tools/gridss.nix                |  2 +-
 tools/platypus-callVariants.nix | 20 +++++++++----
 tools/platypus.nix              |  4 +--
 tools/samtools-faidx.nix        |  3 ++
 tools/samtools-index.nix        |  4 +++
 tools/samtools-sort.nix         | 15 ++++++++--
 tools/samtools-view.nix         | 27 +++++++++++++++++
 tools/samtools.nix              |  1 +
 tools/strelka-call.nix          | 15 +++++++---
 19 files changed, 258 insertions(+), 44 deletions(-)
 create mode 100644 lib/types.nix
 create mode 100644 tools/compression.nix
 delete mode 100644 tools/crumble-toCram.nix
 create mode 100644 tools/samtools-view.nix

diff --git a/default.nix b/default.nix
index 1a8f341..7002a57 100644
--- a/default.nix
+++ b/default.nix
@@ -7,7 +7,10 @@ let
   callBionix = callBionix;
   id = x: x;
 
+  types = callBionix ./lib/types.nix {};
+
   bwa = callBionix ./tools/bwa.nix {};
+  compression = callBionix ./tools/compression.nix {};
   crumble = callBionix ./tools/crumble.nix {};
   fastqc = callBionix ./tools/fastqc.nix {};
   gridss = callBionix ./tools/gridss.nix {};
@@ -22,5 +25,6 @@ let
   ref = callBionix ./lib/references.nix {};
   def = f: defs: attrs: f (defs // attrs);
   defQsub = qsubAttrs: f: defs: qsubAttr qsubAttrs (def f defs);
+
 });
 in bionix
diff --git a/lib/references.nix b/lib/references.nix
index 61655cd..3614647 100755
--- a/lib/references.nix
+++ b/lib/references.nix
@@ -1,6 +1,7 @@
 { bionix, nixpkgs }:
 
 with nixpkgs;
+with bionix.types;
 
 rec {
   grch38 = grch38-p12;
@@ -13,6 +14,7 @@ rec {
         sha256 = "0ji2ggpmgnbpwbhq8mirj6h3lyy02nl2rnz7n892iq5cqpsblh4z";
       };
       buildCommand = "gunzip < $src > $out";
+      passthru.filetype = filetype.fa {};
     };
     blacklist = stdenvNoCC.mkDerivation {
       name = "blacklist-grch38";
@@ -21,6 +23,7 @@ rec {
         sha256 = "1lpnqq1mjidbdxj5i6x26rxa8x1rs8q3hlf0z1z49j3jsnkgffky";
       };
       buildCommand = "gunzip < $src > $out";
+      passthru.filetype = filetype.bed { ref = seq; };
     };
   };
 
@@ -34,6 +37,7 @@ rec {
         sha256 = "0ryiqab5bldpzawylsk2qpjxr2j701q03ww9jqyxhkimqpn9g3mr";
       };
       buildCommand = "gunzip < $src > $out";
+      passthru.filetype = filetype.fa {};
     };
   };
 
@@ -47,6 +51,7 @@ rec {
         sha256 = "1660d6d05f3aa266c6053cfd1efef1747d9e854836917241d6f47cff7a55340c";
       };
       buildCommand = "gunzip < $src > $out";
+      passthru.filetype = filetype.fa {};
     };
   };
 }
diff --git a/lib/types.nix b/lib/types.nix
new file mode 100644
index 0000000..a347046
--- /dev/null
+++ b/lib/types.nix
@@ -0,0 +1,64 @@
+{bionix, nixpkgs}:
+
+with nixpkgs;
+
+let
+  nix-adt-src = fetchFromGitHub {
+    owner = "shlevy";
+    repo = "nix-adt";
+    rev = "dd04b5d08eed65ecd73aafde56a78078e09f1c74";
+    sha256 = "0vhk1y7gilgn2pgvj4813bh2ljpw4pvrph8k8b0fsg56dbm8mqxa";
+  };
+  nix-adt = import "${nix-adt-src}";
+  inherit (nix-adt.checked) make-type match any std none;
+  inherit (std) option;
+
+  idft = sym: ft: _: abort "unhandled filetype (${ft}) for ${sym}";
+  idst = sym: st: _: abort "unhandled sorting (${st}) for ${sym}";
+
+in
+rec {
+  option-sort = option sorting;
+
+  matchFiletype = sym: y: x: if x ? filetype then match x.filetype ({
+    fa = idft sym "fasta";
+    fq = idft sym "fastq";
+    bam = idft sym "bam";
+    sam = idft sym "sam";
+    cram = idft sym "cram";
+    vcf = idft sym "vcf";
+    bed = idft sym "bed";
+    gz = idft sym "gz";
+    bz2 = idft sym "bz2";
+  } // y) else abort "unknown filetype for ${sym}";
+  filetype = make-type "filetype" {
+    fa = {};
+    fq = {};
+    bam = {ref = any; sorting = option-sort;};
+    sam = {ref = any; sorting = option-sort;};
+    cram = {ref = any; sorting = option-sort;};
+    vcf = {ref = any;};
+    bed = {ref = any;};
+    gz = filetype;
+    bz2 = filetype;
+  };
+
+  toCram = matchFiletype "bam2cram" { bam = filetype.cram; sam = filetype.cram; cram = filetype.cram; };
+  toBam = matchFiletype "toBam" { bam = filetype.bam; sam = filetype.bam; cram = filetype.bam; };
+  toSam = matchFiletype "toSam" { bam = filetype.sam; sam = filetype.sam; cram = filetype.sam; };
+
+  matchSorting = sym: y: let f = x: match x.sorting { some = z: match z ( { coord = idst sym "coord"; name = idst sym "name"; } // y); none = abort "unknown sort for ${sym}"; }; in matchFiletype sym { bam = f; sam = f; cram = f; };
+  sorting = make-type "sorting" {
+    coord = {};
+    name = {};
+  };
+  coordSort = f: matchFiletype "coordSort" { bam = x: filetype.bam (x // {sorting = option-sort.some (sorting.coord {});}); } {filetype = f;};
+  nameSort = f: matchFiletype "nameSort" { bam = x: filetype.bam (x // {sorting = option-sort.some (sorting.name {});}); } {filetype = f;};
+
+  gunzip = matchFiletype "gunzip" { gz = x: x; };
+  bunzip2 = matchFiletype "bunzip2" { bz2 = x: x; };
+
+  tag = attrs: x: if x ? type && x.type == "derivation" then x // attrs else tagPassthru attrs x;
+  tagPassthru = attrs: x: if x ? passthru then x // { passthru = x.passthru // attrs; } else x // { passthru = attrs; };
+  tagFiletype = ft: tag { filetype = ft; };
+}
diff --git a/test-tnpair.nix b/test-tnpair.nix
index 3260b7b..7deaf3b 100644
--- a/test-tnpair.nix
+++ b/test-tnpair.nix
@@ -14,15 +14,31 @@ in
 with bionix;
 
 let
-  ref = { seq = ./example/ref.fa; };
-  alignWithRG = rg: bwa.align { inherit ref; flags = "-R'@RG\\tID:${rg}\\tSM:${rg}'";};
+  fetchlocal = path: stdenv.mkDerivation {
+    name = baseNameOf path;
+    buildCommand = "ln -s ${path} $out";
+  };
+  fetchfq = attrs: types.tagFiletype (types.filetype.fq {}) (fetchlocal attrs);
+  fetchfa = attrs: types.tagFiletype (types.filetype.fa {}) (fetchlocal attrs);
+
+  alignWithRG = rg: bwa.align { ref = fetchfa ./example/ref.fa; flags = "-R'@RG\\tID:${rg}\\tSM:${rg}'";};
   sort = samtools.sort {};
   flagstat = samtools.flagstat {};
   check = fastqc.check {};
-  callVariants = strelka.call { inherit ref; };
+  callVariants = strelka.call {};
 
-  tnpair = { tumour = {name = "mysample1"; files = {input1 = ./example/sample1-1.fq; input2 = ./example/sample1-2.fq;};};
-             normal = {name = "mysample2"; files = {input1 = ./example/sample2-1.fq; input2 = ./example/sample2-1.fq;};};};
+  tnpair = {
+    tumour = {name = "mysample1"; files = {
+        input1 = fetchfq ./example/sample1-1.fq;
+        input2 = fetchfq ./example/sample1-2.fq;
+      };
+    };
+    normal = {name = "mysample2"; files = {
+        input1 = fetchfq ./example/sample2-1.fq;
+        input2 = fetchfq ./example/sample2-1.fq;
+      };
+    };
+  };
 
   processPair = { tumour, normal }: rec {
     alignments = mapAttrs (_: x: sort (alignWithRG x.name x.files)) { inherit normal tumour; };
@@ -37,8 +53,9 @@ let
     mkdir $out
     ln -s ${tnpairResult.variants} $out/strelka
     mkdir $out/alignments
-    ln -s ${tnpairResult.alignments.tumour} $out/alignments/${tnpair.tumour.name}.bam
-    ln -s ${tnpairResult.alignments.normal} $out/alignments/${tnpair.normal.name}.bam
+    ln -s ${gridss.callVariants {} (with tnpairResult.alignments; [tumour])} $out/gridss
+    ln -s ${samtools.view { outfmt = types.toCram; } (tnpairResult.alignments.tumour)} $out/alignments/${tnpair.tumour.name}.cram
+    ln -s ${samtools.view { outfmt = types.toCram; } (tnpairResult.alignments.normal)} $out/alignments/${tnpair.normal.name}.cram
     ln -s ${flagstat tnpairResult.alignments.tumour} $out/alignments/${tnpair.tumour.name}.flagstat
     ln -s ${flagstat tnpairResult.alignments.normal} $out/alignments/${tnpair.normal.name}.flagstat
     mkdir $out/fastqc
diff --git a/tools/bwa-index.nix b/tools/bwa-index.nix
index 48a2556..c879bc4 100644
--- a/tools/bwa-index.nix
+++ b/tools/bwa-index.nix
@@ -7,12 +7,15 @@ ref:
 
 with nixpkgs;
 with lib;
+with bionix.types;
+
+assert (matchFiletype "bwa-index" { fa = _: true; } ref);
 
 stdenv.mkDerivation {
   name = "bwa-index";
   buildInputs = [ bwa ];
   buildCommand = ''
-    ln -s ${ref.seq} ref.fa
+    ln -s ${ref} ref.fa
     bwa index ${optionalString (flags != null) flags} ref.fa
     mkdir $out
     mv ref.fa.* $out
diff --git a/tools/bwa-mem.nix b/tools/bwa-mem.nix
index ca9e6a8..0d0f7d8 100644
--- a/tools/bwa-mem.nix
+++ b/tools/bwa-mem.nix
@@ -12,12 +12,18 @@
 
 with nixpkgs;
 with lib;
+with bionix.types;
+with bionix.compression;
 
-stdenv.mkDerivation {
+let
+  fa = f: matchFiletype "bwa-ref" { fa = _: f; } f;
+  fq = f: matchFiletype "bwa-input" { fq = _: f; } f;
+
+in stdenv.mkDerivation {
   name = "bwa-mem";
   buildInputs = [ bwa bc ] ++ optional bamOutput samtools;
   buildCommand = ''
-    ln -s ${ref.seq} ref.fa
+    ln -s ${fa ref} ref.fa
     for f in ${bionix.bwa.index indexAttrs ref}/* ; do
       ln -s $f
     done
@@ -26,9 +32,10 @@ stdenv.mkDerivation {
       >&2 echo "not enough build cores"
       exit 1
     fi
-    bwa mem ${optionalString (flags != null) flags} -t $cores ref.fa ${input1} \
-      ${optionalString (input2 != null) input2} \
+    bwa mem ${optionalString (flags != null) flags} -t $cores ref.fa ${fq input1} \
+      ${optionalString (input2 != null) (fq input2)} \
       ${optionalString bamOutput "| samtools view -b"} \
       > $out
   '';
+  passthru.filetype = if bamOutput then filetype.bam {ref = ref; sorting = option-sort.none;} else filetype.sam {ref = ref; sorting = option-sort.none;};
 }
diff --git a/tools/compression.nix b/tools/compression.nix
new file mode 100644
index 0000000..aea7e13
--- /dev/null
+++ b/tools/compression.nix
@@ -0,0 +1,58 @@
+{bionix, nixpkgs}:
+
+with nixpkgs;
+with bionix;
+
+{
+  uncompress = f: types.matchFiletype "uncompress" {
+    fa = _: f;
+    fq = _: f;
+    bam = _: f;
+    sam = _: f;
+    cram = _: f;
+    vcf = _: f;
+    bed = _: f;
+    gz = _: types.tagFiletype (types.gunzip f) (stdenv.mkDerivation {
+      name = "gunzip";
+      buildCommand = "gunzip < ${f} > $out";
+    });
+    bz2 = _: types.tagFiletype (types.bunzip2 f) (stdenv.mkDerivation {
+      name = "bunzip2";
+      buildCommand = "bunzip2 < ${f} > $out";
+    });
+  } f;
+
+  gzip = f:
+    let
+      gz = (stdenv.mkDerivation {
+        name = "gzip";
+        buildCommand = "gzip < ${f} > $out";
+        passthru = { filetype = types.filetype.gz f.filetype; };
+      });
+    in types.matchFiletype "compressed" {
+      fa = _: gz;
+      fq = _: gz;
+      bam = _: gz;
+      sam = _: gz;
+      cram = _: gz;
+      vcf = _: gz;
+      bed = _: gz;
+    } f;
+
+  bzip2 = f:
+    let
+      bz2 = (stdenv.mkDerivation {
+        name = "bzip2";
+        buildCommand = "bzip2 < ${f} > $out";
+        passthru = { filetype = types.filetype.bz2 f.filetype; };
+      });
+    in types.matchFiletype "compressed" {
+      fa = _: bz2;
+      fq = _: bz2;
+      bam = _: bz2;
+      sam = _: bz2;
+      cram = _: bz2;
+      vcf = _: bz2;
+      bed = _: bz2;
+    } f;
+}
diff --git a/tools/crumble-toCram.nix b/tools/crumble-toCram.nix
deleted file mode 100644
index 2544e17..0000000
--- a/tools/crumble-toCram.nix
+++ /dev/null
@@ -1,15 +0,0 @@
-{ bionix
-, nixpkgs
-, flags ? null
-}:
-
-with nixpkgs;
-with lib;
-
-input:
-
-stdenv.mkDerivation {
-  name = "crumble";
-  buildInputs = [ bionix.crumble.crumble ];
-  buildCommand = "crumble ${optionalString (flags != null) flags} ${input} $out";
-}
diff --git a/tools/crumble.nix b/tools/crumble.nix
index fb48c8f..6031fca 100644
--- a/tools/crumble.nix
+++ b/tools/crumble.nix
@@ -5,5 +5,4 @@ with bionix;
 
 {
   crumble = callPackage ./crumble-app.nix {};
-  toCram = callBionix ./crumble-toCram.nix;
 }
diff --git a/tools/gridss-callVariants.nix b/tools/gridss-callVariants.nix
index c313c3f..799c930 100644
--- a/tools/gridss-callVariants.nix
+++ b/tools/gridss-callVariants.nix
@@ -1,16 +1,25 @@
 { bionix
 , nixpkgs
-, ref
 , blacklist ? null
 , bwaIndexAttrs ? {}
 , faidxAttrs ? {}
 , flags ? null
 }:
 
+with nixpkgs;
 with lib;
+with bionix.types;
 
 inputs:
 
+let
+  getref = matchFiletype "gridss-callVariants" { bam = x: x.ref; };
+  refs = map getref inputs;
+  ref = head refs;
+in
+
+assert (length (unique refs) == 1);
+
 stdenv.mkDerivation rec {
   name = "gridss-callVariants";
   buildInputs = [ jre R bwa ];
@@ -19,7 +28,7 @@ stdenv.mkDerivation rec {
     sha256 = "01srl3qvv060whqg1y1fpxjc5cwga5wscs1bmf1v3z87dignra7k";
   };
   buildCommand = ''
-    ln -s ${ref.seq} ref.fa
+    ln -s ${ref} ref.fa
     ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai
     for f in ${bionix.bwa.index bwaIndexAttrs ref}/*; do
       ln -s $f
diff --git a/tools/gridss.nix b/tools/gridss.nix
index edf9f57..7a2f217 100644
--- a/tools/gridss.nix
+++ b/tools/gridss.nix
@@ -3,5 +3,5 @@
 with bionix;
 
 {
-  callVariants = callBiolnix ./gridss-callVariants.nix;
+  callVariants = callBionix ./gridss-callVariants.nix;
 }
diff --git a/tools/platypus-callVariants.nix b/tools/platypus-callVariants.nix
index 7b68b28..a3e3a65 100644
--- a/tools/platypus-callVariants.nix
+++ b/tools/platypus-callVariants.nix
@@ -1,6 +1,5 @@
 { bionix
 , nixpkgs
-, ref
 , indexAttrs ? {}
 , bamIndexAttrs ? {}
 , flags ? null
@@ -8,16 +7,26 @@
 
 inputs:
 
+with nixpkgs;
 with lib;
+with bionix.types;
 
 
-let filename = path: last (splitString "/" path);
-in stdenv.mkDerivation {
+let
+  filename = path: last (splitString "/" path);
+  getref = f: matchFiletype "platypus-callVariants" { bam = x: x.ref; } f;
+  refs = map getref inputs;
+  ref = head refs;
+in
+
+assert (length (unique refs) == 1);
+
+stdenv.mkDerivation {
   name = "platypus";
   buildInputs = [ platypus ];
   buildCommand = ''
-    ln -s ${ref.seq} ref.fa
-    ln -s ${bionix.samtools.faix indexAttrs ref} ref.fa.fai
+    ln -s ${ref} ref.fa
+    ln -s ${bionix.samtools.faidx indexAttrs ref} ref.fa.fai
     ${concatMapStringsSep "\n" (p: "ln -s ${p} ${filename p}.bam") inputs}
     ${concatMapStringsSep "\n" (p: "ln -s ${bionix.samtools.index bamIndexAttrs p} ${filename p}.bai") inputs}
     ls -l
@@ -28,4 +37,5 @@ in stdenv.mkDerivation {
       -o $out \
       --bamFiles=${concatMapStringsSep "," (p: "${filename p}.bam") inputs}
   '';
+  passthru.filetype = filetype.vcf {ref = ref;};
 }
diff --git a/tools/platypus.nix b/tools/platypus.nix
index 88d88d7..0dfe397 100644
--- a/tools/platypus.nix
+++ b/tools/platypus.nix
@@ -1,7 +1,7 @@
 { bionix, nixpkgs }:
 
-with nixpkgs;
+with bionix;
 
 {
-  call = attrs: callPackage ./platypus-callVariants.nix attrs;
+  call = callBionix ./platypus-callVariants.nix;
 }
diff --git a/tools/samtools-faidx.nix b/tools/samtools-faidx.nix
index bf32bdf..1fde411 100644
--- a/tools/samtools-faidx.nix
+++ b/tools/samtools-faidx.nix
@@ -7,6 +7,9 @@ input:
 
 with nixpkgs;
 with lib;
+with bionix.types;
+
+assert (matchFiletype "samtools-faidx" { fa = _: true; } input);
 
 
 stdenv.mkDerivation {
diff --git a/tools/samtools-index.nix b/tools/samtools-index.nix
index 4e09dc7..aad46a7 100644
--- a/tools/samtools-index.nix
+++ b/tools/samtools-index.nix
@@ -7,6 +7,10 @@ input:
 
 with nixpkgs;
 with lib;
+with bionix.types;
+
+assert (matchFiletype "samtools-index" { bam = _: true; } input);
+assert (matchSorting "samtools-index" { coord = _: true; } input);
 
 stdenv.mkDerivation {
   name = "samtools-index";
diff --git a/tools/samtools-sort.nix b/tools/samtools-sort.nix
index 2de579e..ab9d603 100644
--- a/tools/samtools-sort.nix
+++ b/tools/samtools-sort.nix
@@ -2,6 +2,7 @@
 , nixpkgs
 , nameSort ? false
 , flags ? null
+, outfmt ? null
 }:
 
 input:
@@ -9,10 +10,20 @@ input:
 with nixpkgs;
 with lib;
 
-stdenv.mkDerivation {
+let
+  inherit (bionix.types) matchFiletype coordSort;
+in
+
+assert (matchFiletype "samtools-sort" { bam = _: true; sam = _: true; cram = _: true; } input);
+
+let
+  outfmtR = if outfmt != null then outfmt input else input.filetype;
+  outFmtFlags = matchFiletype "samtools-sort-outfmt" { bam = _: "-O BAM"; sam = _: "-O SAM"; cram = x: "-O CRAM -T ${x.ref}"; } {filetype = outfmtR;};
+in stdenv.mkDerivation {
   name = "samtools-sort";
   buildInputs = [ samtools ];
   buildCommand = ''
-    samtools sort -@ $NIX_BUILD_CORES ${optionalString nameSort "-n"} ${optionalString (flags != null) flags} ${input} > $out
+    samtools sort -@ $NIX_BUILD_CORES ${optionalString nameSort "-n"} ${outFmtFlags} ${optionalString (flags != null) flags} ${input} > $out
   '';
+  passthru.filetype = if nameSort then bionix.types.nameSort outfmtR else coordSort outfmtR;
 }
diff --git a/tools/samtools-view.nix b/tools/samtools-view.nix
new file mode 100644
index 0000000..e1cdac1
--- /dev/null
+++ b/tools/samtools-view.nix
@@ -0,0 +1,27 @@
+{ bionix
+, nixpkgs
+, nameSort ? false
+, flags ? null
+, outfmt ? null
+}:
+
+input:
+
+with nixpkgs;
+with lib;
+with bionix.types;
+
+assert (matchFiletype "samtools-view" { bam = _: true; sam = _: true; cram = _: true; } input);
+
+let
+  outfmtR = if outfmt != null then outfmt input else input.filetype;
+  fa = ref: matchFiletype "samtools-view-ref" { fa = _: ref; } ref;
+  outfmtFlags = matchFiletype "samtools-view-outfmt" { bam = _: "-O BAM"; sam = _: "-O SAM"; cram = x: "-O CRAM -T ${fa x.ref}"; } {filetype = outfmtR;};
+in stdenv.mkDerivation {
+  name = "samtools-view";
+  buildInputs = [ samtools ];
+  buildCommand = ''
+    samtools view ${outfmtFlags} ${optionalString (flags != null) flags} ${input} > $out
+  '';
+  passthru.filetype = outfmtR;
+}
diff --git a/tools/samtools.nix b/tools/samtools.nix
index 6b08c6e..89a96b0 100644
--- a/tools/samtools.nix
+++ b/tools/samtools.nix
@@ -3,6 +3,7 @@
 with bionix;
 
 {
+  view = callBionix ./samtools-view.nix;
   faidx = callBionix ./samtools-faidx.nix;
   flagstat = callBionix ./samtools-flagstat.nix;
   index = callBionix ./samtools-index.nix;
diff --git a/tools/strelka-call.nix b/tools/strelka-call.nix
index ecdea84..bcbb6d1 100644
--- a/tools/strelka-call.nix
+++ b/tools/strelka-call.nix
@@ -1,6 +1,5 @@
 { bionix
 , nixpkgs
-, ref
 , indexAttrs ? {}
 , bamIndexAttrs ? {}
 , flags ? null
@@ -10,17 +9,25 @@
 
 with nixpkgs;
 with lib;
+with bionix.types;
 
 let
   filename = path: last (splitString "/" path);
+  getref = f: matchFiletype "strelka-call" { bam = x: x.ref; } f;
   inputs = [ normal tumour ];
+  refs = map getref inputs;
+  ref = head refs;
 
-in stdenv.mkDerivation {
+in
+
+assert (length (unique refs) == 1);
+
+stdenv.mkDerivation {
   name = "strelka";
   buildInputs = [ strelka ];
   buildCommand = ''
-    ln -s ${ref.seq} ref.fa
-    ln -s ${bionix.samtools.faidx indexAttrs ref.seq} ref.fa.fai
+    ln -s ${ref} ref.fa
+    ln -s ${bionix.samtools.faidx indexAttrs ref} ref.fa.fai
     ${concatMapStringsSep "\n" (p: "ln -s ${p} ${filename p}.bam") inputs}
     ${concatMapStringsSep "\n" (p: "ln -s ${bionix.samtools.index bamIndexAttrs p} ${filename p}.bai") inputs}
 
-- 
cgit v1.2.3