diff options
| -rw-r--r-- | default.nix | 1 | ||||
| -rwxr-xr-x | lib/references.nix | 162 | ||||
| -rw-r--r-- | tools/snpeff-annotate.nix | 21 | ||||
| -rw-r--r-- | tools/snpeff-dbnsfp.nix | 22 | ||||
| -rw-r--r-- | tools/snpeff.nix | 8 | 
5 files changed, 146 insertions, 68 deletions
diff --git a/default.nix b/default.nix index 27afcb5..538ddf4 100644 --- a/default.nix +++ b/default.nix @@ -26,6 +26,7 @@ let      platypus = callBionix ./tools/platypus.nix {};      samtools = callBionix ./tools/samtools.nix {};      strelka = callBionix ./tools/strelka.nix {}; +    snpeff = callBionix ./tools/snpeff.nix {};      ref = callBionix ./lib/references.nix {}; diff --git a/lib/references.nix b/lib/references.nix index 1ac92de..9d1b947 100755 --- a/lib/references.nix +++ b/lib/references.nix @@ -33,90 +33,116 @@ rec {        };        buildInputs = [ gawk ];        buildCommand = '' -        gunzip < $src | awk '/^[^#]/{print "chr" $0;next}{print}' > $out -      ''; -      passthru.filetype = filetype.vcf { ref = seq; }; -    }; -    cosmic = {coding, noncoding}: stdenvNoCC.mkDerivation rec { -      name = "cosmic-grch38"; -      buildInputs = [ gawk ]; -      buildCommand = '' +      gunzip < $src | awk '/^[^#]/{print "chr" $0;next}{print}' > $out +        ''; +        passthru.filetype = filetype.vcf { ref = seq; }; +      }; +      cosmic = {coding, noncoding}: stdenvNoCC.mkDerivation rec { +        name = "cosmic-grch38"; +        buildInputs = [ gawk ]; +        buildCommand = ''          gunzip < ${coding} | grep '^#' > $out          cat ${coding} ${noncoding} | gunzip | grep -v '^#' | sed 's/^/chr/' | sort -t$'\t' -k1,1 -k2,2n >> $out -      ''; -      passthru.filetype = filetype.vcf { ref = seq; }; -    }; -    ensembl = { -      cdna = stdenvNoCC.mkDerivation rec { -        name = "ensembl-grch38-cdna-${version}"; -        version = "94"; -        src = fetchurl { -          url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz"; -          sha256 = "1fc5d6p2wlwsm49wnmxmm3byjx5jvr6z9fpzrq7v7fpb086adl0h"; +        ''; +        passthru.filetype = filetype.vcf { ref = seq; }; +      }; +      ensembl = { +        cdna = stdenvNoCC.mkDerivation rec { +          name = "ensembl-grch38-cdna-${version}"; +          version = "94"; +          src = fetchurl { +            url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz"; +            sha256 = "1fc5d6p2wlwsm49wnmxmm3byjx5jvr6z9fpzrq7v7fpb086adl0h"; +          }; +          buildCommand = "gunzip < $src > $out"; +          passthru.filetype = filetype.fa {}; +        }; +        ncrna = stdenvNoCC.mkDerivation rec { +          name = "ensembl-grch38-ncrna-${version}"; +          version = "94"; +          src = fetchurl { +            url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/homo_sapiens/ncrna/Homo_sapiens.GRCh38.ncrna.fa.gz"; +            sha256 = "1cpasykwriila52nqgvw6d3mjyh6d9qi613hvhn4h1dxkqzgnjff"; +          }; +          buildCommand = "gunzip < $src > $out"; +          passthru.filetype = filetype.fa {};          }; -        buildCommand = "gunzip < $src > $out"; -        passthru.filetype = filetype.fa {};        }; -      ncrna = stdenvNoCC.mkDerivation rec { -        name = "ensembl-grch38-ncrna-${version}"; -        version = "94"; -        src = fetchurl { -          url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/homo_sapiens/ncrna/Homo_sapiens.GRCh38.ncrna.fa.gz"; -          sha256 = "1cpasykwriila52nqgvw6d3mjyh6d9qi613hvhn4h1dxkqzgnjff"; +      snpeff = { +        db = stdenv.mkDerivation rec { +          name = "GRCh38.86"; +          src = fetchurl { +            url = "mirror://sourceforge/project/snpeff/databases/v4_3/snpEff_v4_3_${name}.zip"; +            sha256 = "1rf8q7l732ayjq2lpny4s75zpij05j00151374nqblk4wri2mz0i"; +          }; +          buildInputs = [ unzip ]; +          buildCommand = '' +            unzip ${src} +            mv data/${name} $out +          ''; +        }; +        dbnsfp = { +          db = requireFile { +            name = "dbNSFP.txt.gz"; +            message = "download the dbNSFP database manually from https://drive.google.com/uc?export=download&id=0B7Ms5xMSFMYlbTZodjlGUDZnTGc and add to nix store"; +            sha256 = "0gahnwkc7v2q6p6ixkhvsgqvvm6xf0c3bdh4nf0alih83h3wffd0"; +          }; +          index = requireFile { +            name = "dbNSFP.txt.gz.tbi"; +            message = "download the dbNSFP index manually from https://drive.google.com/uc?export=download&id=0B7Ms5xMSFMYlNVBJdFA5cFZRYkE and add to nix store"; +            sha256 = "18blkly6gvg7r0sx968xlb1zl2kqg5j1kpbrm2r7ajlxlfyvrx3w"; +          };          }; -        buildCommand = "gunzip < $src > $out"; -        passthru.filetype = filetype.fa {};        };      }; -  }; -  grcm38 = grcm38-p6; -  grcm38-p6 = { -    seq = stdenvNoCC.mkDerivation rec { -      name = "seq-grcm38.${version}"; -      version = "p6"; -      src = fetchurl { -        url = "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M19/GRCm38.${version}.genome.fa.gz"; -        sha256 = "0ryiqab5bldpzawylsk2qpjxr2j701q03ww9jqyxhkimqpn9g3mr"; -      }; -      buildCommand = "gunzip < $src > $out"; -      passthru.filetype = filetype.fa {}; -    }; -    ensembl = { -      cdna = stdenvNoCC.mkDerivation rec { -        name = "ensembl-grch38-cdna-${version}"; -        version = "94"; +    grcm38 = grcm38-p6; +    grcm38-p6 = { +      seq = stdenvNoCC.mkDerivation rec { +        name = "seq-grcm38.${version}"; +        version = "p6";          src = fetchurl { -          url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/mus_musculus/cdna/Mus_musculus.GRCm38.cdna.all.fa.gz"; -          sha256 = "0khp9l6s35lav2xqp7vkk6ybnz4wjihn7lapjf2lbpnbzjb4hp6d"; +          url = "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M19/GRCm38.${version}.genome.fa.gz"; +          sha256 = "0ryiqab5bldpzawylsk2qpjxr2j701q03ww9jqyxhkimqpn9g3mr";          };          buildCommand = "gunzip < $src > $out";          passthru.filetype = filetype.fa {};        }; -      ncrna = stdenvNoCC.mkDerivation rec { -        name = "ensembl-grch38-ncrna-${version}"; -        version = "94"; -        src = fetchurl { -          url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/mus_musculus/ncrna/Mus_musculus.GRCm38.ncrna.fa.gz"; -          sha256 = "0d997gm8p2b89rm5d46m2x4vz9lijxarfr2lzylnbi8gyqrbagdd"; +      ensembl = { +        cdna = stdenvNoCC.mkDerivation rec { +          name = "ensembl-grch38-cdna-${version}"; +          version = "94"; +          src = fetchurl { +            url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/mus_musculus/cdna/Mus_musculus.GRCm38.cdna.all.fa.gz"; +            sha256 = "0khp9l6s35lav2xqp7vkk6ybnz4wjihn7lapjf2lbpnbzjb4hp6d"; +          }; +          buildCommand = "gunzip < $src > $out"; +          passthru.filetype = filetype.fa {}; +        }; +        ncrna = stdenvNoCC.mkDerivation rec { +          name = "ensembl-grch38-ncrna-${version}"; +          version = "94"; +          src = fetchurl { +            url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/mus_musculus/ncrna/Mus_musculus.GRCm38.ncrna.fa.gz"; +            sha256 = "0d997gm8p2b89rm5d46m2x4vz9lijxarfr2lzylnbi8gyqrbagdd"; +          }; +          buildCommand = "gunzip < $src > $out"; +          passthru.filetype = filetype.fa {};          }; -        buildCommand = "gunzip < $src > $out"; -        passthru.filetype = filetype.fa {};        };      }; -  }; -  mm10 = mm10-p4; -  mm10-p4 = { +    mm10 = mm10-p4; +    mm10-p4 = {        seq = stdenvNoCC.mkDerivation rec { -          name = "seq-mm10.${version}"; -          version = "p4"; -          src = fetchurl { -              url = "http://hgdownload.soe.ucsc.edu/goldenPath/mm10/mm10Patch4/mm10Patch4.fa.gz"; -              sha256 = "1660d6d05f3aa266c6053cfd1efef1747d9e854836917241d6f47cff7a55340c"; -              }; -          buildCommand = "gunzip < $src > $out"; -          passthru.filetype = filetype.fa {}; +        name = "seq-mm10.${version}"; +        version = "p4"; +        src = fetchurl { +          url = "http://hgdownload.soe.ucsc.edu/goldenPath/mm10/mm10Patch4/mm10Patch4.fa.gz"; +          sha256 = "1660d6d05f3aa266c6053cfd1efef1747d9e854836917241d6f47cff7a55340c"; +        }; +        buildCommand = "gunzip < $src > $out"; +        passthru.filetype = filetype.fa {};        }; -  }; -} +    }; +  } diff --git a/tools/snpeff-annotate.nix b/tools/snpeff-annotate.nix new file mode 100644 index 0000000..a119c3c --- /dev/null +++ b/tools/snpeff-annotate.nix @@ -0,0 +1,21 @@ +{bionix +,nixpkgs +,db +,flags ? ""}: + +input: + +with nixpkgs; +with bionix.types; + +assert (matchFiletype "snpeff-annotate" { vcf = _: true; } input); + +stdenv.mkDerivation { +  name = "snpeff-annotate"; +  buildCommand = '' +    ln -s ${db} ${db.name} +    snpeff -nodownload -dataDir $TMPDIR ${db.name} ${input} > $out +  ''; +  buildInputs = [ snpeff ]; +  passthru.filetype = input.filetype; +} diff --git a/tools/snpeff-dbnsfp.nix b/tools/snpeff-dbnsfp.nix new file mode 100644 index 0000000..f9fbdaa --- /dev/null +++ b/tools/snpeff-dbnsfp.nix @@ -0,0 +1,22 @@ +{bionix +,nixpkgs +,dbnsfp +,flags ? ""}: + +input: + +with nixpkgs; +with bionix.types; + +assert (matchFiletype "snpeff-dbnsfp" { vcf = _: true; } input); + +stdenv.mkDerivation { +  name = "snpeff-dbnsfp"; +  buildCommand = '' +    ln -s ${dbnsfp.db} dbNSFP.txt.gz +    ln -s ${dbnsfp.index} dbNSFP.txt.gz.tbi +    snpeff dbnsfp -db dbNSFP.txt.gz ${input} > $out +  ''; +  buildInputs = [ snpeff ]; +  passthru.filetype = input.filetype; +} diff --git a/tools/snpeff.nix b/tools/snpeff.nix new file mode 100644 index 0000000..2df8056 --- /dev/null +++ b/tools/snpeff.nix @@ -0,0 +1,8 @@ +{bionix, nixpkgs}: + +with bionix; + +{ +  annotate = callBionixE ./snpeff-annotate.nix; +  dbnsfp = callBionixE ./snpeff-dbnsfp.nix; +}  | 
