From b029bff0ec25ec06758a5fdf7c79152d2ed5c032 Mon Sep 17 00:00:00 2001 From: Justin Bedo Date: Fri, 4 Jan 2019 11:37:38 +1100 Subject: snpeff: init --- default.nix | 1 + lib/references.nix | 162 +++++++++++++++++++++++++++------------------- tools/snpeff-annotate.nix | 21 ++++++ tools/snpeff-dbnsfp.nix | 22 +++++++ tools/snpeff.nix | 8 +++ 5 files changed, 146 insertions(+), 68 deletions(-) create mode 100644 tools/snpeff-annotate.nix create mode 100644 tools/snpeff-dbnsfp.nix create mode 100644 tools/snpeff.nix diff --git a/default.nix b/default.nix index 27afcb5..538ddf4 100644 --- a/default.nix +++ b/default.nix @@ -26,6 +26,7 @@ let platypus = callBionix ./tools/platypus.nix {}; samtools = callBionix ./tools/samtools.nix {}; strelka = callBionix ./tools/strelka.nix {}; + snpeff = callBionix ./tools/snpeff.nix {}; ref = callBionix ./lib/references.nix {}; diff --git a/lib/references.nix b/lib/references.nix index 1ac92de..9d1b947 100755 --- a/lib/references.nix +++ b/lib/references.nix @@ -33,90 +33,116 @@ rec { }; buildInputs = [ gawk ]; buildCommand = '' - gunzip < $src | awk '/^[^#]/{print "chr" $0;next}{print}' > $out - ''; - passthru.filetype = filetype.vcf { ref = seq; }; - }; - cosmic = {coding, noncoding}: stdenvNoCC.mkDerivation rec { - name = "cosmic-grch38"; - buildInputs = [ gawk ]; - buildCommand = '' + gunzip < $src | awk '/^[^#]/{print "chr" $0;next}{print}' > $out + ''; + passthru.filetype = filetype.vcf { ref = seq; }; + }; + cosmic = {coding, noncoding}: stdenvNoCC.mkDerivation rec { + name = "cosmic-grch38"; + buildInputs = [ gawk ]; + buildCommand = '' gunzip < ${coding} | grep '^#' > $out cat ${coding} ${noncoding} | gunzip | grep -v '^#' | sed 's/^/chr/' | sort -t$'\t' -k1,1 -k2,2n >> $out - ''; - passthru.filetype = filetype.vcf { ref = seq; }; - }; - ensembl = { - cdna = stdenvNoCC.mkDerivation rec { - name = "ensembl-grch38-cdna-${version}"; - version = "94"; - src = fetchurl { - url = 
"ftp://ftp.ensembl.org/pub/release-${version}/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz"; - sha256 = "1fc5d6p2wlwsm49wnmxmm3byjx5jvr6z9fpzrq7v7fpb086adl0h"; + ''; + passthru.filetype = filetype.vcf { ref = seq; }; + }; + ensembl = { + cdna = stdenvNoCC.mkDerivation rec { + name = "ensembl-grch38-cdna-${version}"; + version = "94"; + src = fetchurl { + url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz"; + sha256 = "1fc5d6p2wlwsm49wnmxmm3byjx5jvr6z9fpzrq7v7fpb086adl0h"; + }; + buildCommand = "gunzip < $src > $out"; + passthru.filetype = filetype.fa {}; + }; + ncrna = stdenvNoCC.mkDerivation rec { + name = "ensembl-grch38-ncrna-${version}"; + version = "94"; + src = fetchurl { + url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/homo_sapiens/ncrna/Homo_sapiens.GRCh38.ncrna.fa.gz"; + sha256 = "1cpasykwriila52nqgvw6d3mjyh6d9qi613hvhn4h1dxkqzgnjff"; + }; + buildCommand = "gunzip < $src > $out"; + passthru.filetype = filetype.fa {}; }; - buildCommand = "gunzip < $src > $out"; - passthru.filetype = filetype.fa {}; }; - ncrna = stdenvNoCC.mkDerivation rec { - name = "ensembl-grch38-ncrna-${version}"; - version = "94"; - src = fetchurl { - url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/homo_sapiens/ncrna/Homo_sapiens.GRCh38.ncrna.fa.gz"; - sha256 = "1cpasykwriila52nqgvw6d3mjyh6d9qi613hvhn4h1dxkqzgnjff"; + snpeff = { /* snpEff databases for GRCh38: prebuilt genome db plus the manually supplied dbNSFP table and index */ + db = stdenv.mkDerivation rec { + name = "GRCh38.86"; + src = fetchurl { + url = "mirror://sourceforge/project/snpeff/databases/v4_3/snpEff_v4_3_${name}.zip"; + sha256 = "1rf8q7l732ayjq2lpny4s75zpij05j00151374nqblk4wri2mz0i"; + }; + buildInputs = [ unzip ]; + buildCommand = '' + unzip ${src} + mv data/${name} $out + ''; + }; + dbnsfp = { + db = requireFile { + name = "dbNSFP.txt.gz"; + message = "download the dbNSFP database manually from https://drive.google.com/uc?export=download&id=0B7Ms5xMSFMYlbTZodjlGUDZnTGc and add to nix store"; + sha256 = 
"0gahnwkc7v2q6p6ixkhvsgqvvm6xf0c3bdh4nf0alih83h3wffd0"; + }; + index = requireFile { + name = "dbNSFP.txt.gz.tbi"; + message = "download the dbNSFP index manually from https://drive.google.com/uc?export=download&id=0B7Ms5xMSFMYlNVBJdFA5cFZRYkE and add to nix store"; + sha256 = "18blkly6gvg7r0sx968xlb1zl2kqg5j1kpbrm2r7ajlxlfyvrx3w"; + }; }; - buildCommand = "gunzip < $src > $out"; - passthru.filetype = filetype.fa {}; }; }; - }; - grcm38 = grcm38-p6; - grcm38-p6 = { - seq = stdenvNoCC.mkDerivation rec { - name = "seq-grcm38.${version}"; - version = "p6"; - src = fetchurl { - url = "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M19/GRCm38.${version}.genome.fa.gz"; - sha256 = "0ryiqab5bldpzawylsk2qpjxr2j701q03ww9jqyxhkimqpn9g3mr"; - }; - buildCommand = "gunzip < $src > $out"; - passthru.filetype = filetype.fa {}; - }; - ensembl = { - cdna = stdenvNoCC.mkDerivation rec { - name = "ensembl-grch38-cdna-${version}"; - version = "94"; + grcm38 = grcm38-p6; + grcm38-p6 = { + seq = stdenvNoCC.mkDerivation rec { + name = "seq-grcm38.${version}"; + version = "p6"; src = fetchurl { - url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/mus_musculus/cdna/Mus_musculus.GRCm38.cdna.all.fa.gz"; - sha256 = "0khp9l6s35lav2xqp7vkk6ybnz4wjihn7lapjf2lbpnbzjb4hp6d"; + url = "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M19/GRCm38.${version}.genome.fa.gz"; + sha256 = "0ryiqab5bldpzawylsk2qpjxr2j701q03ww9jqyxhkimqpn9g3mr"; }; buildCommand = "gunzip < $src > $out"; passthru.filetype = filetype.fa {}; }; - ncrna = stdenvNoCC.mkDerivation rec { - name = "ensembl-grch38-ncrna-${version}"; - version = "94"; - src = fetchurl { - url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/mus_musculus/ncrna/Mus_musculus.GRCm38.ncrna.fa.gz"; - sha256 = "0d997gm8p2b89rm5d46m2x4vz9lijxarfr2lzylnbi8gyqrbagdd"; + ensembl = { + cdna = stdenvNoCC.mkDerivation rec { + name = "ensembl-grcm38-cdna-${version}"; + version = "94"; + src = fetchurl { + url = 
"ftp://ftp.ensembl.org/pub/release-${version}/fasta/mus_musculus/cdna/Mus_musculus.GRCm38.cdna.all.fa.gz"; + sha256 = "0khp9l6s35lav2xqp7vkk6ybnz4wjihn7lapjf2lbpnbzjb4hp6d"; + }; + buildCommand = "gunzip < $src > $out"; + passthru.filetype = filetype.fa {}; + }; + ncrna = stdenvNoCC.mkDerivation rec { + name = "ensembl-grcm38-ncrna-${version}"; + version = "94"; + src = fetchurl { + url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/mus_musculus/ncrna/Mus_musculus.GRCm38.ncrna.fa.gz"; + sha256 = "0d997gm8p2b89rm5d46m2x4vz9lijxarfr2lzylnbi8gyqrbagdd"; + }; + buildCommand = "gunzip < $src > $out"; + passthru.filetype = filetype.fa {}; }; - buildCommand = "gunzip < $src > $out"; - passthru.filetype = filetype.fa {}; }; }; - }; - mm10 = mm10-p4; - mm10-p4 = { + mm10 = mm10-p4; + mm10-p4 = { seq = stdenvNoCC.mkDerivation rec { - name = "seq-mm10.${version}"; - version = "p4"; - src = fetchurl { - url = "http://hgdownload.soe.ucsc.edu/goldenPath/mm10/mm10Patch4/mm10Patch4.fa.gz"; - sha256 = "1660d6d05f3aa266c6053cfd1efef1747d9e854836917241d6f47cff7a55340c"; - }; - buildCommand = "gunzip < $src > $out"; - passthru.filetype = filetype.fa {}; + name = "seq-mm10.${version}"; + version = "p4"; + src = fetchurl { + url = "http://hgdownload.soe.ucsc.edu/goldenPath/mm10/mm10Patch4/mm10Patch4.fa.gz"; + sha256 = "1660d6d05f3aa266c6053cfd1efef1747d9e854836917241d6f47cff7a55340c"; + }; + buildCommand = "gunzip < $src > $out"; + passthru.filetype = filetype.fa {}; }; - }; -} + }; + } diff --git a/tools/snpeff-annotate.nix b/tools/snpeff-annotate.nix new file mode 100644 index 0000000..a119c3c --- /dev/null +++ b/tools/snpeff-annotate.nix @@ -0,0 +1,21 @@ +{bionix +,nixpkgs +,db +,flags ? 
""}: /* Annotate a VCF with snpEff. db: database derivation, symlinked into the build directory under db.name; flags: extra snpEff options, inserted before the genome name. */ + +input: + +with nixpkgs; +with bionix.types; + +assert (matchFiletype "snpeff-annotate" { vcf = _: true; } input); + +stdenv.mkDerivation { + name = "snpeff-annotate"; + buildCommand = '' + ln -s ${db} ${db.name} + snpeff -nodownload -dataDir $TMPDIR ${flags} ${db.name} ${input} > $out + ''; + buildInputs = [ snpeff ]; + passthru.filetype = input.filetype; +} diff --git a/tools/snpeff-dbnsfp.nix b/tools/snpeff-dbnsfp.nix new file mode 100644 index 0000000..f9fbdaa --- /dev/null +++ b/tools/snpeff-dbnsfp.nix @@ -0,0 +1,22 @@ +{bionix +,nixpkgs +,dbnsfp +,flags ? ""}: /* Annotate a VCF with dbNSFP via SnpSift. dbnsfp: attrset with db (linked as dbNSFP.txt.gz) and index (linked as dbNSFP.txt.gz.tbi); flags: extra options for the dbnsfp sub-command. */ + +input: + +with nixpkgs; +with bionix.types; + +assert (matchFiletype "snpeff-dbnsfp" { vcf = _: true; } input); + +stdenv.mkDerivation { + name = "snpeff-dbnsfp"; + buildCommand = '' + ln -s ${dbnsfp.db} dbNSFP.txt.gz + ln -s ${dbnsfp.index} dbNSFP.txt.gz.tbi + snpsift dbnsfp -db dbNSFP.txt.gz ${flags} ${input} > $out + ''; + buildInputs = [ snpeff ]; + passthru.filetype = input.filetype; +} diff --git a/tools/snpeff.nix b/tools/snpeff.nix new file mode 100644 index 0000000..2df8056 --- /dev/null +++ b/tools/snpeff.nix @@ -0,0 +1,8 @@ +{bionix, nixpkgs}: /* snpEff tool set: exposes the annotate and dbnsfp stages defined in the sibling files */ + +with bionix; + +{ + annotate = callBionixE ./snpeff-annotate.nix; + dbnsfp = callBionixE ./snpeff-dbnsfp.nix; +} -- cgit v1.2.3