aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- default.nix 1
-rwxr-xr-x lib/references.nix 162
-rw-r--r-- tools/snpeff-annotate.nix 21
-rw-r--r-- tools/snpeff-dbnsfp.nix 22
-rw-r--r-- tools/snpeff.nix 8
5 files changed, 146 insertions, 68 deletions
diff --git a/default.nix b/default.nix
index 27afcb5..538ddf4 100644
--- a/default.nix
+++ b/default.nix
@@ -26,6 +26,7 @@ let
platypus = callBionix ./tools/platypus.nix {};
samtools = callBionix ./tools/samtools.nix {};
strelka = callBionix ./tools/strelka.nix {};
+ snpeff = callBionix ./tools/snpeff.nix {}; # snpEff tool set; that file exposes `annotate` and `dbnsfp`
ref = callBionix ./lib/references.nix {};
diff --git a/lib/references.nix b/lib/references.nix
index 1ac92de..9d1b947 100755
--- a/lib/references.nix
+++ b/lib/references.nix
@@ -33,90 +33,116 @@ rec {
};
buildInputs = [ gawk ];
buildCommand = ''
- gunzip < $src | awk '/^[^#]/{print "chr" $0;next}{print}' > $out
- '';
- passthru.filetype = filetype.vcf { ref = seq; };
- };
- cosmic = {coding, noncoding}: stdenvNoCC.mkDerivation rec {
- name = "cosmic-grch38";
- buildInputs = [ gawk ];
- buildCommand = ''
+ gunzip < $src | awk '/^[^#]/{print "chr" $0;next}{print}' > $out
+ '';
+ passthru.filetype = filetype.vcf { ref = seq; };
+ };
+ cosmic = {coding, noncoding}: stdenvNoCC.mkDerivation rec { # coding/noncoding: gzipped inputs (both are piped through gunzip); output keeps the '#' header lines of `coding`, then the non-header lines of both inputs prefixed with "chr" and sorted by column 1, then numerically by column 2
+ name = "cosmic-grch38";
+ buildInputs = [ gawk ]; # NOTE(review): the command below calls grep/sed/sort but never awk - confirm gawk is still needed
+ buildCommand = ''
gunzip < ${coding} | grep '^#' > $out
cat ${coding} ${noncoding} | gunzip | grep -v '^#' | sed 's/^/chr/' | sort -t$'\t' -k1,1 -k2,2n >> $out
- '';
- passthru.filetype = filetype.vcf { ref = seq; };
- };
- ensembl = {
- cdna = stdenvNoCC.mkDerivation rec {
- name = "ensembl-grch38-cdna-${version}";
- version = "94";
- src = fetchurl {
- url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz";
- sha256 = "1fc5d6p2wlwsm49wnmxmm3byjx5jvr6z9fpzrq7v7fpb086adl0h";
+ '';
+ passthru.filetype = filetype.vcf { ref = seq; }; # output is tagged as a VCF whose reference is `seq`
+ };
+ ensembl = {
+ cdna = stdenvNoCC.mkDerivation rec { # Ensembl Homo sapiens GRCh38 cDNA FASTA, stored uncompressed
+   version = "94"; # Ensembl release; interpolated into both the name and the URL
+   name = "ensembl-grch38-cdna-${version}";
+   passthru.filetype = filetype.fa {}; # output is tagged as FASTA
+   src = fetchurl {
+     sha256 = "1fc5d6p2wlwsm49wnmxmm3byjx5jvr6z9fpzrq7v7fpb086adl0h";
+     url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz";
+   };
+   buildCommand = "gunzip < $src > $out"; # the output is the decompressed download as a single file
+ };
+ ncrna = stdenvNoCC.mkDerivation rec { # Ensembl Homo sapiens GRCh38 ncRNA FASTA, stored uncompressed
+   version = "94"; # Ensembl release; interpolated into both the name and the URL
+   name = "ensembl-grch38-ncrna-${version}";
+   passthru.filetype = filetype.fa {}; # output is tagged as FASTA
+   src = fetchurl {
+     sha256 = "1cpasykwriila52nqgvw6d3mjyh6d9qi613hvhn4h1dxkqzgnjff";
+     url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/homo_sapiens/ncrna/Homo_sapiens.GRCh38.ncrna.fa.gz";
+   };
+   buildCommand = "gunzip < $src > $out"; # the output is the decompressed download as a single file
+ };
- buildCommand = "gunzip < $src > $out";
- passthru.filetype = filetype.fa {};
};
- ncrna = stdenvNoCC.mkDerivation rec {
- name = "ensembl-grch38-ncrna-${version}";
- version = "94";
- src = fetchurl {
- url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/homo_sapiens/ncrna/Homo_sapiens.GRCh38.ncrna.fa.gz";
- sha256 = "1cpasykwriila52nqgvw6d3mjyh6d9qi613hvhn4h1dxkqzgnjff";
+ snpeff = {
+ db = stdenv.mkDerivation rec { # snpEff database unpacked from the upstream v4_3 zip archive
+   name = "GRCh38.86"; # doubles as the genome name: interpolated into the URL and the data/ path below
+   buildInputs = [ unzip ]; # unzip extracts the archive; its data/<name> directory then becomes $out
+   src = fetchurl {
+     sha256 = "1rf8q7l732ayjq2lpny4s75zpij05j00151374nqblk4wri2mz0i";
+     url = "mirror://sourceforge/project/snpeff/databases/v4_3/snpEff_v4_3_${name}.zip";
+   };
+   buildCommand = ''
+     unzip ${src}
+     mv data/${name} $out
+   '';
+ };
+ dbnsfp = { # dbNSFP database and its index; neither is fetched, both must be added to the store by hand
+   db = requireFile {
+     name = "dbNSFP.txt.gz";
+     sha256 = "0gahnwkc7v2q6p6ixkhvsgqvvm6xf0c3bdh4nf0alih83h3wffd0";
+     message = "download the dbNSFP database manually from https://drive.google.com/uc?export=download&id=0B7Ms5xMSFMYlbTZodjlGUDZnTGc and add to nix store";
+   };
+   index = requireFile {
+     name = "dbNSFP.txt.gz.tbi";
+     sha256 = "18blkly6gvg7r0sx968xlb1zl2kqg5j1kpbrm2r7ajlxlfyvrx3w";
+     message = "download the dbNSFP index manually from https://drive.google.com/uc?export=download&id=0B7Ms5xMSFMYlNVBJdFA5cFZRYkE and add to nix store";
+   };
+ };
- buildCommand = "gunzip < $src > $out";
- passthru.filetype = filetype.fa {};
};
};
- };
- grcm38 = grcm38-p6;
- grcm38-p6 = {
- seq = stdenvNoCC.mkDerivation rec {
- name = "seq-grcm38.${version}";
- version = "p6";
- src = fetchurl {
- url = "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M19/GRCm38.${version}.genome.fa.gz";
- sha256 = "0ryiqab5bldpzawylsk2qpjxr2j701q03ww9jqyxhkimqpn9g3mr";
- };
- buildCommand = "gunzip < $src > $out";
- passthru.filetype = filetype.fa {};
- };
- ensembl = {
- cdna = stdenvNoCC.mkDerivation rec {
- name = "ensembl-grch38-cdna-${version}";
- version = "94";
+ grcm38 = grcm38-p6;
+ grcm38-p6 = {
+ seq = stdenvNoCC.mkDerivation rec { # GRCm38 genome FASTA from GENCODE mouse release M19, stored uncompressed
+ name = "seq-grcm38.${version}";
+ version = "p6"; # patch level; interpolated into both the name and the URL
src = fetchurl {
- url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/mus_musculus/cdna/Mus_musculus.GRCm38.cdna.all.fa.gz";
- sha256 = "0khp9l6s35lav2xqp7vkk6ybnz4wjihn7lapjf2lbpnbzjb4hp6d";
+ url = "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M19/GRCm38.${version}.genome.fa.gz";
+ sha256 = "0ryiqab5bldpzawylsk2qpjxr2j701q03ww9jqyxhkimqpn9g3mr";
};
buildCommand = "gunzip < $src > $out";
passthru.filetype = filetype.fa {};
};
- ncrna = stdenvNoCC.mkDerivation rec {
- name = "ensembl-grch38-ncrna-${version}";
- version = "94";
- src = fetchurl {
- url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/mus_musculus/ncrna/Mus_musculus.GRCm38.ncrna.fa.gz";
- sha256 = "0d997gm8p2b89rm5d46m2x4vz9lijxarfr2lzylnbi8gyqrbagdd";
+ ensembl = {
+ cdna = stdenvNoCC.mkDerivation rec { # Ensembl Mus musculus GRCm38 cDNA FASTA, stored uncompressed
+   name = "ensembl-grcm38-cdna-${version}"; # mouse assembly; was mislabelled "grch38" (copied from the human set)
+   version = "94"; # Ensembl release; interpolated into both the name and the URL
+   src = fetchurl {
+     url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/mus_musculus/cdna/Mus_musculus.GRCm38.cdna.all.fa.gz";
+     sha256 = "0khp9l6s35lav2xqp7vkk6ybnz4wjihn7lapjf2lbpnbzjb4hp6d";
+   };
+   buildCommand = "gunzip < $src > $out"; # the output is the decompressed download as a single file
+   passthru.filetype = filetype.fa {}; # output is tagged as FASTA
+ };
+ ncrna = stdenvNoCC.mkDerivation rec { # Ensembl Mus musculus GRCm38 ncRNA FASTA, stored uncompressed
+   name = "ensembl-grcm38-ncrna-${version}"; # mouse assembly; was mislabelled "grch38" (copied from the human set)
+   version = "94"; # Ensembl release; interpolated into both the name and the URL
+   src = fetchurl {
+     url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/mus_musculus/ncrna/Mus_musculus.GRCm38.ncrna.fa.gz";
+     sha256 = "0d997gm8p2b89rm5d46m2x4vz9lijxarfr2lzylnbi8gyqrbagdd";
+   };
+   buildCommand = "gunzip < $src > $out"; # the output is the decompressed download as a single file
+   passthru.filetype = filetype.fa {}; # output is tagged as FASTA
+ };
- buildCommand = "gunzip < $src > $out";
- passthru.filetype = filetype.fa {};
};
};
- };
- mm10 = mm10-p4;
- mm10-p4 = {
+ mm10 = mm10-p4;
+ mm10-p4 = {
seq = stdenvNoCC.mkDerivation rec {
- name = "seq-mm10.${version}";
- version = "p4";
- src = fetchurl {
- url = "http://hgdownload.soe.ucsc.edu/goldenPath/mm10/mm10Patch4/mm10Patch4.fa.gz";
- sha256 = "1660d6d05f3aa266c6053cfd1efef1747d9e854836917241d6f47cff7a55340c";
- };
- buildCommand = "gunzip < $src > $out";
- passthru.filetype = filetype.fa {};
+ name = "seq-mm10.${version}"; # UCSC mm10 patch-4 genome FASTA, stored uncompressed
+ version = "p4"; # used in the name only; the URL below spells the patch out literally
+ src = fetchurl {
+ url = "http://hgdownload.soe.ucsc.edu/goldenPath/mm10/mm10Patch4/mm10Patch4.fa.gz";
+ sha256 = "1660d6d05f3aa266c6053cfd1efef1747d9e854836917241d6f47cff7a55340c";
+ };
+ buildCommand = "gunzip < $src > $out"; # the output is the decompressed download as a single file
+ passthru.filetype = filetype.fa {}; # output is tagged as FASTA
};
- };
-}
+ };
+ }
diff --git a/tools/snpeff-annotate.nix b/tools/snpeff-annotate.nix
new file mode 100644
index 0000000..a119c3c
--- /dev/null
+++ b/tools/snpeff-annotate.nix
@@ -0,0 +1,21 @@
+{bionix
+,nixpkgs
+,db           # snpEff database derivation; symlinked below under its own `name`
+,flags ? ""}: # extra snpeff options, inserted verbatim before the database name
+
+input:        # file to annotate; must carry the vcf filetype (see the assert)
+
+with nixpkgs;
+with bionix.types;
+
+assert (matchFiletype "snpeff-annotate" { vcf = _: true; } input);
+# Runs snpeff on `input` against `db` and writes the result to $out; the input's filetype is passed through.
+stdenv.mkDerivation { # NOTE(review): the db link is created in the working directory while -dataDir is $TMPDIR; presumably the same directory - confirm
+  name = "snpeff-annotate";
+  buildCommand = ''
+    ln -s ${db} ${db.name}
+    snpeff -nodownload -dataDir $TMPDIR ${flags} ${db.name} ${input} > $out
+  '';
+  buildInputs = [ snpeff ];
+  passthru.filetype = input.filetype;
+}
diff --git a/tools/snpeff-dbnsfp.nix b/tools/snpeff-dbnsfp.nix
new file mode 100644
index 0000000..f9fbdaa
--- /dev/null
+++ b/tools/snpeff-dbnsfp.nix
@@ -0,0 +1,22 @@
+{bionix
+,nixpkgs
+,dbnsfp       # attribute set with `db` (the dbNSFP table) and `index` (its .tbi index)
+,flags ? ""}: # extra options, inserted verbatim before the input file
+
+input:        # file to annotate; must carry the vcf filetype (see the assert)
+
+with nixpkgs;
+with bionix.types;
+
+assert (matchFiletype "snpeff-dbnsfp" { vcf = _: true; } input);
+# Links the database and index side by side, runs the dbnsfp annotation on `input` and writes the result to $out.
+stdenv.mkDerivation { # NOTE(review): upstream documents `dbnsfp` as a SnpSift subcommand - confirm the `snpeff` executable accepts it
+  name = "snpeff-dbnsfp";
+  buildCommand = ''
+    ln -s ${dbnsfp.db} dbNSFP.txt.gz
+    ln -s ${dbnsfp.index} dbNSFP.txt.gz.tbi
+    snpeff dbnsfp -db dbNSFP.txt.gz ${flags} ${input} > $out
+  '';
+  buildInputs = [ snpeff ];
+  passthru.filetype = input.filetype;
+}
diff --git a/tools/snpeff.nix b/tools/snpeff.nix
new file mode 100644
index 0000000..2df8056
--- /dev/null
+++ b/tools/snpeff.nix
@@ -0,0 +1,8 @@
+# snpEff tool set: each attribute is loaded from a sibling file through bionix.callBionixE.
+{bionix, nixpkgs}:
+{
+  # Annotation of a VCF against a snpEff database (see ./snpeff-annotate.nix).
+  annotate = bionix.callBionixE ./snpeff-annotate.nix;
+  # Annotation of a VCF from a dbNSFP database (see ./snpeff-dbnsfp.nix).
+  dbnsfp = bionix.callBionixE ./snpeff-dbnsfp.nix;
+}