From b029bff0ec25ec06758a5fdf7c79152d2ed5c032 Mon Sep 17 00:00:00 2001 From: Justin Bedo Date: Fri, 4 Jan 2019 11:37:38 +1100 Subject: snpeff: init --- lib/references.nix | 162 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 94 insertions(+), 68 deletions(-) (limited to 'lib') diff --git a/lib/references.nix b/lib/references.nix index 1ac92de..9d1b947 100755 --- a/lib/references.nix +++ b/lib/references.nix @@ -33,90 +33,116 @@ rec { }; buildInputs = [ gawk ]; buildCommand = '' - gunzip < $src | awk '/^[^#]/{print "chr" $0;next}{print}' > $out - ''; - passthru.filetype = filetype.vcf { ref = seq; }; - }; - cosmic = {coding, noncoding}: stdenvNoCC.mkDerivation rec { - name = "cosmic-grch38"; - buildInputs = [ gawk ]; - buildCommand = '' + gunzip < $src | awk '/^[^#]/{print "chr" $0;next}{print}' > $out + ''; + passthru.filetype = filetype.vcf { ref = seq; }; + }; + cosmic = {coding, noncoding}: stdenvNoCC.mkDerivation rec { + name = "cosmic-grch38"; + buildInputs = [ gawk ]; + buildCommand = '' gunzip < ${coding} | grep '^#' > $out cat ${coding} ${noncoding} | gunzip | grep -v '^#' | sed 's/^/chr/' | sort -t$'\t' -k1,1 -k2,2n >> $out - ''; - passthru.filetype = filetype.vcf { ref = seq; }; - }; - ensembl = { - cdna = stdenvNoCC.mkDerivation rec { - name = "ensembl-grch38-cdna-${version}"; - version = "94"; - src = fetchurl { - url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz"; - sha256 = "1fc5d6p2wlwsm49wnmxmm3byjx5jvr6z9fpzrq7v7fpb086adl0h"; + ''; + passthru.filetype = filetype.vcf { ref = seq; }; + }; + ensembl = { + cdna = stdenvNoCC.mkDerivation rec { + name = "ensembl-grch38-cdna-${version}"; + version = "94"; + src = fetchurl { + url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh38.cdna.all.fa.gz"; + sha256 = "1fc5d6p2wlwsm49wnmxmm3byjx5jvr6z9fpzrq7v7fpb086adl0h"; + }; + buildCommand = "gunzip < $src > $out"; + passthru.filetype = filetype.fa {}; + }; + ncrna = stdenvNoCC.mkDerivation rec { + name = "ensembl-grch38-ncrna-${version}"; + version = "94"; + src = fetchurl { + url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/homo_sapiens/ncrna/Homo_sapiens.GRCh38.ncrna.fa.gz"; + sha256 = "1cpasykwriila52nqgvw6d3mjyh6d9qi613hvhn4h1dxkqzgnjff"; + }; + buildCommand = "gunzip < $src > $out"; + passthru.filetype = filetype.fa {}; }; - buildCommand = "gunzip < $src > $out"; - passthru.filetype = filetype.fa {}; }; - ncrna = stdenvNoCC.mkDerivation rec { - name = "ensembl-grch38-ncrna-${version}"; - version = "94"; - src = fetchurl { - url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/homo_sapiens/ncrna/Homo_sapiens.GRCh38.ncrna.fa.gz"; - sha256 = "1cpasykwriila52nqgvw6d3mjyh6d9qi613hvhn4h1dxkqzgnjff"; + snpeff = { + db = stdenv.mkDerivation rec { + name = "GRCh38.86"; + src = fetchurl { + url = "mirror://sourceforge/project/snpeff/databases/v4_3/snpEff_v4_3_${name}.zip"; + sha256 = "1rf8q7l732ayjq2lpny4s75zpij05j00151374nqblk4wri2mz0i"; + }; + buildInputs = [ unzip ]; + buildCommand = '' + unzip ${src} + mv data/${name} $out + ''; + }; + dbnsfp = { + db = requireFile { + name = "dbNSFP.txt.gz"; + message = "download the dbNSFP database manually from https://drive.google.com/uc?export=download&id=0B7Ms5xMSFMYlbTZodjlGUDZnTGc and add to nix store"; + sha256 = "0gahnwkc7v2q6p6ixkhvsgqvvm6xf0c3bdh4nf0alih83h3wffd0"; + }; + index = requireFile { + name = "dbNSFP.txt.gz.tbi"; + message = "download the dbNSFP index manually from https://drive.google.com/uc?export=download&id=0B7Ms5xMSFMYlNVBJdFA5cFZRYkE and add to nix store"; + sha256 = "18blkly6gvg7r0sx968xlb1zl2kqg5j1kpbrm2r7ajlxlfyvrx3w"; + }; }; - buildCommand = "gunzip < $src > $out"; - passthru.filetype = filetype.fa {}; }; }; - }; - grcm38 = grcm38-p6; - grcm38-p6 = { - seq = stdenvNoCC.mkDerivation rec { - name = "seq-grcm38.${version}"; - version = "p6"; - src = fetchurl { - url = "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M19/GRCm38.${version}.genome.fa.gz"; - sha256 = "0ryiqab5bldpzawylsk2qpjxr2j701q03ww9jqyxhkimqpn9g3mr"; - }; - buildCommand = "gunzip < $src > $out"; - passthru.filetype = filetype.fa {}; - }; - ensembl = { - cdna = stdenvNoCC.mkDerivation rec { - name = "ensembl-grch38-cdna-${version}"; - version = "94"; + grcm38 = grcm38-p6; + grcm38-p6 = { + seq = stdenvNoCC.mkDerivation rec { + name = "seq-grcm38.${version}"; + version = "p6"; src = fetchurl { - url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/mus_musculus/cdna/Mus_musculus.GRCm38.cdna.all.fa.gz"; - sha256 = "0khp9l6s35lav2xqp7vkk6ybnz4wjihn7lapjf2lbpnbzjb4hp6d"; + url = "ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M19/GRCm38.${version}.genome.fa.gz"; + sha256 = "0ryiqab5bldpzawylsk2qpjxr2j701q03ww9jqyxhkimqpn9g3mr"; }; buildCommand = "gunzip < $src > $out"; passthru.filetype = filetype.fa {}; }; - ncrna = stdenvNoCC.mkDerivation rec { - name = "ensembl-grch38-ncrna-${version}"; - version = "94"; - src = fetchurl { - url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/mus_musculus/ncrna/Mus_musculus.GRCm38.ncrna.fa.gz"; - sha256 = "0d997gm8p2b89rm5d46m2x4vz9lijxarfr2lzylnbi8gyqrbagdd"; + ensembl = { + cdna = stdenvNoCC.mkDerivation rec { + name = "ensembl-grch38-cdna-${version}"; + version = "94"; + src = fetchurl { + url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/mus_musculus/cdna/Mus_musculus.GRCm38.cdna.all.fa.gz"; + sha256 = "0khp9l6s35lav2xqp7vkk6ybnz4wjihn7lapjf2lbpnbzjb4hp6d"; + }; + buildCommand = "gunzip < $src > $out"; + passthru.filetype = filetype.fa {}; + }; + ncrna = stdenvNoCC.mkDerivation rec { + name = "ensembl-grch38-ncrna-${version}"; + version = "94"; + src = fetchurl { + url = "ftp://ftp.ensembl.org/pub/release-${version}/fasta/mus_musculus/ncrna/Mus_musculus.GRCm38.ncrna.fa.gz"; + sha256 = "0d997gm8p2b89rm5d46m2x4vz9lijxarfr2lzylnbi8gyqrbagdd"; + }; + buildCommand = "gunzip < $src > $out"; + passthru.filetype = filetype.fa {}; }; - buildCommand = "gunzip < $src > $out"; - passthru.filetype = filetype.fa {}; }; }; - }; - mm10 = mm10-p4; - mm10-p4 = { + mm10 = mm10-p4; + mm10-p4 = { seq = stdenvNoCC.mkDerivation rec { - name = "seq-mm10.${version}"; - version = "p4"; - src = fetchurl { - url = "http://hgdownload.soe.ucsc.edu/goldenPath/mm10/mm10Patch4/mm10Patch4.fa.gz"; - sha256 = "1660d6d05f3aa266c6053cfd1efef1747d9e854836917241d6f47cff7a55340c"; - }; - buildCommand = "gunzip < $src > $out"; - passthru.filetype = filetype.fa {}; + name = "seq-mm10.${version}"; + version = "p4"; + src = fetchurl { + url = "http://hgdownload.soe.ucsc.edu/goldenPath/mm10/mm10Patch4/mm10Patch4.fa.gz"; + sha256 = "1660d6d05f3aa266c6053cfd1efef1747d9e854836917241d6f47cff7a55340c"; + }; + buildCommand = "gunzip < $src > $out"; + passthru.filetype = filetype.fa {}; }; - }; -} + }; + } -- cgit v1.2.3