From 759132aca8b954f8b78f74af9a1e01f411e4b1a6 Mon Sep 17 00:00:00 2001
From: Justin Bedo
Date: Fri, 26 Oct 2018 07:58:32 +1100
Subject: init: cosmic & dbsnp for grch38

---
Notes (free-form area after the separator; not part of the commit message
and not part of the diff):

  * Hunk 1 turns the `grch38-p12` attribute set into a `rec` set. The two
    attributes added in hunk 2 both use `ref = seq`, i.e. they refer to the
    sibling `seq` attribute of that set.
  * `dbsnp` downloads common_all_20180418.vcf.gz, decompresses it, and uses
    awk to put "chr" in front of every line whose first character is not
    '#'; all other lines are printed unchanged.
  * `cosmic` is a function of `{coding, noncoding}`. Its output starts with
    the '#' lines of `coding` only; the non-'#' lines of both inputs follow,
    each prefixed with "chr" and sorted on tab-separated field 1 (as text)
    and then field 2 (numerically).
  * NOTE(review): `cosmic` lists gawk in buildInputs, but its buildCommand
    only calls gunzip, grep, cat, sed and sort. The `rec` on its
    mkDerivation argument is not used by any attribute shown here.
  * NOTE(review): field 1 is compared as text, so "chr10" sorts before
    "chr2" - confirm that consumers of this VCF accept that contig order.
  * NOTE(review): the dbSNP build is labelled GRCh38p7 while the enclosing
    set is grch38-p12 - presumably the coordinates are compatible; confirm.

 lib/references.nix | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/references.nix b/lib/references.nix
index 3614647..498cfd9 100755
--- a/lib/references.nix
+++ b/lib/references.nix
@@ -5,7 +5,7 @@ with bionix.types;
 
 rec {
   grch38 = grch38-p12;
-  grch38-p12 = {
+  grch38-p12 = rec {
     seq = stdenvNoCC.mkDerivation rec {
       name = "seq-grch38.${version}";
       version = "p12";
@@ -25,6 +25,27 @@ rec {
       buildCommand = "gunzip < $src > $out";
       passthru.filetype = filetype.bed { ref = seq; };
     };
+    dbsnp = stdenvNoCC.mkDerivation {
+      name = "dbsnp-b151_GRCh38p7";
+      src = fetchurl {
+        url = "ftp://ftp.ncbi.nlm.nih.gov/snp/organisms/human_9606_b151_GRCh38p7/VCF/common_all_20180418.vcf.gz";
+        sha256 = "0r6m2yrcfw8bbdca515axjls30ssjas6x3qwi5qz07l3prjwmdd4";
+      };
+      buildInputs = [ gawk ];
+      buildCommand = ''
+        gunzip < $src | awk '/^[^#]/{print "chr" $0;next}{print}' > $out
+      '';
+      passthru.filetype = filetype.vcf { ref = seq; };
+    };
+    cosmic = {coding, noncoding}: stdenvNoCC.mkDerivation rec {
+      name = "cosmic-grch38";
+      buildInputs = [ gawk ];
+      buildCommand = ''
+        gunzip < ${coding} | grep '^#' > $out
+        cat ${coding} ${noncoding} | gunzip | grep -v '^#' | sed 's/^/chr/' | sort -t$'\t' -k1,1 -k2,2n >> $out
+      '';
+      passthru.filetype = filetype.vcf { ref = seq; };
+    };
   };
 
   grcm38 = grcm38-p6;
-- 
cgit v1.2.3