diff options
author | Justin Bedo <cu@cua0.org> | 2018-10-26 07:58:32 +1100 |
---|---|---|
committer | Justin Bedo <cu@cua0.org> | 2018-10-26 07:58:32 +1100 |
commit | 759132aca8b954f8b78f74af9a1e01f411e4b1a6 (patch) | |
tree | ce338bccd28ac2ffcb73e3380e3a3877321cb350 | |
parent | 99ee99c490cc109b78986181ee1be4ce1cd0c0ed (diff) |
init: cosmic & dbsnp for grch38
-rwxr-xr-x | lib/references.nix | 23 |
1 files changed, 22 insertions, 1 deletions
```diff
diff --git a/lib/references.nix b/lib/references.nix
index 3614647..498cfd9 100755
--- a/lib/references.nix
+++ b/lib/references.nix
@@ -5,7 +5,7 @@ with bionix.types;
 
 rec {
   grch38 = grch38-p12;
-  grch38-p12 = {
+  grch38-p12 = rec {
     seq = stdenvNoCC.mkDerivation rec {
       name = "seq-grch38.${version}";
       version = "p12";
@@ -25,6 +25,27 @@ rec {
       buildCommand = "gunzip < $src > $out";
       passthru.filetype = filetype.bed { ref = seq; };
     };
+    dbsnp = stdenvNoCC.mkDerivation {
+      name = "dbsnp-b151_GRCh38p7";
+      src = fetchurl {
+        url = "ftp://ftp.ncbi.nlm.nih.gov/snp/organisms/human_9606_b151_GRCh38p7/VCF/common_all_20180418.vcf.gz";
+        sha256 = "0r6m2yrcfw8bbdca515axjls30ssjas6x3qwi5qz07l3prjwmdd4";
+      };
+      buildInputs = [ gawk ];
+      buildCommand = ''
+        gunzip < $src | awk '/^[^#]/{print "chr" $0;next}{print}' > $out
+      '';
+      passthru.filetype = filetype.vcf { ref = seq; };
+    };
+    cosmic = {coding, noncoding}: stdenvNoCC.mkDerivation rec {
+      name = "cosmic-grch38";
+      buildInputs = [ gawk ];
+      buildCommand = ''
+        gunzip < ${coding} | grep '^#' > $out
+        cat ${coding} ${noncoding} | gunzip | grep -v '^#' | sed 's/^/chr/' | sort -t$'\t' -k1,1 -k2,2n >> $out
+      '';
+      passthru.filetype = filetype.vcf { ref = seq; };
+    };
   };
 
   grcm38 = grcm38-p6;
```