aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Justin Bedo <cu@cua0.org> 2018-10-26 07:58:32 +1100
committer Justin Bedo <cu@cua0.org> 2018-10-26 07:58:32 +1100
commit 759132aca8b954f8b78f74af9a1e01f411e4b1a6 (patch)
tree ce338bccd28ac2ffcb73e3380e3a3877321cb350
parent 99ee99c490cc109b78986181ee1be4ce1cd0c0ed (diff)
init: cosmic & dbsnp for grch38
-rwxr-xr-x lib/references.nix 23
1 files changed, 22 insertions, 1 deletions
diff --git a/lib/references.nix b/lib/references.nix
index 3614647..498cfd9 100755
--- a/lib/references.nix
+++ b/lib/references.nix
@@ -5,7 +5,7 @@ with bionix.types;
rec {
grch38 = grch38-p12;
- grch38-p12 = {
+ grch38-p12 = rec {
seq = stdenvNoCC.mkDerivation rec {
name = "seq-grch38.${version}";
version = "p12";
@@ -25,6 +25,27 @@ rec {
buildCommand = "gunzip < $src > $out";
passthru.filetype = filetype.bed { ref = seq; };
};
+ dbsnp = stdenvNoCC.mkDerivation { # dbSNP VCF for this reference, decompressed with "chr" prepended to every non-header line
+ name = "dbsnp-b151_GRCh38p7";
+ src = fetchurl { # download pinned by the sha256 below
+ url = "ftp://ftp.ncbi.nlm.nih.gov/snp/organisms/human_9606_b151_GRCh38p7/VCF/common_all_20180418.vcf.gz"; # per the path: build 151 on GRCh38p7, "common_all" set dated 2018-04-18
+ sha256 = "0r6m2yrcfw8bbdca515axjls30ssjas6x3qwi5qz07l3prjwmdd4";
+ };
+ buildInputs = [ gawk ]; # supplies the awk invoked in buildCommand
+ buildCommand = ''
+ gunzip < $src | awk '/^[^#]/{print "chr" $0;next}{print}' > $out
+ ''; # lines whose first character is not '#' get "chr" prepended; every other line is passed through unchanged
+ passthru.filetype = filetype.vcf { ref = seq; }; # tags the output as a VCF whose reference is the `seq` attribute of this (rec) set
+ };
+ cosmic = {coding, noncoding}: stdenvNoCC.mkDerivation rec { # function: merges two gzip-compressed inputs into one sorted, "chr"-prefixed file; NOTE(review): no attribute below refers to another, so `rec` looks unnecessary
+ name = "cosmic-grch38";
+ buildInputs = [ gawk ]; # NOTE(review): buildCommand only calls gunzip, grep, cat, sed and sort; awk is not invoked, so gawk may be removable
+ buildCommand = ''
+ gunzip < ${coding} | grep '^#' > $out
+ cat ${coding} ${noncoding} | gunzip | grep -v '^#' | sed 's/^/chr/' | sort -t$'\t' -k1,1 -k2,2n >> $out
+ ''; # header ('#') lines are taken from `coding` only; non-header lines of both inputs get "chr" prepended, then are sorted by tab-separated column 1 and numerically by column 2
+ passthru.filetype = filetype.vcf { ref = seq; }; # tags the output as a VCF whose reference is the `seq` attribute of the enclosing (rec) set
+ };
};
grcm38 = grcm38-p6;