diff options
| -rw-r--r-- | tools/gridss-callVariants.nix | 3 | ||||
| -rw-r--r-- | tools/gridss-collectMetrics.nix | 6 | ||||
| -rw-r--r-- | tools/platypus-callVariants.nix | 3 | ||||
| -rw-r--r-- | tools/samtools-merge.nix | 7 | ||||
| -rw-r--r-- | tools/strelka-call.nix | 13 | ||||
| -rw-r--r-- | tools/strelka-callSomatic.nix | 13 | ||||
| -rw-r--r-- | tools/strelka.nix | 12 | 
7 files changed, 48 insertions, 9 deletions
| diff --git a/tools/gridss-callVariants.nix b/tools/gridss-callVariants.nix index ffbb349..f42e3f3 100644 --- a/tools/gridss-callVariants.nix +++ b/tools/gridss-callVariants.nix @@ -50,6 +50,9 @@ stage rec {  	    ASSEMBLY="$out/gridss.bam" \        ${optionalString (blacklist != null) ("BLACKLIST=" + blacklist)} \        ${optionalString (flags != null) flags} + +    # The VCF index is non-deterministic +    rm $out/gridss.vcf.idx    '';    passthru.multicore = true;  } diff --git a/tools/gridss-collectMetrics.nix b/tools/gridss-collectMetrics.nix index c62346e..2e3b3a9 100644 --- a/tools/gridss-collectMetrics.nix +++ b/tools/gridss-collectMetrics.nix @@ -29,5 +29,11 @@ stage rec {  			O=$out/input \        AS=true \  			THRESHOLD_COVERAGE=${toString thresholdCoverage} + +    # Make the output deterministic by removing timestamps +    sed -i '/^# Started on:/d' $out/input.*_metrics +    if [ -e $out/input.insert_size_histogram.pdf ] ; then +      sed -i 's/(D:[0-9]\+)/(D:19700101000000)/g' $out/input.insert_size_histogram.pdf +    fi    '';  } diff --git a/tools/platypus-callVariants.nix b/tools/platypus-callVariants.nix index f3b3e7c..abe2a6b 100644 --- a/tools/platypus-callVariants.nix +++ b/tools/platypus-callVariants.nix @@ -35,6 +35,9 @@ stage {        ${optionalString (flags != null) flags} \        -o $out \        --bamFiles=${concatMapStringsSep "," (p: "${filename p}.bam") inputs} + +    # Remove timestamps from output +    sed -i '/^##fileDate/d' $out    '';    passthru.filetype = filetype.vcf {ref = ref;};    passthru.multicore = true; diff --git a/tools/samtools-merge.nix b/tools/samtools-merge.nix index 120f825..9b7d133 100644 --- a/tools/samtools-merge.nix +++ b/tools/samtools-merge.nix @@ -19,7 +19,12 @@ stage {    name = "samtools-merge";    buildInputs = with pkgs; [ samtools ];    buildCommand = '' -    samtools merge ${optionalString (flags != null) flags} $out ${concatStringsSep " " inputs} +    samtools merge ${optionalString (flags != null) flags} out.bam ${concatStringsSep " " inputs} + +    # Merge is non-deterministic with PG lines; if files have clashing PG IDs then a random +    # suffix is appended to make it unique. PG lines are stripped in the following to +    # resolve the issue. +    samtools reheader <(samtools view -H out.bam | grep -v '@PG') out.bam > $out    '';    passthru.filetype = (builtins.elemAt inputs 0).filetype;  } diff --git a/tools/strelka-call.nix b/tools/strelka-call.nix index 7836bbe..bc1e202 100644 --- a/tools/strelka-call.nix +++ b/tools/strelka-call.nix @@ -38,7 +38,18 @@ stage {        -m local \        -j $NIX_BUILD_CORES 2>&1 -    cp -r results $out +    # Strelka writes runtime stats and timestamps; +    # both have to be stripped to provide determinism +    cd results/variants +    rm *.tbi genome.vcf.gz +    for f in *.vcf.gz; do +      gunzip $f +      g=$(basename $f .gz) +      sed -i '/^##fileDate/d' $g +      sed -i '/^##startTime/d' $g +    done +    mkdir $out +    cp -r * $out    '';    passthru.multicore = true;  } diff --git a/tools/strelka-callSomatic.nix b/tools/strelka-callSomatic.nix index 256065b..cf4b414 100644 --- a/tools/strelka-callSomatic.nix +++ b/tools/strelka-callSomatic.nix @@ -40,7 +40,18 @@ stage {        -m local \        -j $NIX_BUILD_CORES -    cp -r results $out +    # Strelka writes runtime stats and timestamps; +    # both have to be stripped to provide determinism +    cd results/variants +    rm *.tbi +    for f in *.vcf.gz; do +      gunzip $f +      g=$(basename $f .gz) +      sed -i '/^##fileDate/d' $g +      sed -i '/^##startTime/d' $g +    done +    mkdir $out +    cp -r * $out    '';    passthru.multicore = true;  } diff --git a/tools/strelka.nix b/tools/strelka.nix index 1cf8961..d3068b5 100644 --- a/tools/strelka.nix +++ b/tools/strelka.nix @@ -20,9 +20,9 @@ with types;      drv: stage {      name = "strelka-call-variants";      buildCommand = '' -      ln -s ${drv}/variants/variants.vcf.gz $out +      ln -s ${drv}/variants/variants.vcf $out      ''; -    passthru.filetype = filetype.gz (filetype.vcf {ref=ref;}); +    passthru.filetype = filetype.vcf {ref=ref;};    };    /* Extract indels from somatic results    Type: indels :: somatic results -> vcf @@ -31,8 +31,8 @@ with types;      # result of callSomatic      drv: stage {      name = "strelka-callVariants-indels"; -    buildCommand = "ln -s ${drv}/variants/somatic.indels.vcf.gz $out"; -    passthru.filetype = filetype.gz (filetype.vcf {ref = ref;}); +    buildCommand = "ln -s ${drv}/variants/somatic.indels.vcf $out"; +    passthru.filetype = filetype.vcf {ref = ref;};    };    /* Extract SNVs from somatic results    Type: snvs :: somatic results -> vcf @@ -41,7 +41,7 @@ with types;      # result of callSomatic      drv: stage {      name = "strelka-callVariants-snvs"; -    buildCommand = "ln -s ${drv}/variants/somatic.snvs.vcf.gz $out"; -    passthru.filetype = filetype.gz (filetype.vcf {ref = ref;}); +    buildCommand = "ln -s ${drv}/variants/somatic.snvs.vcf $out"; +    passthru.filetype = filetype.vcf {ref = ref;};    };  } | 
