From 7c46b53f316d43aad3bf7cb6891c5eb05b996de5 Mon Sep 17 00:00:00 2001 From: Justin Bedo Date: Tue, 21 Sep 2021 14:15:31 +1000 Subject: gridss: specify cores for all multithreaded phases --- tools/gridss-computeSamTags.nix | 31 +++++++++--------- tools/gridss-variants.nix | 49 ++++++++++++++++------------ tools/gridss.nix | 72 ++++++++++++++++++++++++----------------- 3 files changed, 87 insertions(+), 65 deletions(-) diff --git a/tools/gridss-computeSamTags.nix b/tools/gridss-computeSamTags.nix index a2e1f8b..de44478 100644 --- a/tools/gridss-computeSamTags.nix +++ b/tools/gridss-computeSamTags.nix @@ -1,10 +1,5 @@ -{ bionix -, bwaIndexAttrs ? {} -, faidxAttrs ? {} -, flags ? null -, config ? null -, heapSize ? "1G" -}: +{ bionix, bwaIndexAttrs ? { }, faidxAttrs ? { }, flags ? null, config ? null +, heapSize ? "1G" }: with bionix; with lib; @@ -15,9 +10,8 @@ input: let ref = matchFiletype "gridss-computeSamTags" { bam = x: x.ref; } input; sorted = matchFileSorting "gridss-computeSamTags" { name = _: true; } input; -in -assert(sorted); +in assert (sorted); stage rec { name = "gridss-computeSamTags"; @@ -30,15 +24,20 @@ stage rec { done java -Xmx${heapSize} \ -Dsamjdk.create_index=false \ - -cp ${bionix.gridss.jar} gridss.ComputeSamTags \ + -cp ${bionix.gridss.jar} gridss.ComputeSamTags \ VERBOSITY=WARNING \ - REFERENCE_SEQUENCE=ref.fa \ - WORKING_DIR=$TMP_DIR \ - TMP_DIR=$TMP_DIR \ - ${optionalString (config != null) ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)} \ - I=${input} \ - O=$out \ + WORKER_THREADS=$NIX_BUILD_CORES \ + REFERENCE_SEQUENCE=ref.fa \ + WORKING_DIR=$TMP_DIR \ + TMP_DIR=$TMP_DIR \ + ${ + optionalString (config != null) + ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config) + } \ + I=${input} \ + O=$out \ AS=true ''; passthru.filetype = input.filetype; + passthru.multicore = true; } diff --git a/tools/gridss-variants.nix b/tools/gridss-variants.nix index c208758..a5c470a 100644 --- a/tools/gridss-variants.nix +++ 
b/tools/gridss-variants.nix @@ -1,14 +1,7 @@ -{ bionix -, bwaIndexAttrs ? {} -, faidxAttrs ? {} -, indexAttrs ? {} -, assemblyAttrs ? {} -, collectMetricsAttrs ? {} +{ bionix, bwaIndexAttrs ? { }, faidxAttrs ? { }, indexAttrs ? { } +, assemblyAttrs ? { }, collectMetricsAttrs ? { } , softClipsToSplitReadsAttrs ? { flags = "REALIGN_ENTIRE_READ=true"; } -, config ? null -, heapSize ? "4g" -, shards ? 10 -}: +, config ? null, heapSize ? "4g", shards ? 10 }: with bionix; with lib; @@ -44,7 +37,9 @@ let ln -s ${bionix.samtools.index indexAttrs input} $WRKDIR/$BASENAME.sv.bai ''; - assembly = bionix.samtools.sort {} (softClipsToSplitReads softClipsToSplitReadsAttrs (bionix.gridss.shardedAssemble shards assemblyAttrs inputs)); + assembly = bionix.samtools.sort { } + (softClipsToSplitReads softClipsToSplitReadsAttrs + (bionix.gridss.shardedAssemble shards assemblyAttrs inputs)); mkLinks = '' ln -s ${ref} ref.fa @@ -54,13 +49,12 @@ let done ${concatMapStringsSep "\n" (linkSV) inputs} ${linkSV assembly} - ${concatMapStringsSep "\n" (linkInput collectMetrics collectMetricsAttrs) inputs} + ${concatMapStringsSep "\n" (linkInput collectMetrics collectMetricsAttrs) + inputs} ${linkInput collectMetrics collectMetricsAttrs assembly} ''; -in - -assert (all sorted inputs); +in assert (all sorted inputs); assert (homoRef); rec { @@ -74,16 +68,21 @@ rec { REFERENCE_SEQUENCE=ref.fa \ ${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \ ASSEMBLY=${assembly} \ + WORKER_THREADS=$NIX_BUILD_CORES \ OUTPUT_VCF=out.vcf \ - ${optionalString (config != null) ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)} \ + ${ + optionalString (config != null) + ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config) + } \ WORKING_DIR=$TMPDIR/ \ TMP_DIR=$TMPDIR/ mv out.vcf $out - ''; + ''; passthru = { filetype = filetype.vcf { ref = ref; }; gridss.assembly = assembly; + multicore = true; }; }; @@ -91,7 +90,12 @@ rec { name = "gridss-annotateVariants"; buildInputs = with pkgs; [ jre ]; buildCommand = 
mkLinks + '' - ln -s ${bionix.gridss.identifyVariants {inherit bwaIndexAttrs faidxAttrs indexAttrs assemblyAttrs collectMetricsAttrs softClipsToSplitReadsAttrs config; } inputs} input.vcf + ln -s ${ + bionix.gridss.identifyVariants { + inherit bwaIndexAttrs faidxAttrs indexAttrs assemblyAttrs + collectMetricsAttrs softClipsToSplitReadsAttrs config; + } inputs + } input.vcf java -Xmx${heapSize} -Dsamjdk.create_index=true \ -cp ${jar} gridss.AnnotateVariants \ VERBOSITY=WARNING \ @@ -100,15 +104,20 @@ rec { ASSEMBLY=${assembly} \ INPUT_VCF=input.vcf \ OUTPUT_VCF=out.vcf \ + WORKER_THREADS=$NIX_BUILD_CORES \ WORKING_DIR=$TMPDIR/ \ - ${optionalString (config != null) ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)} \ + ${ + optionalString (config != null) + ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config) + } \ TMP_DIR=$TMPDIR/ mv out.vcf $out - ''; + ''; passthru = { filetype = filetype.vcf { ref = ref; }; gridss.assembly = assembly; + multicore = true; }; }; diff --git a/tools/gridss.nix b/tools/gridss.nix index 70ba21d..a002e69 100644 --- a/tools/gridss.nix +++ b/tools/gridss.nix @@ -1,81 +1,95 @@ -{bionix}: +{ bionix }: with bionix; with lib; rec { jar = pkgs.fetchurl { - url = "https://github.com/PapenfussLab/gridss/releases/download/v2.12.1/gridss-2.12.1-gridss-jar-with-dependencies.jar"; + url = + "https://github.com/PapenfussLab/gridss/releases/download/v2.12.1/gridss-2.12.1-gridss-jar-with-dependencies.jar"; sha256 = "sha256-wH5O+vYKlWGDJTTnMgKYrSNtzoU7wDHqNraiW8xrxXA="; }; /* Generate configuration file for GRIDSS. Takes attribute sets to GRIDSS ini style format. 
- Type: genConfig :: attrSet -> ini file + Type: genConfig :: attrSet -> ini file */ - genConfig = callBionix ./gridss-configFile.nix {}; + genConfig = callBionix ./gridss-configFile.nix { }; /* Invoke the callVariants tool - Type: callVariants :: {blacklist :: drv = null, config :: ini = null, heapSize :: String = "31g", ...} -> [bam] -> variants + Type: callVariants :: {blacklist :: drv = null, config :: ini = null, heapSize :: String = "31g", ...} -> [bam] -> variants */ callVariants = callBionixE ./gridss-callVariants.nix; /* Invoke computeSamTags tool - Type: computeSamTags :: {config :: ini = null, heapSize :: String = "1G", ...} -> bam -> bam + Type: computeSamTags :: {config :: ini = null, heapSize :: String = "1G", ...} -> bam -> bam */ computeSamTags = callBionixE ./gridss-computeSamTags.nix; /* Invoke softClipsToSplitReads tool - Type: softClipsToSplitReads :: {alignerStreaming :: Bool = false, config :: ini = null, heapSize :: String = "2G", ...} -> bam -> bam + Type: softClipsToSplitReads :: {alignerStreaming :: Bool = false, config :: ini = null, heapSize :: String = "2G", ...} -> bam -> bam */ softClipsToSplitReads = callBionixE ./gridss-softClipsToSplitReads.nix; /* Invoke collectMetrics tool - Type: collectMetrics :: {thresholdCoverage :: Int = 10000, config :: ini = null, heapSize :: String = "1G", ...} -> bam -> metrics + Type: collectMetrics :: {thresholdCoverage :: Int = 10000, config :: ini = null, heapSize :: String = "1G", ...} -> bam -> metrics */ collectMetrics = callBionixE ./gridss-collectMetrics.nix; /* Invoke extractSVReads tool - Type: extractSVReads :: {unmappedReads :: Bool = false, minClipLength :: Int = 5, config :: ini = null, ...} -> bam -> bam + Type: extractSVReads :: {unmappedReads :: Bool = false, minClipLength :: Int = 5, config :: ini = null, ...} -> bam -> bam */ extractSVReads = callBionixE ./gridss-extractSVReads.nix; /* Invoke assembly tool - Type: assemble :: {config :: ini = null, heapSize :: String = "31g", ...} -> 
[bam] -> bam + Type: assemble :: {config :: ini = null, heapSize :: String = "31g", ...} -> [bam] -> bam */ assemble = callBionixE ./gridss-assemble.nix; shardedAssemble = n: a: input: - let assemblies = genList (i: bionix.gridss.assemble (a // { jobNodes = n; jobIndex = i;}) input) n; - in if n <= 1 then bionix.gridss.assemble a input else bionix.gridss.assemble (a // {workdirs = map (a: a.work) assemblies;}) input; + let + assemblies = genList (i: + bionix.gridss.assemble (a // { + jobNodes = n; + jobIndex = i; + }) input) n; + in if n <= 1 then + bionix.gridss.assemble a input + else + bionix.gridss.assemble (a // { workdirs = map (a: a.work) assemblies; }) + input; /* Invoke identifyVariants tool - Type: identifyVariants :: {config :: ini = null, heapSize :: String = "4g", ...} -> [bam] -> VCF + Type: identifyVariants :: {config :: ini = null, heapSize :: String = "4g", ...} -> [bam] -> VCF */ - identifyVariants = exec (attrs: input: ((callBionix ./gridss-variants.nix attrs) input).identify); + identifyVariants = exec + (attrs: input: ((callBionix ./gridss-variants.nix attrs) input).identify); /* Invoke annotateVariants tool - Type: annotateVariants :: {config :: ini = null, heapSize :: String = "4g", ...} -> [bam] -> VCF + Type: annotateVariants :: {config :: ini = null, heapSize :: String = "4g", ...} -> [bam] -> VCF */ - annotateVariants = exec (attrs: input: ((callBionix ./gridss-variants.nix attrs) input).annotate); + annotateVariants = exec + (attrs: input: ((callBionix ./gridss-variants.nix attrs) input).annotate); - /* As annotateVariants except include assembly in output */ - annotateAndAssemble = exec (attrs: input: ((callBionix ./gridss-variants.nix attrs) input).annotateAndAssemble); + # As annotateVariants except include assembly in output + annotateAndAssemble = exec (attrs: input: + ((callBionix ./gridss-variants.nix attrs) input).annotateAndAssemble); - /* Preprocess BAM files to extract SV reads and compute required stats - Type: preprocessBam 
:: bam -> bam */ preprocessBam = with samtools; flip pipe [ - (gridss.extractSVReads {}) - (sort {nameSort = true;}) - (gridss.computeSamTags {}) - (sort {}) + (gridss.extractSVReads { }) + (sort { nameSort = true; }) + (gridss.computeSamTags { }) + (sort { }) ]; - /* Call SVs: entire pipeline including preprocessing. It is recommended to use this function rather than the individual tools above. - Type: call :: [bam] -> GRIDSS result + /* Call SVs: entire pipeline including preprocessing. It is recommended to use this function rather than the individual tools above. + Type: call :: [bam] -> GRIDSS result */ - call = inputs: gridss.annotateVariants {} (map gridss.preprocessBam inputs); + call = inputs: gridss.annotateVariants { } (map gridss.preprocessBam inputs); - /* As call but include assemblies in output */ - callAndAssemble = inputs: gridss.annotateAndAssemble {} (map gridss.preprocessBam inputs); + # As call but include assemblies in output + callAndAssemble = inputs: + gridss.annotateAndAssemble { } (map gridss.preprocessBam inputs); } -- cgit v1.2.3