aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Justin Bedo <cu@cua0.org> 2021-09-21 14:15:31 +1000
committer: Justin Bedo <cu@cua0.org> 2021-09-21 14:15:31 +1000
commit: 7c46b53f316d43aad3bf7cb6891c5eb05b996de5 (patch)
tree: 2a77c938ed9cf8d1723f73521e566d7e17a2ac3c
parent: 1ff1c9cf0bcc95a6555d1217598f55b7b9eae866 (diff)
gridss: specify cores for all multithreaded phases
-rw-r--r-- tools/gridss-computeSamTags.nix 31
-rw-r--r-- tools/gridss-variants.nix 49
-rw-r--r-- tools/gridss.nix 72
3 files changed, 87 insertions, 65 deletions
diff --git a/tools/gridss-computeSamTags.nix b/tools/gridss-computeSamTags.nix
index a2e1f8b..de44478 100644
--- a/tools/gridss-computeSamTags.nix
+++ b/tools/gridss-computeSamTags.nix
@@ -1,10 +1,5 @@
-{ bionix
-, bwaIndexAttrs ? {}
-, faidxAttrs ? {}
-, flags ? null
-, config ? null
-, heapSize ? "1G"
-}:
+{ bionix, bwaIndexAttrs ? { }, faidxAttrs ? { }, flags ? null, config ? null
+, heapSize ? "1G" }:
with bionix;
with lib;
@@ -15,9 +10,8 @@ input:
let
ref = matchFiletype "gridss-computeSamTags" { bam = x: x.ref; } input;
sorted = matchFileSorting "gridss-computeSamTags" { name = _: true; } input;
-in
-assert(sorted);
+in assert (sorted);
stage rec {
name = "gridss-computeSamTags";
@@ -30,15 +24,20 @@ stage rec {
done
java -Xmx${heapSize} \
-Dsamjdk.create_index=false \
- -cp ${bionix.gridss.jar} gridss.ComputeSamTags \
+ -cp ${bionix.gridss.jar} gridss.ComputeSamTags \
VERBOSITY=WARNING \
- REFERENCE_SEQUENCE=ref.fa \
- WORKING_DIR=$TMP_DIR \
- TMP_DIR=$TMP_DIR \
- ${optionalString (config != null) ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)} \
- I=${input} \
- O=$out \
+ WORKER_THREADS=$NIX_BUILD_CORES \
+ REFERENCE_SEQUENCE=ref.fa \
+ WORKING_DIR=$TMP_DIR \
+ TMP_DIR=$TMP_DIR \
+ ${
+ optionalString (config != null)
+ ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)
+ } \
+ I=${input} \
+ O=$out \
AS=true
'';
passthru.filetype = input.filetype;
+ passthru.multicore = true;
}
diff --git a/tools/gridss-variants.nix b/tools/gridss-variants.nix
index c208758..a5c470a 100644
--- a/tools/gridss-variants.nix
+++ b/tools/gridss-variants.nix
@@ -1,14 +1,7 @@
-{ bionix
-, bwaIndexAttrs ? {}
-, faidxAttrs ? {}
-, indexAttrs ? {}
-, assemblyAttrs ? {}
-, collectMetricsAttrs ? {}
+{ bionix, bwaIndexAttrs ? { }, faidxAttrs ? { }, indexAttrs ? { }
+, assemblyAttrs ? { }, collectMetricsAttrs ? { }
, softClipsToSplitReadsAttrs ? { flags = "REALIGN_ENTIRE_READ=true"; }
-, config ? null
-, heapSize ? "4g"
-, shards ? 10
-}:
+, config ? null, heapSize ? "4g", shards ? 10 }:
with bionix;
with lib;
@@ -44,7 +37,9 @@ let
ln -s ${bionix.samtools.index indexAttrs input} $WRKDIR/$BASENAME.sv.bai
'';
- assembly = bionix.samtools.sort {} (softClipsToSplitReads softClipsToSplitReadsAttrs (bionix.gridss.shardedAssemble shards assemblyAttrs inputs));
+ assembly = bionix.samtools.sort { }
+ (softClipsToSplitReads softClipsToSplitReadsAttrs
+ (bionix.gridss.shardedAssemble shards assemblyAttrs inputs));
mkLinks = ''
ln -s ${ref} ref.fa
@@ -54,13 +49,12 @@ let
done
${concatMapStringsSep "\n" (linkSV) inputs}
${linkSV assembly}
- ${concatMapStringsSep "\n" (linkInput collectMetrics collectMetricsAttrs) inputs}
+ ${concatMapStringsSep "\n" (linkInput collectMetrics collectMetricsAttrs)
+ inputs}
${linkInput collectMetrics collectMetricsAttrs assembly}
'';
-in
-
-assert (all sorted inputs);
+in assert (all sorted inputs);
assert (homoRef);
rec {
@@ -74,16 +68,21 @@ rec {
REFERENCE_SEQUENCE=ref.fa \
${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \
ASSEMBLY=${assembly} \
+ WORKER_THREADS=$NIX_BUILD_CORES \
OUTPUT_VCF=out.vcf \
- ${optionalString (config != null) ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)} \
+ ${
+ optionalString (config != null)
+ ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)
+ } \
WORKING_DIR=$TMPDIR/ \
TMP_DIR=$TMPDIR/
mv out.vcf $out
- '';
+ '';
passthru = {
filetype = filetype.vcf { ref = ref; };
gridss.assembly = assembly;
+ multicore = true;
};
};
@@ -91,7 +90,12 @@ rec {
name = "gridss-annotateVariants";
buildInputs = with pkgs; [ jre ];
buildCommand = mkLinks + ''
- ln -s ${bionix.gridss.identifyVariants {inherit bwaIndexAttrs faidxAttrs indexAttrs assemblyAttrs collectMetricsAttrs softClipsToSplitReadsAttrs config; } inputs} input.vcf
+ ln -s ${
+ bionix.gridss.identifyVariants {
+ inherit bwaIndexAttrs faidxAttrs indexAttrs assemblyAttrs
+ collectMetricsAttrs softClipsToSplitReadsAttrs config;
+ } inputs
+ } input.vcf
java -Xmx${heapSize} -Dsamjdk.create_index=true \
-cp ${jar} gridss.AnnotateVariants \
VERBOSITY=WARNING \
@@ -100,15 +104,20 @@ rec {
ASSEMBLY=${assembly} \
INPUT_VCF=input.vcf \
OUTPUT_VCF=out.vcf \
+ WORKER_THREADS=$NIX_BUILD_CORES \
WORKING_DIR=$TMPDIR/ \
- ${optionalString (config != null) ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)} \
+ ${
+ optionalString (config != null)
+ ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)
+ } \
TMP_DIR=$TMPDIR/
mv out.vcf $out
- '';
+ '';
passthru = {
filetype = filetype.vcf { ref = ref; };
gridss.assembly = assembly;
+ multicore = true;
};
};
diff --git a/tools/gridss.nix b/tools/gridss.nix
index 70ba21d..a002e69 100644
--- a/tools/gridss.nix
+++ b/tools/gridss.nix
@@ -1,81 +1,95 @@
-{bionix}:
+{ bionix }:
with bionix;
with lib;
rec {
jar = pkgs.fetchurl {
- url = "https://github.com/PapenfussLab/gridss/releases/download/v2.12.1/gridss-2.12.1-gridss-jar-with-dependencies.jar";
+ url =
+ "https://github.com/PapenfussLab/gridss/releases/download/v2.12.1/gridss-2.12.1-gridss-jar-with-dependencies.jar";
sha256 = "sha256-wH5O+vYKlWGDJTTnMgKYrSNtzoU7wDHqNraiW8xrxXA=";
};
/* Generate configuration file for GRIDSS. Takes attribute sets to GRIDSS ini style format.
- Type: genConfig :: attrSet -> ini file
+ Type: genConfig :: attrSet -> ini file
*/
- genConfig = callBionix ./gridss-configFile.nix {};
+ genConfig = callBionix ./gridss-configFile.nix { };
/* Invoke the callVariants tool
- Type: callVariants :: {blacklist :: drv = null, config :: ini = null, heapSize :: String = "31g", ...} -> [bam] -> variants
+ Type: callVariants :: {blacklist :: drv = null, config :: ini = null, heapSize :: String = "31g", ...} -> [bam] -> variants
*/
callVariants = callBionixE ./gridss-callVariants.nix;
/* Invoke computeSamTags tool
- Type: computeSamTags :: {config :: ini = null, heapSize :: String = "1G", ...} -> bam -> bam
+ Type: computeSamTags :: {config :: ini = null, heapSize :: String = "1G", ...} -> bam -> bam
*/
computeSamTags = callBionixE ./gridss-computeSamTags.nix;
/* Invoke softClipsToSplitReads tool
- Type: softClipsToSplitReads :: {alignerStreaming :: Bool = false, config :: ini = null, heapSize :: String = "2G", ...} -> bam -> bam
+ Type: softClipsToSplitReads :: {alignerStreaming :: Bool = false, config :: ini = null, heapSize :: String = "2G", ...} -> bam -> bam
*/
softClipsToSplitReads = callBionixE ./gridss-softClipsToSplitReads.nix;
/* Invoke collectMetrics tool
- Type: collectMetrics :: {thresholdCoverage :: Int = 10000, config :: ini = null, heapSize :: String = "1G", ...} -> bam -> metrics
+ Type: collectMetrics :: {thresholdCoverage :: Int = 10000, config :: ini = null, heapSize :: String = "1G", ...} -> bam -> metrics
*/
collectMetrics = callBionixE ./gridss-collectMetrics.nix;
/* Invoke extractSVReads tool
- Type: extractSVReads :: {unmappedReads :: Bool = false, minClipLength :: Int = 5, config :: ini = null, ...} -> bam -> bam
+ Type: extractSVReads :: {unmappedReads :: Bool = false, minClipLength :: Int = 5, config :: ini = null, ...} -> bam -> bam
*/
extractSVReads = callBionixE ./gridss-extractSVReads.nix;
/* Invoke assembly tool
- Type: assemble :: {config :: ini = null, heapSize :: String = "31g", ...} -> [bam] -> bam
+ Type: assemble :: {config :: ini = null, heapSize :: String = "31g", ...} -> [bam] -> bam
*/
assemble = callBionixE ./gridss-assemble.nix;
shardedAssemble = n: a: input:
- let assemblies = genList (i: bionix.gridss.assemble (a // { jobNodes = n; jobIndex = i;}) input) n;
- in if n <= 1 then bionix.gridss.assemble a input else bionix.gridss.assemble (a // {workdirs = map (a: a.work) assemblies;}) input;
+ let
+ assemblies = genList (i:
+ bionix.gridss.assemble (a // {
+ jobNodes = n;
+ jobIndex = i;
+ }) input) n;
+ in if n <= 1 then
+ bionix.gridss.assemble a input
+ else
+ bionix.gridss.assemble (a // { workdirs = map (a: a.work) assemblies; })
+ input;
/* Invoke identifyVariants tool
- Type: identifyVariants :: {config :: ini = null, heapSize :: String = "4g", ...} -> [bam] -> VCF
+ Type: identifyVariants :: {config :: ini = null, heapSize :: String = "4g", ...} -> [bam] -> VCF
*/
- identifyVariants = exec (attrs: input: ((callBionix ./gridss-variants.nix attrs) input).identify);
+ identifyVariants = exec
+ (attrs: input: ((callBionix ./gridss-variants.nix attrs) input).identify);
/* Invoke annotateVariants tool
- Type: annotateVariants :: {config :: ini = null, heapSize :: String = "4g", ...} -> [bam] -> VCF
+ Type: annotateVariants :: {config :: ini = null, heapSize :: String = "4g", ...} -> [bam] -> VCF
*/
- annotateVariants = exec (attrs: input: ((callBionix ./gridss-variants.nix attrs) input).annotate);
+ annotateVariants = exec
+ (attrs: input: ((callBionix ./gridss-variants.nix attrs) input).annotate);
- /* As annotateVariants except include assembly in output */
- annotateAndAssemble = exec (attrs: input: ((callBionix ./gridss-variants.nix attrs) input).annotateAndAssemble);
+ # As annotateVariants except include assembly in output
+ annotateAndAssemble = exec (attrs: input:
+ ((callBionix ./gridss-variants.nix attrs) input).annotateAndAssemble);
- /* Preprocess BAM files to extract SV reads and compute required stats
- Type: preprocessBam :: bam -> bam
+ /* Preprocess BAM files: extract SV reads, name-sort, compute SAM tags, then sort again
+ Type: preprocessBam :: bam -> bam
*/
preprocessBam = with samtools;
flip pipe [
- (gridss.extractSVReads {})
- (sort {nameSort = true;})
- (gridss.computeSamTags {})
- (sort {})
+ (gridss.extractSVReads { })
+ (sort { nameSort = true; })
+ (gridss.computeSamTags { })
+ (sort { })
];
- /* Call SVs: entire pipeline including preprocessing. It is recommended to use this function rather than the individual tools above.
- Type: call :: [bam] -> GRIDSS result
+ /* Call SVs: entire pipeline including preprocessing. It is recommended to use this function rather than the individual tools above.
+ Type: call :: [bam] -> GRIDSS result
*/
- call = inputs: gridss.annotateVariants {} (map gridss.preprocessBam inputs);
+ call = inputs: gridss.annotateVariants { } (map gridss.preprocessBam inputs);
- /* As call but include assemblies in output */
- callAndAssemble = inputs: gridss.annotateAndAssemble {} (map gridss.preprocessBam inputs);
+ # As call but include assemblies in output
+ callAndAssemble = inputs:
+ gridss.annotateAndAssemble { } (map gridss.preprocessBam inputs);
}