about summary refs log tree commit diff
path: root/tools
diff options
context:
space:
mode:
author Justin Bedo <cu@cua0.org> 2020-09-02 16:24:37 +1000
committer Justin Bedo <cu@cua0.org> 2020-09-02 16:24:37 +1000
commit 9cf1724895f001c625fba37d78f517acb6938e07 (patch)
tree cef3900c0ffd13a3e71763d38bd85d893a3ce3ec /tools
parent f3dc7987e0e841b97fd43160c68f8d09265083bc (diff)
gridss: shard assembly into 10 pieces by default
Diffstat (limited to 'tools')
-rw-r--r-- tools/gridss-assemble.nix 15
-rw-r--r-- tools/gridss-variants.nix 3
-rw-r--r-- tools/gridss.nix 3
3 files changed, 18 insertions, 3 deletions
diff --git a/tools/gridss-assemble.nix b/tools/gridss-assemble.nix
index a532f4b..f0a2693 100644
--- a/tools/gridss-assemble.nix
+++ b/tools/gridss-assemble.nix
@@ -6,6 +6,9 @@
, flags ? null
, config ? null
, heapSize ? "31g"
+, workdirs ? []
+, jobIndex ? null
+, jobNodes ? null
}:
with bionix;
@@ -37,7 +40,8 @@ assert (homoRef);
stage rec {
name = "gridss-assemble";
- buildInputs = with pkgs; [ jre bwa ];
+ buildInputs = with pkgs; [ jre bwa rsync ];
+ outputs = [ "out" "work" ];
buildCommand = ''
TMPDIR=$(pwd)
ln -s ${ref} ref.fa
@@ -46,7 +50,8 @@ stage rec {
ln -s $f
done
${concatMapStringsSep "\n" linkInput inputs}
- java -Xmx${heapSize} -Dsamjdk.create_index=true \
+ ${concatMapStringsSep "\n" (w: "rsync -a --ignore-existing ${w}/ ./") workdirs}
+ java -Xmx${heapSize} -Dsamjdk.create_index=true \
-cp ${bionix.gridss.jar} gridss.AssembleBreakends \
VERBOSITY=WARNING \
REFERENCE_SEQUENCE=ref.fa \
@@ -56,7 +61,13 @@ stage rec {
${optionalString (config != null) ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)} \
WORKING_DIR=$TMPDIR/ \
TMP_DIR=$TMPDIR/ \
+ ${optionalString (jobIndex != null) "JOB_INDEX=${toString jobIndex}"} \
+ ${optionalString (jobIndex != null) "JOB_NODES=${toString jobNodes}"} \
${optionalString (flags != null) flags}
+ rm -rf tmp
+ touch $out
+ cp -r $TMPDIR $work
+ chmod u+rwX -R $work
'';
passthru.filetype = filetype.bam { ref = ref; sorting = sort.name {}; };
passthru.multicore = true;
diff --git a/tools/gridss-variants.nix b/tools/gridss-variants.nix
index ad4c66d..c208758 100644
--- a/tools/gridss-variants.nix
+++ b/tools/gridss-variants.nix
@@ -7,6 +7,7 @@
, softClipsToSplitReadsAttrs ? { flags = "REALIGN_ENTIRE_READ=true"; }
, config ? null
, heapSize ? "4g"
+, shards ? 10
}:
with bionix;
@@ -43,7 +44,7 @@ let
ln -s ${bionix.samtools.index indexAttrs input} $WRKDIR/$BASENAME.sv.bai
'';
- assembly = bionix.samtools.sort {} (softClipsToSplitReads softClipsToSplitReadsAttrs (bionix.gridss.assemble assemblyAttrs inputs));
+ assembly = bionix.samtools.sort {} (softClipsToSplitReads softClipsToSplitReadsAttrs (bionix.gridss.shardedAssemble shards assemblyAttrs inputs));
mkLinks = ''
ln -s ${ref} ref.fa
diff --git a/tools/gridss.nix b/tools/gridss.nix
index aaf8e16..41f6a5a 100644
--- a/tools/gridss.nix
+++ b/tools/gridss.nix
@@ -43,6 +43,9 @@ rec {
Type: assemble :: {config :: ini = null, heapSize :: String = "31g", ...} -> [bam] -> bam
*/
assemble = callBionixE ./gridss-assemble.nix;
+ shardedAssemble = n: a: input:
+ let assemblies = genList (i: bionix.gridss.assemble (a // { jobNodes = n; jobIndex = i;}) input) n;
+ in if n <= 1 then bionix.gridss.assemble a input else bionix.gridss.assemble (a // {workdirs = map (a: a.work) assemblies;}) input;
/* Invoke identifyVariants tool
Type: identifyVariants :: {config :: ini = null, heapSize :: String = "4g", ...} -> [bam] -> VCF