From 9cf1724895f001c625fba37d78f517acb6938e07 Mon Sep 17 00:00:00 2001
From: Justin Bedo
Date: Wed, 2 Sep 2020 16:24:37 +1000
Subject: gridss: shard assembly into 10 pieces by default

---
Review notes (free text in this position is ignored when the patch is applied):
* This copy of the patch had been flattened onto two lines. Line breaks were
  restored from the hunk headers; leading whitespace and the position of blank
  context lines are reconstructed, so applying it may need whitespace-tolerant
  options. The java line in gridss-assemble.nix is a whitespace-only change
  whose original whitespace could not be recovered.
* JOB_NODES was guarded by jobIndex only, so a set jobIndex with a null
  jobNodes produced a bare "JOB_NODES=" argument. It is now emitted only when
  both are set; shardedAssemble always sets both, so its behaviour is unchanged.
* The inner lambda in shardedAssemble no longer shadows the attribute-set
  argument `a`.
* Trailing comments were added to the new parameters and to shardedAssemble;
  no lines were added or removed, so the hunk counts below still hold.

 tools/gridss-assemble.nix | 15 +++++++++++++--
 tools/gridss-variants.nix |  3 ++-
 tools/gridss.nix          |  3 +++
 3 files changed, 18 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/gridss-assemble.nix b/tools/gridss-assemble.nix
index a532f4b..f0a2693 100644
--- a/tools/gridss-assemble.nix
+++ b/tools/gridss-assemble.nix
@@ -6,6 +6,9 @@
 , flags ? null
 , config ? null
 , heapSize ? "31g"
+, workdirs ? [] # directories rsynced into the build directory before AssembleBreakends runs
+, jobIndex ? null # passed to AssembleBreakends as JOB_INDEX when set
+, jobNodes ? null # passed to AssembleBreakends as JOB_NODES when set together with jobIndex
 }:

 with bionix;
@@ -37,7 +40,8 @@ assert (homoRef);

 stage rec {
   name = "gridss-assemble";
-  buildInputs = with pkgs; [ jre bwa ];
+  buildInputs = with pkgs; [ jre bwa rsync ];
+  outputs = [ "out" "work" ];
   buildCommand = ''
     TMPDIR=$(pwd)
     ln -s ${ref} ref.fa
@@ -46,7 +50,8 @@ stage rec {
       ln -s $f
     done
     ${concatMapStringsSep "\n" linkInput inputs}
-    java -Xmx${heapSize} -Dsamjdk.create_index=true \
+    ${concatMapStringsSep "\n" (w: "rsync -a --ignore-existing ${w}/ ./") workdirs}
+    java -Xmx${heapSize} -Dsamjdk.create_index=true \
       -cp ${bionix.gridss.jar} gridss.AssembleBreakends \
       VERBOSITY=WARNING \
       REFERENCE_SEQUENCE=ref.fa \
@@ -56,7 +61,13 @@ stage rec {
       ${optionalString (config != null) ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)} \
       WORKING_DIR=$TMPDIR/ \
       TMP_DIR=$TMPDIR/ \
+      ${optionalString (jobIndex != null) "JOB_INDEX=${toString jobIndex}"} \
+      ${optionalString (jobIndex != null && jobNodes != null) "JOB_NODES=${toString jobNodes}"} \
       ${optionalString (flags != null) flags}
+    rm -rf tmp
+    touch $out
+    cp -r $TMPDIR $work
+    chmod u+rwX -R $work
   '';
   passthru.filetype = filetype.bam { ref = ref; sorting = sort.name {}; };
   passthru.multicore = true;
diff --git a/tools/gridss-variants.nix b/tools/gridss-variants.nix
index ad4c66d..c208758 100644
--- a/tools/gridss-variants.nix
+++ b/tools/gridss-variants.nix
@@ -7,6 +7,7 @@
 , softClipsToSplitReadsAttrs ? { flags = "REALIGN_ENTIRE_READ=true"; }
 , config ? null
 , heapSize ? "4g"
+, shards ? 10
 }:

 with bionix;
@@ -43,7 +44,7 @@ let
     ln -s ${bionix.samtools.index indexAttrs input} $WRKDIR/$BASENAME.sv.bai
   '';

-  assembly = bionix.samtools.sort {} (softClipsToSplitReads softClipsToSplitReadsAttrs (bionix.gridss.assemble assemblyAttrs inputs));
+  assembly = bionix.samtools.sort {} (softClipsToSplitReads softClipsToSplitReadsAttrs (bionix.gridss.shardedAssemble shards assemblyAttrs inputs));

   mkLinks = ''
     ln -s ${ref} ref.fa
diff --git a/tools/gridss.nix b/tools/gridss.nix
index aaf8e16..41f6a5a 100644
--- a/tools/gridss.nix
+++ b/tools/gridss.nix
@@ -43,6 +43,9 @@ rec {
   Type: assemble :: {config :: ini = null, heapSize :: String = "31g", ...} -> [bam] -> bam
   */
   assemble = callBionixE ./gridss-assemble.nix;
+  shardedAssemble = n: a: input: # for n > 1: n assemble jobs (jobIndex 0..n-1), then one assemble run given their work outputs as workdirs
+    let assemblies = genList (i: bionix.gridss.assemble (a // { jobNodes = n; jobIndex = i;}) input) n;
+    in if n <= 1 then bionix.gridss.assemble a input else bionix.gridss.assemble (a // {workdirs = map (shard: shard.work) assemblies;}) input;

   /* Invoke identifyVariants tool
   Type: identifyVariants :: {config :: ini = null, heapSize :: String = "4g", ...} -> [bam] -> VCF
-- 
cgit v1.2.3