From e7cd661d1c5fb4135e3d436e151294e26aef9127 Mon Sep 17 00:00:00 2001
From: Justin Bedo
Date: Mon, 29 Oct 2018 15:33:53 +1100
Subject: Split gridss into constituents

Wrap each individual command for GRIDSS so that bionix executes the pipeline
rather than GRIDSS. This patch introduces a "call" function that executes the
whole pipeline in bionix on an arbitrary BAM file.

Resolves #10.
---
 tools/gridss-assemble.nix | 77 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 tools/gridss-assemble.nix

(limited to 'tools/gridss-assemble.nix')

diff --git a/tools/gridss-assemble.nix b/tools/gridss-assemble.nix
new file mode 100644
index 0000000..cdda748
--- /dev/null
+++ b/tools/gridss-assemble.nix
@@ -0,0 +1,77 @@
+# Assemble breakends with GRIDSS (gridss.AssembleBreakends) over one or more
+# BAM inputs that share a single reference.
+#
+# Arguments:
+#   bwaIndexAttrs, faidxAttrs, collectMetricsAttrs, extractSVReadsAttrs:
+#     attribute sets forwarded to bionix.bwa.index, bionix.samtools.faidx,
+#     bionix.gridss.collectMetrics and bionix.gridss.extractSVReads.
+#   flags: optional string of extra arguments for AssembleBreakends;
+#     null (the default) adds nothing to the command line.
+{ bionix
+, nixpkgs
+, bwaIndexAttrs ? {}
+, faidxAttrs ? {}
+, collectMetricsAttrs ? {}
+, extractSVReadsAttrs ? {}
+, flags ? null
+}:
+
+with nixpkgs;
+with lib;
+with bionix.types;
+
+# List of inputs to assemble together.
+inputs:
+
+let
+  # Reference of an input; only a `bam` handler is supplied to matchFiletype.
+  getref = matchFiletype "gridss-assemble" { bam = x: x.ref; };
+  ref = getref (head inputs);
+  # Only a `coord` handler is supplied, so this is true for coordinate-sorted
+  # inputs (NOTE: behaviour for other sortings depends on matchFileSorting).
+  sorted = matchFileSorting "gridss-assemble" { coord = _: true; };
+  # True when every input has the same reference.
+  homoRef = length (unique (map getref inputs)) == 1;
+
+  # Shell snippet that creates "<basename>.gridss.working" and symlinks every
+  # file produced by extractSVReads and collectMetrics for `input` into it as
+  # "<basename>.<suffix>", where <suffix> is the path text after its first ".".
+  linkInput = input: ''
+    BASENAME=$(basename ${input})
+    WRKDIR="''${BASENAME}.gridss.working"
+    mkdir $WRKDIR
+    for f in ${bionix.gridss.extractSVReads extractSVReadsAttrs input}/* ; do
+      ln -s $f $WRKDIR/$BASENAME.''${f#*.}
+    done
+    for f in ${bionix.gridss.collectMetrics collectMetricsAttrs input}/* ; do
+      ln -s $f $WRKDIR/$BASENAME.''${f#*.}
+    done
+  '';
+in
+
+assert (all sorted inputs);
+assert (homoRef);
+
+stdenv.mkDerivation rec {
+  name = "gridss-assemble";
+  buildInputs = [ jre bwa ];
+  buildCommand = ''
+    ln -s ${ref} ref.fa
+    ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai
+    for f in ${bionix.bwa.index bwaIndexAttrs ref}/*; do
+      ln -s $f
+    done
+    ${concatMapStringsSep "\n" linkInput inputs}
+    java -Xmx31g -Dsamjdk.create_index=true \
+      -cp ${bionix.gridss.jar} gridss.AssembleBreakends \
+      REFERENCE_SEQUENCE=ref.fa \
+      ${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \
+      WORKER_THREADS=$NIX_BUILD_CORES \
+      OUTPUT=$out \
+      ${optionalString (flags != null) flags} \
+      WORKING_DIR=$TMPDIR/ \
+      TMP_DIR=$TMPDIR/
+  '';
+  # The output is tagged as a coordinate-sorted BAM on the shared reference.
+  passthru.filetype = filetype.bam { ref = ref; sorting = sort.coord {}; };
+}
-- 
cgit v1.2.3