blob: 33992c4356c6bda463edea1a8bc1b4f7ab0b9eeb (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
|
# Builds a derivation that runs GRIDSS `gridss.AnnotateVariants` over `inputs`
# and moves the resulting VCF to $out.
#
# Arguments (first, attribute-set parameter):
#   bionix, nixpkgs              package sets; opened with `with` below
#   bwaIndexAttrs                passed to bionix.bwa.index
#   faidxAttrs                   passed to bionix.samtools.faidx
#   assemblyAttrs                passed to bionix.gridss.assemble
#   extractSVReadsAttrs          passed to extractSVReads
#   collectMetricsAttrs          passed to collectMetrics
#   softClipsToSplitReadsAttrs   passed to softClipsToSplitReads
#   identifyVariantsAttrs        passed to identifyVariants
#   flags                        NOTE(review): accepted but not used anywhere
#                                in this expression
#   config                       when non-null, rendered through gridssConfig
#                                and handed to GRIDSS as CONFIGURATION_FILE
#
# Second parameter:
#   inputs                       list of inputs; every element must satisfy
#                                the `sorted` matcher and all must share one
#                                reference (both enforced by asserts below)
#
# passthru exposes the output filetype (vcf, tagged with the shared reference)
# and the assembly used for annotation as `gridss.assembly`.
{ bionix
, nixpkgs
, bwaIndexAttrs ? {}
, faidxAttrs ? {}
, assemblyAttrs ? {}
, extractSVReadsAttrs ? {}
, collectMetricsAttrs ? {}
, softClipsToSplitReadsAttrs ? {}
, identifyVariantsAttrs ? {}
, flags ? null
, config ? null
}:

with nixpkgs;
with lib;
with bionix.types;
with bionix.gridss;

inputs:

let
  # Reference attached to an input. Only a `bam` handler is supplied, so
  # presumably any other filetype is rejected by matchFiletype (TODO confirm).
  getref = matchFiletype "gridss-annotateVariants" { bam = x: x.ref; };

  # The reference of the first input; `homoRef` guarantees it is the only one.
  ref = getref (head inputs);

  # Only a `coord` handler is supplied, i.e. inputs are expected to be
  # coordinate sorted.
  sorted = matchFileSorting "gridss-annotateVariants" { coord = _: true; };

  # True when all inputs carry exactly one distinct reference.
  homoRef = length (unique (map getref inputs)) == 1;

  # Shell fragment that links every file produced by `f attrs input` into
  # "<basename of input>.gridss.working/", naming each link
  # "<basename>.<suffix>", where <suffix> is the source path with everything
  # up to and including its first '.' removed.
  # The Nix-level `f` is interpolated at evaluation time; the `f` inside the
  # generated script is an unrelated shell loop variable.
  linkInput = f: attrs: input: ''
    BASENAME=$(basename ${input})
    WRKDIR="''${BASENAME}.gridss.working"
    if [[ ! -e $WRKDIR ]] ; then
      mkdir $WRKDIR
    fi
    for f in ${f attrs input}/* ; do
      ln -s $f $WRKDIR/$BASENAME.''${f#*.}
    done
  '';

  # assemble -> name sort -> softClipsToSplitReads -> default sort.
  assembly = bionix.samtools.sort {}
    (softClipsToSplitReads softClipsToSplitReadsAttrs
      (bionix.samtools.sort { nameSort = true; }
        (bionix.gridss.assemble assemblyAttrs inputs)));
in

assert (all sorted inputs);
assert (homoRef);

stdenv.mkDerivation rec {
  # Was "gridss-identifyVariants" (copy-paste slip): this derivation runs
  # gridss.AnnotateVariants.
  name = "gridss-annotateVariants";
  buildInputs = [ jre ];
  buildCommand = ''
    ln -s ${ref} ref.fa
    ln -s ${bionix.samtools.faidx faidxAttrs ref} ref.fa.fai
    for f in ${bionix.bwa.index bwaIndexAttrs ref}/*; do
      ln -s $f
    done
    ${concatMapStringsSep "\n" (linkInput extractSVReads extractSVReadsAttrs) inputs}
    ${concatMapStringsSep "\n" (linkInput collectMetrics collectMetricsAttrs) inputs}
    ${linkInput collectMetrics collectMetricsAttrs assembly}
    ASSBASE=$(basename ${assembly})
    ln -s ${assembly} $ASSBASE.gridss.working/$ASSBASE.sv.bam
    ln -s ${bionix.samtools.index {} assembly} $ASSBASE.gridss.working/$ASSBASE.sv.bai
    ln -s ${identifyVariants identifyVariantsAttrs inputs} input.vcf
    java -Xmx4g -Dsamjdk.create_index=true \
      -cp ${jar} gridss.AnnotateVariants \
      REFERENCE_SEQUENCE=ref.fa \
      ${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \
      ASSEMBLY=${assembly} \
      INPUT_VCF=input.vcf \
      OUTPUT_VCF=out.vcf \
      WORKING_DIR=$TMPDIR/ \
      ${optionalString (config != null) ("CONFIGURATION_FILE=" + gridssConfig config)} \
      TMP_DIR=$TMPDIR/
    mv out.vcf $out
  '';
  # Note on the CONFIGURATION_FILE line above: optionalString needs a boolean
  # condition, so `config` (null or a configuration value) is compared against
  # null instead of being passed directly.
  passthru = {
    filetype = filetype.vcf { inherit ref; };
    gridss.assembly = assembly;
  };
}
|