blob: c208758a245ed87bc788b18872fa1d28d79911be (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
|
# GRIDSS variant calling stages (identify, annotate, and a bundle of the
# annotated VCF with the assembly BAM).
#
# First argument (attribute set):
#   bionix                     - package set; names from it and from `lib`, `types`
#                                and `gridss` are brought into scope with `with`.
#   bwaIndexAttrs              - attrs passed to bionix.bwa.index for the reference.
#   faidxAttrs                 - attrs passed to bionix.samtools.faidx for the reference.
#   indexAttrs                 - attrs passed to bionix.samtools.index for each BAM.
#   assemblyAttrs              - attrs passed to bionix.gridss.shardedAssemble.
#   collectMetricsAttrs        - attrs passed to collectMetrics for each BAM.
#   softClipsToSplitReadsAttrs - attrs passed to softClipsToSplitReads when
#                                post-processing the assembly.
#   config                     - when non-null, rendered with bionix.gridss.gridssConfig
#                                and handed to GRIDSS as OPTIONS_FILE.
#   heapSize                   - value used for the JVM -Xmx flag.
#   shards                     - shard count passed to bionix.gridss.shardedAssemble.
{ bionix
, bwaIndexAttrs ? {}
, faidxAttrs ? {}
, indexAttrs ? {}
, assemblyAttrs ? {}
, collectMetricsAttrs ? {}
, softClipsToSplitReadsAttrs ? { flags = "REALIGN_ENTIRE_READ=true"; }
, config ? null
, heapSize ? "4g"
, shards ? 10
}:
with bionix;
with lib;
with types;
with gridss;
# Second (curried) argument: the list of input BAMs. The asserts further down
# require them to pass the `sorted` check and to share one reference.
inputs:
let
# Extract the reference recorded in an input's filetype; only the `bam`
# filetype is handled.
getref = matchFiletype "gridss-variants" { bam = x: x.ref; };
# Reference of the first input; homoRef below checks the others agree with it.
ref = getref (head inputs);
# Sort-order check for an input; only the `coord` case is handled (yields true).
# NOTE(review): presumably matchFileSorting aborts on any other sorting — confirm.
sorted = matchFileSorting "gridss-variants" { coord = _: true; };
# True when the inputs report exactly one distinct reference.
homoRef = length (unique (map getref inputs)) == 1;
# Shell prelude shared by linkInput and linkSV: sets BASENAME to the basename
# of the given path and creates the "<BASENAME>.gridss.working" directory
# (WRKDIR) unless it already exists.
gridssWorkDirPrelude = bam: ''
  BASENAME=$(basename ${bam})
  WRKDIR="''${BASENAME}.gridss.working"
  if [[ ! -e $WRKDIR ]] ; then
  mkdir $WRKDIR
  fi
'';
# linkInput mk mkAttrs bam: shell snippet that symlinks every file found in the
# output of `mk mkAttrs bam` into the BAM's working directory, renamed to
# "<BASENAME>.<extension of the source file>".
linkInput = mk: mkAttrs: bam: gridssWorkDirPrelude bam + ''
  for f in ${mk mkAttrs bam}/* ; do
  ln -s $f $WRKDIR/$BASENAME.''${f##*.}
  done
'';
# linkSV bam: shell snippet that symlinks the BAM itself and its samtools index
# into the BAM's working directory as "<BASENAME>.sv.bam" / "<BASENAME>.sv.bai".
linkSV = bam: gridssWorkDirPrelude bam + ''
  ln -s ${bam} $WRKDIR/$BASENAME.sv.bam
  ln -s ${bionix.samtools.index indexAttrs bam} $WRKDIR/$BASENAME.sv.bai
'';
# Assembly BAM shared by all stages: the sharded GRIDSS assembly of the inputs
# (using `shards` and assemblyAttrs), passed through softClipsToSplitReads and
# then through samtools sort with default attrs.
assembly = bionix.samtools.sort {} (softClipsToSplitReads softClipsToSplitReadsAttrs (bionix.gridss.shardedAssemble shards assemblyAttrs inputs));
# Shell prelude run before each GRIDSS command. It links the reference as
# ref.fa with its faidx index, links every file of the bwa index into the
# current directory, and populates the per-BAM working directories (the
# .sv.bam/.sv.bai links and the collectMetrics outputs) for each input and
# for the assembly.
mkLinks =
  let
    # Every BAM that needs a working directory: the inputs, then the assembly.
    # `inputs` is never empty here (the homoRef assert rejects an empty list),
    # so joining this combined list gives the same text as joining the inputs
    # and appending the assembly snippet on its own line.
    bams = inputs ++ [ assembly ];
    linkMetrics = linkInput collectMetrics collectMetricsAttrs;
    faidx = bionix.samtools.faidx faidxAttrs ref;
    bwaIndex = bionix.bwa.index bwaIndexAttrs ref;
  in
  ''
    ln -s ${ref} ref.fa
    ln -s ${faidx} ref.fa.fai
    for f in ${bwaIndex}/*; do
    ln -s $f
    done
    ${concatMapStringsSep "\n" linkSV bams}
    ${concatMapStringsSep "\n" linkMetrics bams}
  '';
in
# Every input must pass the `sorted` check defined above.
assert (all sorted inputs);
# All inputs must have been aligned against one and the same reference.
assert (homoRef);
rec {
# Stage running gridss.IdentifyVariants over the inputs and the assembly.
# The output ($out) is the resulting VCF; the assembly BAM is exposed through
# passthru.gridss.assembly.
identify =
  let
    # One INPUT='<bam>' argument per input, separated by single spaces.
    inputArgs = concatMapStringsSep " " (bam: "INPUT='${bam}'") inputs;
    # OPTIONS_FILE=<rendered config> when a config was supplied, else empty.
    optionsArg = optionalString (config != null) ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config);
  in
  stage {
    name = "gridss-identifyVariants";
    buildInputs = with pkgs; [ jre samtools ];
    # mkLinks creates the working directories relative to the current
    # directory, while GRIDSS is pointed at $TMPDIR as its WORKING_DIR.
    # NOTE(review): this relies on the build starting in $TMPDIR — confirm.
    buildCommand = mkLinks + ''
      java -Xmx${heapSize} -Dsamjdk.create_index=true \
      -cp ${jar} gridss.IdentifyVariants \
      VERBOSITY=WARNING \
      REFERENCE_SEQUENCE=ref.fa \
      ${inputArgs} \
      ASSEMBLY=${assembly} \
      OUTPUT_VCF=out.vcf \
      ${optionsArg} \
      WORKING_DIR=$TMPDIR/ \
      TMP_DIR=$TMPDIR/
      mv out.vcf $out
    '';
    passthru = {
      filetype = filetype.vcf { inherit ref; };
      gridss = { inherit assembly; };
    };
  };
# Stage running gridss.AnnotateVariants: annotates the VCF produced by
# bionix.gridss.identifyVariants for the same inputs, using the inputs and the
# assembly as evidence. The output ($out) is the annotated VCF; the assembly
# BAM is exposed through passthru.gridss.assembly.
annotate = stage rec {
  name = "gridss-annotateVariants";
  buildInputs = with pkgs; [ jre ];
  # heapSize and shards are forwarded to identifyVariants along with the other
  # attrs. Without them the identify step would fall back to the default heap
  # size and shard count, so the VCF annotated here could be derived from a
  # different assembly than the one passed as ASSEMBLY below.
  buildCommand = mkLinks + ''
    ln -s ${bionix.gridss.identifyVariants { inherit bwaIndexAttrs faidxAttrs indexAttrs assemblyAttrs collectMetricsAttrs softClipsToSplitReadsAttrs config heapSize shards; } inputs} input.vcf
    java -Xmx${heapSize} -Dsamjdk.create_index=true \
    -cp ${jar} gridss.AnnotateVariants \
    VERBOSITY=WARNING \
    REFERENCE_SEQUENCE=ref.fa \
    ${concatMapStringsSep " " (i: "INPUT='${i}'") inputs} \
    ASSEMBLY=${assembly} \
    INPUT_VCF=input.vcf \
    OUTPUT_VCF=out.vcf \
    WORKING_DIR=$TMPDIR/ \
    ${optionalString (config != null) ("OPTIONS_FILE=" + bionix.gridss.gridssConfig config)} \
    TMP_DIR=$TMPDIR/
    mv out.vcf $out
  '';
  passthru = {
    filetype = filetype.vcf { ref = ref; };
    gridss.assembly = assembly;
  };
};
# Stage bundling the results into one output directory: the annotated VCF as
# gridss.vcf, the assembly BAM as gridss.bam and its samtools index as
# gridss.bam.bai (all symlinks).
annotateAndAssemble =
  let
    assemblyIndex = bionix.samtools.index indexAttrs assembly;
  in
  stage {
    name = "gridss-annotateVariants-assemble";
    buildCommand = ''
      mkdir $out
      ln -s ${annotate} $out/gridss.vcf
      ln -s ${assembly} $out/gridss.bam
      ln -s ${assemblyIndex} $out/gridss.bam.bai
    '';
  };
}
|