aboutsummaryrefslogtreecommitdiff
path: root/tools/ascat-gccorrect.nix
blob: 584c2ae3b36ff3a255aec735c68fc39d23872a8f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# Builds a GC-correction table for an ASCAT SNP panel.
#
# Arguments:
#   bionix    - package set brought into scope below; supplies `stage`,
#               `pkgs` and `lib` as used by this file.
#   ref       - passed as the first argument to ascatSnpPanelGcCorrections.pl
#               (presumably the reference genome; confirm against callers).
#   chrPrefix - string prepended to the first input column (the chromosome
#               name, assuming VCF-like input) when writing SNP positions.
#   flags     - NOTE(review): accepted but never referenced in this file;
#               kept only so existing callers keep evaluating.
#   snp       - tab-separated input file; lines starting with `#` are
#               skipped and columns 1-3 are read (looks like a VCF - confirm).
{ bionix
, ref
, chrPrefix ? ""
, flags ? null
}:

snp:

with bionix;
with lib;
with types;

stage rec {
  name = "ascat-gccorrect";
  buildInputs = with pkgs; [ ascat.app gawk ];

  # awk program that turns the input into a three-column position table:
  # column 3, then prefixed column 1, then column 2. Only the first record
  # seen for each (column 1, column 2) pair is emitted.
  script = pkgs.writeText "convert.awk" ''
    BEGIN{
      FS = OFS = "\t"
    }
    /^#/{next}
    !loc[$1,$2]{
      print $3, "${chrPrefix}" $1, $2
      loc[$1,$2]++
    }
  '';

  # The position table is split by whole lines into one chunk per build core,
  # the chunks are processed in parallel, and the results are joined again.
  buildCommand = ''
    awk -f ${script} ${snp} > snpPos.tsv
    mkdir splitPos splitGc
    split --number=l/$NIX_BUILD_CORES -d snpPos.tsv splitPos/snpPos.

    # -I already runs one command per input line, so no -n1 is needed (GNU
    # xargs warns when both are given). The chunk name is handed over as a
    # positional parameter instead of being spliced into the shell text.
    ls -1 splitPos/ | xargs -P$NIX_BUILD_CORES -I '{}' sh -c 'ascatSnpPanelGcCorrections.pl ${ref} "splitPos/$1" > "splitGc/$1"' _ '{}'

    # Every chunk result is assumed to start with a header line. Keep the very
    # first line of the concatenation and drop the first line of each later
    # chunk. Going through the glob avoids hard-coding the name of the first
    # chunk, whose numeric suffix gets wider once split produces more than
    # 100 chunks.
    awk 'FNR > 1 || NR == 1' splitGc/* > $out
  '';
  passthru.multicore = true;
}