diff options
author | Justin Bedo <cu@cua0.org> | 2022-02-07 13:18:19 +1100 |
---|---|---|
committer | Justin Bedo <cu@cua0.org> | 2022-02-07 13:18:19 +1100 |
commit | 55fa3d7ffc3d3793219afb7d92b7ac2de9d6ce55 (patch) | |
tree | c13a8c18cd57b108c17afb427548156182c8c2ef | |
parent | 5fc4c8fc0ce707c57204e57a2cd058c019bc8bd8 (diff) |
hatchet: init
-rw-r--r-- | default.nix | 11 | ||||
-rw-r--r-- | tools/hatchet-app.nix | 44 | ||||
-rw-r--r-- | tools/hatchet-call.nix | 130 | ||||
-rw-r--r-- | tools/hatchet.nix | 8 |
4 files changed, 188 insertions, 5 deletions
diff --git a/default.nix b/default.nix index 842f55a..ab69803 100644 --- a/default.nix +++ b/default.nix @@ -52,6 +52,7 @@ let star = callBionix ./tools/star.nix { }; genmap = callBionix ./tools/genmap.nix { }; subread = callBionix ./tools/subread.nix { }; + hatchet = callBionix ./tools/hatchet.nix { }; slurm-run = callPackage ./lib/slurm.nix { }; slurm-exec = f: x: y: @@ -100,13 +101,13 @@ let else abort "linkOutputs: unsupported type"; link = dst: src: '' - ln -s ${recurse src} $(perl -e 'print $ENV{"${dst}"}') ; ln -s ${recurse src} $out/${dst} - ''; + ln -s ${recurse src} $(perl -e 'print $ENV{"${dst}"}') ; ln -s ${recurse src} $out/${dst} + ''; in '' - mkdir $out - ${lib.concatStringsSep "\n" (lib.mapAttrsToList link x)} - ''; + mkdir $out + ${lib.concatStringsSep "\n" (lib.mapAttrsToList link x)} + ''; in pkgs.stdenvNoCC.mkDerivation { name = "link-outputs"; diff --git a/tools/hatchet-app.nix b/tools/hatchet-app.nix new file mode 100644 index 0000000..024d85c --- /dev/null +++ b/tools/hatchet-app.nix @@ -0,0 +1,44 @@ +{ python3Packages, fetchFromGitHub, cmake, gurobi, writeText }: + +let + + findgurobi = writeText "FindGUROBI.cmake" '' + set(GUROBI_CPP_LIB ${gurobi}/lib/libgurobi_c++.a) + set(GUROBI_LIB ${gurobi}/lib/libgurobi91.so) + set(GUROBI_INCLUDE_DIR ${gurobi}/include) + set(GUROBI_LIBRARIES ''${GUROBI_CPP_LIB} ''${GUROBI_LIB} -lpthread) + set(GUROBI_FOUND TRUE) + ''; + +in +python3Packages.buildPythonApplication rec { + pname = "HATCHet"; + version = "0.4.9"; + + src = fetchFromGitHub { + owner = "raphael-group"; + repo = "hatchet"; + rev = "v${version}"; + sha256 = "sha256-MB9XFbkLQTf6ZUPrisSzGU8Jeq6SrlMMCQtoyvx/Xvc="; + }; + + dontConfigure = true; + + patchPhase = '' + cat ${findgurobi} > FindGUROBI.cmake + ''; + + nativeBuildInputs = [ cmake ]; + propagatedBuildInputs = with python3Packages; [ + biopython + matplotlib + pandas + psutil + pyomo + pysam + requests + seaborn + scikit-learn + scipy + ]; +} diff --git a/tools/hatchet-call.nix b/tools/hatchet-call.nix new file mode 100644 index 0000000..a3bd5ca --- /dev/null +++ b/tools/hatchet-call.nix @@ -0,0 +1,130 @@ +{ bionix +, gurobiLicense ? null +, count_reads ? true +, genotype_snps ? true +, count_alleles ? true +, combine_counts ? true +, cluster_bins ? true +, plot_bins ? true +, compute_cn ? true +, plot_cn ? true +, size ? "50kb" +, mincov ? 8 +, maxcov ? 300 +, snps +, phase ? "None" +, diploidbaf ? 0.08 +, tolerancerdr ? 0.15 +, tolerancebaf ? 0.04 +, sizethreshold ? 0.01 +, figsize ? "6,3" +, clones ? [ 2 6 ] +, seeds ? 400 +, minprop ? 0.03 +, diploidcmax ? 6 +, tetraploidcmax ? 12 +, ghostprop ? 0.35 +, limitinc ? 0.6 +, blocklength ? "50kb" +}: + +with bionix; +with lib; +with types; + +{ normal, tumours }: + +let getRef = matchFiletype "hatchet" { bam = { ref, ... }: ref; }; +in + +assert all (x: getRef normal == getRef x) tumours; + +let + ref = getRef normal; + + ini = pkgs.writeText "hatchet.ini" (generators.toINI { } { + run = { + inherit count_reads genotype_snps count_alleles combine_counts cluster_bins plot_bins compute_cn plot_cn; + reference = "${lnRef ref}/ref.fa"; + normal = "${lnBam normal}/input.bam"; + bams = concatMapStringsSep " " (x: "${lnBam x}/input.bam") tumours; + samples = "@SAMPLES@"; + output = "./out"; + processes = "@PROCESSES@"; + }; + + count_reads = { inherit size; }; + genotype_snps = { + inherit mincov maxcov; + snps = "${lnVcfBz snps}/vcf.bgz"; + }; + + count_alleles = { inherit mincov maxcov; }; + combine_counts = { inherit blocklength phase; }; + cluster_bins = { inherit diploidbaf tolerancerdr tolerancebaf; }; + plot_bins = { inherit sizethreshold figsize; }; + compute_cn = { + inherit seeds minprop diploidcmax tetraploidcmax ghostprop limitinc; + clones = concatMapStringsSep "," builtins.toString clones; + }; + + }); + + lnRef = ref: linkOutputs { + "ref.fa" = ref; + "ref.fa.fai" = samtools.faidx { } ref; + "ref.dict" = samtools.dict { } ref; + }; + + lnBam = bam: + linkOutputs { + "input.bam" = bam; + "input.bam.bai" = samtools.index { } bam; + }; + + lnVcfBz = vcf: + let bz = compression.bgzip { } vcf; + in + linkOutputs { + "vcf.bgz" = bz; + "vcf.bgz.tbi" = samtools.tabix { } bz; + }; + + getSN = + let + script = pkgs.writeText "getSN.awk" '' + BEGIN{ + FS=":" + RS="[ \t\n]" + } + $1=="SM"{print $2; exit} + ''; + in + pkgs.writeShellScriptBin "getSN" '' + exec samtools view -H "$1" | awk -f ${script} + ''; + +in +stage + ({ + name = "HATCHet"; + buildInputs = [ hatchet.app getSN pkgs.samtools pkgs.bcftools ] ++ optional (gurobiLicense == null) pkgs.cbc; + buildCommand = '' + # Get tumour names + names="${concatMapStringsSep " " (x: "$(getSN ${x})") tumours}" + + # macro substitute ini file + substitute ${ini} hatchet.ini \ + --replace "@PROCESSES@" "$NIX_BUILD_CORES" \ + --replace "@SAMPLES@" "$names" + + hatchet run hatchet.ini + + cp -r out $out + ''; + passthru.multicore = true; + } // (if gurobiLicense != null then { + GRB_LICENSE_FILE = gurobiLicense; + } else { + HATCHET_COMPUTE_CN_SOLVER = "cbc"; + })) diff --git a/tools/hatchet.nix b/tools/hatchet.nix new file mode 100644 index 0000000..b4867d4 --- /dev/null +++ b/tools/hatchet.nix @@ -0,0 +1,8 @@ +{ bionix }: + +with bionix; + +{ + app = pkgs.callPackage ./hatchet-app.nix { }; + call = callBionixE ./hatchet-call.nix; +} |