From 55fa3d7ffc3d3793219afb7d92b7ac2de9d6ce55 Mon Sep 17 00:00:00 2001
From: Justin Bedo <cu@cua0.org>
Date: Mon, 7 Feb 2022 13:18:19 +1100
Subject: hatchet: init

---
 default.nix            |  11 +++--
 tools/hatchet-app.nix  |  44 +++++++++++++++++
 tools/hatchet-call.nix | 130 +++++++++++++++++++++++++++++++++++++++++++++++++
 tools/hatchet.nix      |   8 +++
 4 files changed, 188 insertions(+), 5 deletions(-)
 create mode 100644 tools/hatchet-app.nix
 create mode 100644 tools/hatchet-call.nix
 create mode 100644 tools/hatchet.nix

diff --git a/default.nix b/default.nix
index 842f55a..ab69803 100644
--- a/default.nix
+++ b/default.nix
@@ -52,6 +52,7 @@ let
       star = callBionix ./tools/star.nix { };
       genmap = callBionix ./tools/genmap.nix { };
       subread = callBionix ./tools/subread.nix { };
+      hatchet = callBionix ./tools/hatchet.nix { };
 
       slurm-run = callPackage ./lib/slurm.nix { };
       slurm-exec = f: x: y:
@@ -100,13 +101,13 @@ let
                 else
                   abort "linkOutputs: unsupported type";
               link = dst: src: ''
-           ln -s ${recurse src} $(perl -e 'print $ENV{"${dst}"}') ; ln -s ${recurse src} $out/${dst}
-         '';
+                ln -s ${recurse src} $(perl -e 'print $ENV{"${dst}"}') ; ln -s ${recurse src} $out/${dst}
+              '';
             in
             ''
-         mkdir $out
-         ${lib.concatStringsSep "\n" (lib.mapAttrsToList link x)}
-       '';
+              mkdir $out
+              ${lib.concatStringsSep "\n" (lib.mapAttrsToList link x)}
+            '';
         in
         pkgs.stdenvNoCC.mkDerivation {
           name = "link-outputs";
diff --git a/tools/hatchet-app.nix b/tools/hatchet-app.nix
new file mode 100644
index 0000000..024d85c
--- /dev/null
+++ b/tools/hatchet-app.nix
@@ -0,0 +1,44 @@
+{ lib, python3Packages, fetchFromGitHub, cmake, gurobi, writeText }:
+
+let
+  # Replacement FindGUROBI.cmake with every Gurobi path hard-wired to the gurobi package.
+  findgurobi = writeText "FindGUROBI.cmake" ''
+    set(GUROBI_CPP_LIB ${gurobi}/lib/libgurobi_c++.a)
+    set(GUROBI_LIB ${gurobi}/lib/libgurobi${gurobiSuffix}.so)
+    set(GUROBI_INCLUDE_DIR ${gurobi}/include)
+    set(GUROBI_LIBRARIES ''${GUROBI_CPP_LIB} ''${GUROBI_LIB} -lpthread)
+    set(GUROBI_FOUND TRUE)
+  '';
+  gurobiSuffix = lib.replaceStrings [ "." ] [ "" ] (lib.versions.majorMinor gurobi.version); # 9.1.x -> "91"
+in
+python3Packages.buildPythonApplication rec {
+  pname = "HATCHet";
+  version = "0.4.9";
+
+  src = fetchFromGitHub {
+    owner = "raphael-group";
+    repo = "hatchet";
+    rev = "v${version}";
+    sha256 = "sha256-MB9XFbkLQTf6ZUPrisSzGU8Jeq6SrlMMCQtoyvx/Xvc=";
+  };
+  # cmake is a build input only; presumably the Python build invokes it itself - TODO confirm.
+  dontConfigure = true;
+  # postPatch instead of overriding patchPhase, so the stock patch phase and its hooks still run.
+  postPatch = ''
+    cat ${findgurobi} > FindGUROBI.cmake
+  '';
+
+  nativeBuildInputs = [ cmake ];
+  propagatedBuildInputs = with python3Packages; [
+    biopython
+    matplotlib
+    pandas
+    psutil
+    pyomo
+    pysam
+    requests
+    seaborn
+    scikit-learn
+    scipy
+  ];
+}
diff --git a/tools/hatchet-call.nix b/tools/hatchet-call.nix
new file mode 100644
index 0000000..a3bd5ca
--- /dev/null
+++ b/tools/hatchet-call.nix
@@ -0,0 +1,130 @@
+{ bionix
+, gurobiLicense ? null # non-null: exported as GRB_LICENSE_FILE; null: pkgs.cbc is added and HATCHET_COMPUTE_CN_SOLVER is "cbc"
+, count_reads ? true # this flag and the seven below are written to the [run] section of hatchet.ini
+, genotype_snps ? true
+, count_alleles ? true
+, combine_counts ? true
+, cluster_bins ? true
+, plot_bins ? true
+, compute_cn ? true
+, plot_cn ? true
+, size ? "50kb" # [count_reads]
+, mincov ? 8 # [genotype_snps] and [count_alleles]
+, maxcov ? 300 # [genotype_snps] and [count_alleles]
+, snps # required (no default); bgzipped and tabix-indexed, then referenced from [genotype_snps]
+, phase ? "None" # [combine_counts]
+, diploidbaf ? 0.08 # [cluster_bins]
+, tolerancerdr ? 0.15 # [cluster_bins]
+, tolerancebaf ? 0.04 # [cluster_bins]
+, sizethreshold ? 0.01 # [plot_bins]
+, figsize ? "6,3" # [plot_bins]
+, clones ? [ 2 6 ] # [compute_cn]; the list is written comma-separated
+, seeds ? 400 # [compute_cn]
+, minprop ? 0.03 # [compute_cn]
+, diploidcmax ? 6 # [compute_cn]
+, tetraploidcmax ? 12 # [compute_cn]
+, ghostprop ? 0.35 # [compute_cn]
+, limitinc ? 0.6 # [compute_cn]
+, blocklength ? "50kb" # [combine_counts]
+}:
+
+with bionix;
+with lib;
+with types;
+# Per-call inputs: `normal` is a single input, `tumours` a list of inputs.
+{ normal, tumours }:
+# getRef returns the `ref` recorded on a bam filetype (the only case handled here).
+let getRef = matchFiletype "hatchet" { bam = { ref, ... }: ref; };
+in
+# Evaluation fails unless every tumour carries the same reference as the normal.
+assert all (x: getRef normal == getRef x) tumours;
+
+let
+  ref = getRef normal;
+
+  # hatchet.ini rendered by generators.toINI; @SAMPLES@ and @PROCESSES@ are filled in by the build script.
+  ini =
+    let
+      bamPath = sample: "${lnBam sample}/input.bam";
+      coverage = { inherit mincov maxcov; };
+    in
+    pkgs.writeText "hatchet.ini" (generators.toINI { } {
+      run = {
+        inherit count_reads genotype_snps count_alleles combine_counts cluster_bins plot_bins compute_cn plot_cn;
+        bams = concatStringsSep " " (map bamPath tumours);
+        normal = bamPath normal;
+        output = "./out";
+        processes = "@PROCESSES@";
+        reference = "${lnRef ref}/ref.fa";
+        samples = "@SAMPLES@";
+      };
+      count_reads = { inherit size; };
+      genotype_snps = coverage // { snps = "${lnVcfBz snps}/vcf.bgz"; };
+      count_alleles = coverage;
+      combine_counts = { inherit phase blocklength; };
+      cluster_bins = { inherit tolerancebaf tolerancerdr diploidbaf; };
+      plot_bins = { inherit figsize sizethreshold; };
+      compute_cn = {
+        inherit seeds minprop diploidcmax tetraploidcmax ghostprop limitinc;
+        clones = concatStringsSep "," (map builtins.toString clones);
+      };
+    });
+
+  # Link directory holding the reference (ref.fa) beside its samtools.faidx and samtools.dict outputs.
+  lnRef = fasta:
+    let derive = tool: tool { } fasta;
+    in
+    linkOutputs { "ref.fa" = fasta; "ref.dict" = derive samtools.dict; "ref.fa.fai" = derive samtools.faidx; };
+
+  # Link directory pairing an alignment (input.bam) with its samtools.index output (input.bam.bai).
+  lnBam = aln:
+    let bai = samtools.index { } aln;
+    in
+    linkOutputs { "input.bam.bai" = bai; "input.bam" = aln; };
+
+  # Link directory with the bgzip-compressed VCF (vcf.bgz) and its samtools.tabix output (vcf.bgz.tbi).
+  lnVcfBz = variants:
+    let
+      packed = compression.bgzip { } variants;
+      tbi = samtools.tabix { } packed;
+    in
+    linkOutputs { "vcf.bgz.tbi" = tbi; "vcf.bgz" = packed; };
+
+  getSN = # executable "getSN <bam>": prints the first SM value found in the BAM header (SM, despite the SN in the name)
+    let # awk program: records split on space/tab/newline, fields on ":"; prints field 2 of the first SM record, then exits
+      script = pkgs.writeText "getSN.awk" ''
+        BEGIN{
+          FS=":"
+          RS="[ \t\n]"
+        }
+        $1=="SM"{print $2; exit}
+      '';
+    in # the wrapper calls samtools and awk by bare name, so both are resolved from PATH when it runs
+    pkgs.writeShellScriptBin "getSN" ''
+      exec samtools view -H "$1" | awk -f ${script}
+    '';
+
+in
+# Runs `hatchet run` on the substituted ini and copies ./out to $out; without a Gurobi licence cbc is added and selected.
+stage
+  (
+    {
+      name = "HATCHet";
+      passthru.multicore = true;
+      buildInputs = [ hatchet.app getSN pkgs.samtools pkgs.bcftools ] ++ optional (gurobiLicense == null) pkgs.cbc;
+      buildCommand = ''
+        # Get tumour names
+        names="${concatMapStringsSep " " (x: "$(getSN ${x})") tumours}"
+
+        # macro substitute ini file
+        substitute ${ini} hatchet.ini \
+          --replace "@PROCESSES@" "$NIX_BUILD_CORES" \
+          --replace "@SAMPLES@" "$names"
+
+        hatchet run hatchet.ini
+
+        cp -r out $out
+      '';
+    }
+    // (if gurobiLicense == null then { HATCHET_COMPUTE_CN_SOLVER = "cbc"; } else { GRB_LICENSE_FILE = gurobiLicense; })
+  )
diff --git a/tools/hatchet.nix b/tools/hatchet.nix
new file mode 100644
index 0000000..b4867d4
--- /dev/null
+++ b/tools/hatchet.nix
@@ -0,0 +1,8 @@
+# HATCHet entry points: `app` is the packaged program, `call` the bionix stage that runs it.
+{ bionix }:
+
+let inherit (bionix) callBionixE pkgs; in
+{
+  call = callBionixE ./hatchet-call.nix;
+  app = pkgs.callPackage ./hatchet-app.nix { };
+}
-- 
cgit v1.2.3