aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Justin Bedo <cu@cua0.org> 2022-02-07 13:18:19 +1100
committer Justin Bedo <cu@cua0.org> 2022-02-07 13:18:19 +1100
commit 55fa3d7ffc3d3793219afb7d92b7ac2de9d6ce55 (patch)
tree c13a8c18cd57b108c17afb427548156182c8c2ef
parent 5fc4c8fc0ce707c57204e57a2cd058c019bc8bd8 (diff)
hatchet: init
-rw-r--r-- default.nix 11
-rw-r--r-- tools/hatchet-app.nix 44
-rw-r--r-- tools/hatchet-call.nix 130
-rw-r--r-- tools/hatchet.nix 8
4 files changed, 188 insertions, 5 deletions
diff --git a/default.nix b/default.nix
index 842f55a..ab69803 100644
--- a/default.nix
+++ b/default.nix
@@ -52,6 +52,7 @@ let
star = callBionix ./tools/star.nix { };
genmap = callBionix ./tools/genmap.nix { };
subread = callBionix ./tools/subread.nix { };
+ hatchet = callBionix ./tools/hatchet.nix { };
slurm-run = callPackage ./lib/slurm.nix { };
slurm-exec = f: x: y:
@@ -100,13 +101,13 @@ let
else
abort "linkOutputs: unsupported type";
link = dst: src: ''
- ln -s ${recurse src} $(perl -e 'print $ENV{"${dst}"}') ; ln -s ${recurse src} $out/${dst}
- '';
+ ln -s ${recurse src} $(perl -e 'print $ENV{"${dst}"}') ; ln -s ${recurse src} $out/${dst}
+ '';
in
''
- mkdir $out
- ${lib.concatStringsSep "\n" (lib.mapAttrsToList link x)}
- '';
+ mkdir $out
+ ${lib.concatStringsSep "\n" (lib.mapAttrsToList link x)}
+ '';
in
pkgs.stdenvNoCC.mkDerivation {
name = "link-outputs";
diff --git a/tools/hatchet-app.nix b/tools/hatchet-app.nix
new file mode 100644
index 0000000..024d85c
--- /dev/null
+++ b/tools/hatchet-app.nix
@@ -0,0 +1,44 @@
+{ python3Packages, fetchFromGitHub, cmake, gurobi, writeText }:
+# HATCHet 0.4.9 built as a Python application; Gurobi is located through a generated FindGUROBI.cmake.
+let
+  # Replacement CMake find-module that hard-wires the store paths of the `gurobi` argument.
+  findgurobi = writeText "FindGUROBI.cmake" ''
+    set(GUROBI_CPP_LIB ${gurobi}/lib/libgurobi_c++.a)
+    set(GUROBI_LIB ${gurobi}/lib/libgurobi91.so)
+    set(GUROBI_INCLUDE_DIR ${gurobi}/include)
+    set(GUROBI_LIBRARIES ''${GUROBI_CPP_LIB} ''${GUROBI_LIB} -lpthread)
+    set(GUROBI_FOUND TRUE)
+  '';
+  # NOTE(review): libgurobi91.so above looks tied to one Gurobi release; confirm it is bumped together with `gurobi`.
+in
+python3Packages.buildPythonApplication rec {
+  pname = "HATCHet";
+  version = "0.4.9";
+
+  src = fetchFromGitHub {
+    owner = "raphael-group";
+    repo = "hatchet";
+    rev = "v${version}";
+    sha256 = "sha256-MB9XFbkLQTf6ZUPrisSzGU8Jeq6SrlMMCQtoyvx/Xvc=";
+  };
+
+  dontConfigure = true; # NOTE(review): presumably keeps cmake's setup hook from configuring the source root; confirm
+  # postPatch (rather than overriding patchPhase) keeps the stock patch phase, so `patches` and hooks still work.
+  postPatch = ''
+    cat ${findgurobi} > FindGUROBI.cmake
+  '';
+
+  nativeBuildInputs = [ cmake ];
+  propagatedBuildInputs = with python3Packages; [
+    biopython
+    matplotlib
+    pandas
+    psutil
+    pyomo
+    pysam
+    requests
+    seaborn
+    scikit-learn
+    scipy
+  ];
+}
diff --git a/tools/hatchet-call.nix b/tools/hatchet-call.nix
new file mode 100644
index 0000000..a3bd5ca
--- /dev/null
+++ b/tools/hatchet-call.nix
@@ -0,0 +1,130 @@
+{ bionix
+, gurobiLicense ? null # non-null: exported as GRB_LICENSE_FILE; null: HATCHET_COMPUTE_CN_SOLVER is set to "cbc"
+, count_reads ? true # this and the seven booleans below are written to the [run] section
+, genotype_snps ? true
+, count_alleles ? true
+, combine_counts ? true
+, cluster_bins ? true
+, plot_bins ? true
+, compute_cn ? true
+, plot_cn ? true
+, size ? "50kb" # [count_reads]
+, mincov ? 8 # [genotype_snps] and [count_alleles]
+, maxcov ? 300 # [genotype_snps] and [count_alleles]
+, snps # required, no default; bgzipped and tabix-indexed, then passed as snps in [genotype_snps]
+, phase ? "None" # [combine_counts]
+, diploidbaf ? 0.08 # [cluster_bins]
+, tolerancerdr ? 0.15 # [cluster_bins]
+, tolerancebaf ? 0.04 # [cluster_bins]
+, sizethreshold ? 0.01 # [plot_bins]
+, figsize ? "6,3" # [plot_bins]
+, clones ? [ 2 6 ] # [compute_cn]; the list is joined with ","
+, seeds ? 400 # [compute_cn]
+, minprop ? 0.03 # [compute_cn]
+, diploidcmax ? 6 # [compute_cn]
+, tetraploidcmax ? 12 # [compute_cn]
+, ghostprop ? 0.35 # [compute_cn]
+, limitinc ? 0.6 # [compute_cn]
+, blocklength ? "50kb" # [combine_counts]
+}:
+
+with bionix;
+with lib;
+with types;
+
+{ normal, tumours }: # normal: a single BAM; tumours: a list of BAMs (iterated below)
+
+let getRef = matchFiletype "hatchet" { bam = { ref, ... }: ref; }; # picks the `ref` field of a bam input; "hatchet" presumably labels type errors
+in
+
+assert all (x: getRef normal == getRef x) tumours; # every tumour must have the same reference as the normal
+
+let
+ ref = getRef normal;
+
+  ini =
+    let
+      # Path of a BAM inside the directory produced by lnBam.
+      bamPath = x: "${lnBam x}/input.bam";
+      coverage = { inherit mincov maxcov; };
+    in
+    # One attribute per INI section; rendered with generators.toINI.
+    pkgs.writeText "hatchet.ini" (generators.toINI { } {
+      count_reads.size = size;
+      genotype_snps = coverage // { snps = "${lnVcfBz snps}/vcf.bgz"; };
+      count_alleles = coverage;
+      combine_counts = { inherit blocklength phase; };
+      cluster_bins = { inherit diploidbaf tolerancerdr tolerancebaf; };
+      plot_bins = { inherit sizethreshold figsize; };
+      compute_cn = {
+        inherit seeds minprop diploidcmax tetraploidcmax ghostprop limitinc;
+        clones = builtins.concatStringsSep "," (map builtins.toString clones);
+      };
+      run = {
+        inherit count_reads genotype_snps count_alleles combine_counts cluster_bins plot_bins compute_cn plot_cn;
+        reference = "${lnRef ref}/ref.fa";
+        normal = bamPath normal;
+        bams = builtins.concatStringsSep " " (map bamPath tumours);
+        samples = "@SAMPLES@"; # placeholder, substituted in the build command
+        processes = "@PROCESSES@"; # placeholder, substituted in the build command
+        output = "./out";
+      };
+    });
+
+  lnRef = fasta: linkOutputs { # reference with its faidx index and dict under fixed names
+    "ref.dict" = samtools.dict { } fasta;
+    "ref.fa.fai" = samtools.faidx { } fasta;
+    "ref.fa" = fasta;
+  };
+
+  lnBam = aln:
+    let
+      index = samtools.index { } aln; # index linked next to the BAM as input.bam.bai
+    in
+    linkOutputs { "input.bam.bai" = index; "input.bam" = aln; };
+
+  lnVcfBz = vcf:
+    let
+      # bgzip output of the input VCF; the tabix index is built from it
+      compressed = compression.bgzip { } vcf;
+      index = samtools.tabix { } compressed;
+    in
+    linkOutputs { "vcf.bgz" = compressed; "vcf.bgz.tbi" = index; };
+
+  getSN = # helper executable: prints the first SM: value in the header of the BAM given as $1 (SM, despite the name)
+    let # the awk program splits records on whitespace and fields on ":", then stops at the first SM field
+      script = pkgs.writeText "getSN.awk" ''
+        BEGIN{
+          FS=":"
+          RS="[ \t\n]"
+        }
+        $1=="SM"{print $2; exit}
+      '';
+    in
+    pkgs.writeShellScriptBin "getSN" ''
+      ${pkgs.samtools}/bin/samtools view -H "$1" | ${pkgs.gawk}/bin/awk -f ${script}
+    '';
+
+in
+stage # runs `hatchet run` on the generated INI file and copies ./out to $out
+  ({
+    name = "HATCHet";
+    buildInputs = [ hatchet.app getSN pkgs.samtools pkgs.bcftools ] ++ optional (gurobiLicense == null) pkgs.cbc; # cbc only without a Gurobi licence
+    buildCommand = ''
+      # Get tumour names, in the same order as the bams entry; fail fast when a header has no SM tag
+      names=""
+      for bam in ${concatMapStringsSep " " (x: "${x}") tumours}; do
+        name="$(getSN "$bam")"
+        [ -n "$name" ] || { echo "HATCHet: no SM tag in header of $bam" >&2; exit 1; }
+        names="$names''${names:+ }$name"
+      done
+      # macro substitute ini file
+      substitute ${ini} hatchet.ini \
+        --replace "@PROCESSES@" "$NIX_BUILD_CORES" \
+        --replace "@SAMPLES@" "$names"
+      hatchet run hatchet.ini
+      cp -r out $out
+    '';
+    passthru.multicore = true;
+  } // (if gurobiLicense != null # the solver is selected through an environment variable
+  then { GRB_LICENSE_FILE = gurobiLicense; }
+  else { HATCHET_COMPUTE_CN_SOLVER = "cbc"; }))
diff --git a/tools/hatchet.nix b/tools/hatchet.nix
new file mode 100644
index 0000000..b4867d4
--- /dev/null
+++ b/tools/hatchet.nix
@@ -0,0 +1,8 @@
+{ bionix }:
+
+with bionix;
+
+{
+  call = callBionixE ./hatchet-call.nix; # pipeline stage defined in hatchet-call.nix
+  app = pkgs.callPackage ./hatchet-app.nix { }; # package defined in hatchet-app.nix
+}