aboutsummaryrefslogtreecommitdiff
path: root/lib/qsub.nix
blob: a8827a85195e5a372c0f84f0aa1d550a64867b9b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
{ stdenv, lib, writeScript }:

with lib;

let
  # Shell-quote string values with escapeShellArg; values of any other type
  # are returned unchanged.
  escape = value:
    if builtins.isString value then escapeShellArg value else value;

# Wrap a derivation so that its builder is submitted as a PBS-style batch job
# (qsub/qstat/qdel) instead of being run directly.
#
# Arguments:
#   ppn        processors per node requested when `drv.multicore` is true
#              (otherwise 1 is requested)
#   mem        memory request, rendered as `mem=<mem>gb`
#   walltime   walltime limit, inserted verbatim into the `-l` resource list
#   queue      optional queue name, passed as `-q <queue>`
#   qsubFlags  optional extra flags, inserted verbatim into the qsub command
#   tmpDir     directory holding the per-job `qsub-<jobid>` staging
#              directories; written by the submitting builder and read by the
#              job script
#   sleepTime  argument given to `sleep` in every polling/retry loop
#   qsubPath   value assigned to PATH inside the submitting script
#   drv        derivation to wrap; must carry a `multicore` attribute
#
# The overridden derivation runs `/bin/bash -c <qsub script>`. That script
# submits `qsub-script`, stages the build directory and environment under
# tmpDir, polls qstat until the job is gone or completed, prints the job's
# log and exits with the exit code the job recorded (1 if none was recorded).
in { ppn, mem, walltime, queue ? null, qsubFlags ? null, tmpDir, sleepTime
, qsubPath ? "/usr/bin" }:
drv:
let ppnReified = if drv.multicore then ppn else 1;
in overrideDerivation drv ({ args, builder, name, ... }: {
  builder = "/bin/bash";
  args = let
    # Job script executed by the batch system: waits for the staged
    # environment, loads it, then runs the original builder inside the staged
    # directory, recording its output in qsub-log and its status in qsub-exit.
    script = writeScript "qsub-script" ''
      #!${stdenv.shell}
      # nix-set is renamed into place last by the submitter, so once it exists
      # the staged directory is complete.
      while [ ! -e ${tmpDir}/qsub-$PBS_JOBID/nix-set ] ; do
        sleep ${toString sleepTime}
      done
      set -a
      . ${tmpDir}/qsub-$PBS_JOBID/nix-set
      set +a
      TMPDIR=${tmpDir}/qsub-$PBS_JOBID
      TEMP=$TMPDIR
      TMP=$TMPDIR
      NIX_BUILD_TOP=$TMPDIR
      cd $TMPDIR
      ${builder} ${concatMapStringsSep " " escape args} &> qsub-log
      echo $? > qsub-exit
    '';

    # Submitting script run as the derivation's builder.
    #
    # NOTE(review): `${id%%.}` below only strips a single trailing dot from
    # the job id; if the intent was to drop a server suffix it would need to
    # be `${id%%.*}` -- confirm before changing.
    qsub = writeScript "qsub" ''
      #!${stdenv.shell}
      PATH=${qsubPath}
      SHELL=/bin/sh
      NIX_BUILD_CORES=${toString ppnReified}

      while : ; do
        qsub -l nodes=1:ppn=${toString ppnReified},mem=${
          toString mem
        }gb,walltime=${walltime} \
          -N "${name}" \
          ${optionalString (queue != null) "-q ${queue}"} \
          ${optionalString (qsubFlags != null) qsubFlags} \
          ${script} > id 2> qsub-err
        if [ $? -eq 0 ] ; then
          # Keep any diagnostics visible in the build log, but out of the id.
          cat qsub-err >&2
          break
        fi
        # Diagnostics are captured separately so the retry check can see them
        # regardless of which stream qsub used.
        if ! grep "Please retry" id qsub-err > /dev/null ; then
          cat id qsub-err >&2
          exit 1
        fi
        sleep ${toString sleepTime}
      done
      id=$(cat id)
      echo $id

      function cleanup {
        qdel $id 2>/dev/null || true
        sleep ${toString sleepTime}
        rm -rf ${tmpDir}/qsub-$id
      }
      trap cleanup INT TERM EXIT

      cp -r $TMPDIR ${tmpDir}/qsub-$id
      # Write the environment under a temporary name and rename it, so the job
      # script never sources a partially written file.
      set > ${tmpDir}/qsub-$id/nix-set.tmp
      mv ${tmpDir}/qsub-$id/nix-set.tmp ${tmpDir}/qsub-$id/nix-set
      until qstat -f ''${id%%.} 2>&1 | grep "\(Unknown Job\|job_state = C\)" > /dev/null ; do
        sleep ${toString sleepTime}
      done
      cat ${tmpDir}/qsub-$id/qsub-log
      if [ -e ${tmpDir}/qsub-$id/qsub-exit ]; then
        exitCode=$(cat ${tmpDir}/qsub-$id/qsub-exit)
      else
        exitCode=1
      fi
      exit $exitCode
    '';

  in [ "-c" qsub ];
})