aboutsummaryrefslogtreecommitdiff
path: root/lib/qsub.nix
blob: 2528162bc7305aa85a95a147b2ec826144a00974 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
{ stdenv, lib, writeScript }:

with lib;

let
  # Shell-quote string values; anything that is not a string is returned unchanged.
  escape = value:
    if builtins.isString value
    then escapeShellArg value
    else value;

in
# Wrap a derivation so that its original builder is executed inside a PBS job
# submitted with `qsub`, while the Nix-side builder only submits the job, waits
# for it and relays its log and exit code.
#
# Configuration attributes:
#   ppn        - processors requested (`ppn=`) when `drv.multicore` is true;
#                otherwise 1 is requested. The same value is assigned to
#                NIX_BUILD_CORES in the submit script.
#   mem        - memory request, rendered as `mem=<mem>gb`.
#   walltime   - inserted verbatim after `walltime=`.
#   queue      - optional; when non-null, passed as `-q <queue>`.
#   qsubFlags  - optional string appended verbatim to the qsub command line.
#   tmpDir     - directory under which `qsub-<job id>` work directories are
#                created by the submit script and read by the job script.
#                NOTE(review): presumably has to be visible from both the
#                submitting host and the compute node; confirm.
#   sleepTime  - argument handed to `sleep` between retries and polls.
#   qsubPath   - value assigned to PATH in the submit script, so every command
#                it runs (qsub, qstat, qdel, grep, cat, ...) is looked up there.
#
# The result is a function taking `drv` (which must carry a `multicore`
# attribute) and returning the overridden derivation.
{ ppn
, mem
, walltime
, queue ? null
, qsubFlags ? null
, tmpDir
, sleepTime
, qsubPath ? "/usr/bin"
}:
drv:
let ppnReified = if drv.multicore then ppn else 1;
in
overrideDerivation drv ({ args, builder, name, ... }: {
  builder = "/bin/bash";
  args =
    let
      # Job script executed by the batch system. It waits until the submit
      # script has published the work directory for this job id, imports the
      # environment dumped into `nix-set` (exporting everything via `set -a`),
      # points the temp-dir variables at the work directory and runs the
      # original builder there. Output goes to `qsub-log` and the exit status
      # of the builder to `qsub-exit`.
      script = writeScript "qsub-script" ''
        #!${stdenv.shell}
        while [ ! -e ${tmpDir}/qsub-$PBS_JOBID ] ; do
          sleep ${toString sleepTime}
        done
        set -a
        . ${tmpDir}/qsub-$PBS_JOBID/nix-set
        set +a
        TMPDIR=${tmpDir}/qsub-$PBS_JOBID
        TEMP=$TMPDIR
        TMP=$TMPDIR
        NIX_BUILD_TOP=$TMPDIR
        cd $TMPDIR
        ${builder} ${concatMapStringsSep " " escape args} &> qsub-log
        echo $? > qsub-exit
      '';

      # Submit script run by Nix as the actual builder. It submits the job
      # script (retrying while qsub reports "Please retry"), copies the build
      # directory and a dump of the shell variables to the work directory,
      # polls qstat until the job is unknown or in state C, then prints the
      # job log and exits with the recorded exit code (1 if none was written).
      #
      # NOTE(review): the `%%.` expansion used for qstat only removes a single
      # trailing dot from the job id; if the intent was to drop a server suffix
      # it would need `%%.*`. Left unchanged; confirm against the cluster.
      qsub = writeScript "qsub" ''
        #!${stdenv.shell}
        PATH=${qsubPath}
        SHELL=/bin/sh
        NIX_BUILD_CORES=${toString ppnReified}

        while : ; do
          # stdout (the job id) and stderr are captured separately so that
          # diagnostics on stderr can be inspected without polluting the id.
          qsub -l nodes=1:ppn=${toString ppnReified},mem=${toString mem}gb,walltime=${walltime} \
            -N "${name}" \
            ${optionalString (queue != null) "-q ${queue}"} \
            ${optionalString (qsubFlags != null) qsubFlags} \
            ${script} > id 2> qsub-err
          qsubStatus=$?
          # Relay anything qsub wrote to stderr so it still reaches the build log.
          cat qsub-err >&2
          if [ $qsubStatus -eq 0 ] ; then
            break
          fi
          # Only a "Please retry" response is treated as transient.
          if ! grep "Please retry" id qsub-err > /dev/null ; then
            cat id >&2
            exit 1
          fi
          sleep ${toString sleepTime}
        done
        id=$(cat id)
        echo $id

        # On any exit: delete the job (ignoring failures), wait one poll
        # interval, then remove the work directory.
        function cleanup {
          qdel $id 2>/dev/null || true
          sleep ${toString sleepTime}
          rm -rf ${tmpDir}/qsub-$id
        }
        trap cleanup INT TERM EXIT

        # Publishing the work directory is what releases the waiting job script.
        cp -r $TMPDIR ${tmpDir}/qsub-$id
        set > ${tmpDir}/qsub-$id/nix-set
        until qstat -f ''${id%%.} 2>&1 | grep "\(Unknown Job\|job_state = C\)" > /dev/null ; do
          sleep ${toString sleepTime}
        done
        cat ${tmpDir}/qsub-$id/qsub-log
        if [ -e ${tmpDir}/qsub-$id/qsub-exit ]; then
          exitCode=$(cat ${tmpDir}/qsub-$id/qsub-exit)
        else
          exitCode=1
        fi
        exit $exitCode
      '';

    in
    [ "-c" qsub ];
})