aboutsummaryrefslogtreecommitdiff
path: root/lib/qsub.nix
blob: f5a00ec5c816389566448ac0f40409cabd1e597b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# Wraps a derivation so that its builder is submitted as a PBS/Torque batch
# job through `qsub` instead of being executed directly.
#
# Arguments of the returned function:
#   ppn        processors per node requested when `drv.multicore` is true;
#              otherwise 1 is requested
#   mem        memory request, rendered as "<mem>gb"
#   walltime   walltime request, passed through verbatim
#   queue      optional queue name (adds "-q <queue>")
#   qsubFlags  optional extra flags, spliced verbatim into the qsub command
#   tmpDir     directory in which the per-job "qsub-<jobid>" directory is
#              created (presumably a filesystem shared between the submit
#              host and the compute nodes -- TODO confirm)
#   sleepTime  argument given to `sleep` between polls and retries
#   qsubPath   value assigned to PATH inside the submit script; every tool
#              the script calls (qsub, qstat, qdel, grep, cat, cp, mv, rm,
#              sleep) is looked up there
#   drv        the derivation to wrap; must expose a `multicore` attribute
{stdenv, lib, writeScript}:

with lib;

{ ppn, mem, walltime, queue ? null, qsubFlags ? null, tmpDir, sleepTime, qsubPath ? "/usr/bin" }:
drv:
  let ppnReified = if drv.multicore then ppn else 1;
  in lib.overrideDerivation drv ({ args, builder, name, ... }: {
    builder = "/bin/bash";
    args = let
      # Script executed by the batch system. It waits until the submitter
      # has published the environment dump, imports it, and then runs the
      # original builder inside the per-job directory, recording its output
      # and exit status there.
      script = writeScript "qsub-script" ''
        #!${stdenv.shell}
        # Wait for the environment file itself rather than for the job
        # directory: the directory appears as soon as the copy starts, which
        # is before nix-set has been written.
        while [ ! -e ${tmpDir}/qsub-$PBS_JOBID/nix-set ] ; do
          sleep ${toString sleepTime}
        done
        set -a
        . ${tmpDir}/qsub-$PBS_JOBID/nix-set
        set +a
        TMPDIR=${tmpDir}/qsub-$PBS_JOBID
        TEMP=$TMPDIR
        TMP=$TMPDIR
        NIX_BUILD_TOP=$TMPDIR
        cd $TMPDIR
        ${builder} ${lib.escapeShellArgs args} &> qsub-log
        echo $? > qsub-exit
      '';

      # Replacement builder: submits `script`, hands the build directory and
      # environment over to the job, polls until the job is gone or
      # completed, then replays the job's log and exit code.
      #
      # NOTE(review): the expansion passed to qstat below only strips a
      # literal trailing "." from the job id; if the intent was to drop the
      # ".server" suffix the pattern would need to be "%%.*". Left unchanged
      # because the effect on qstat cannot be verified from this file.
      qsub = writeScript "qsub" ''
        #!${stdenv.shell}
        PATH=${qsubPath}
        SHELL=/bin/sh
        NIX_BUILD_CORES=${toString ppnReified}

        while : ; do
          # stdout (the job id) and stderr (diagnostics) go to separate
          # files so that the retry check below can actually see the error
          # text without polluting the job id.
          qsub -l nodes=1:ppn=${toString ppnReified},mem=${toString mem}gb,walltime=${walltime} \
            -N "${name}" \
            ${optionalString (queue != null) "-q ${queue}"} \
            ${optionalString (qsubFlags != null) qsubFlags} \
            ${script} > id 2> qsub-err
          if [ $? -eq 0 ] ; then
            break
          fi
          # Only a "Please retry" answer is treated as transient.
          if ! grep "Please retry" id qsub-err > /dev/null ; then
            cat id qsub-err >&2
            exit 1
          fi
          sleep ${toString sleepTime}
        done
        # Keep any warnings of the successful submission visible.
        cat qsub-err >&2
        id=$(cat id)
        echo $id

        function cleanup {
          qdel $id 2>/dev/null || true
          sleep ${toString sleepTime}
          rm -rf ${tmpDir}/qsub-$id
        }
        trap cleanup INT TERM EXIT

        cp -r $TMPDIR ${tmpDir}/qsub-$id
        # Publish the environment with a rename so the waiting job never
        # sources a partially written file.
        set > ${tmpDir}/qsub-$id/nix-set.tmp
        mv ${tmpDir}/qsub-$id/nix-set.tmp ${tmpDir}/qsub-$id/nix-set
        until qstat -f ''${id%%.} 2>&1 | grep "\(Unknown Job\|job_state = C\)" > /dev/null ; do
          sleep ${toString sleepTime}
        done
        cat ${tmpDir}/qsub-$id/qsub-log
        # A missing exit file means the job never got as far as recording a
        # status; report that as a failure.
        if [ -e ${tmpDir}/qsub-$id/qsub-exit ]; then
          exitCode=$(cat ${tmpDir}/qsub-$id/qsub-exit)
        else
          exitCode=1
        fi
        exit $exitCode
      '';

      in [ "-c" qsub ];
  })