From 09864bb48d3452f52c4ff4f971ac48989726d898 Mon Sep 17 00:00:00 2001 From: Justin Bedo Date: Fri, 23 Nov 2018 11:21:51 +1100 Subject: qsub: be more specific with qstat polling --- lib/qsub.nix | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/qsub.nix b/lib/qsub.nix index f0b0660..12981f9 100644 --- a/lib/qsub.nix +++ b/lib/qsub.nix @@ -21,14 +21,14 @@ ''; qsub = writeScript "qsub" '' - #!/bin/bash + #!${stdenv.shell} PATH=/usr/bin:/bin:/usr/sbin:/sbin SHELL=/bin/sh NIX_BUILD_CORES=${toString ppn} id=$(qsub -l nodes=1:ppn=${toString ppn},mem=${toString mem}gb,walltime=${walltime} -N "${name}" ${script}) function cleanup { - qstat ''${id%%.} 2> /dev/null > /dev/null && qdel $id || true + qdel $id 2>/dev/null || true sleep 5 rm -rf ${tmpDir}/$id } @@ -36,12 +36,16 @@ cp -r $TMPDIR ${tmpDir}/$id set > ${tmpDir}/$id/nix-set - while qstat ''${id%%.} 2> /dev/null > /dev/null ; do - sleep 5 + until qstat ''${id%%.} 2>&1 | grep "Unknown Job" > /dev/null ; do + sleep 60 done cat ${tmpDir}/$id/qsub-stderr >&2 cat ${tmpDir}/$id/qsub-stdout - exitCode=$(cat ${tmpDir}/$id/qsub-exit) + if [ -e ${tmpDir}/$id/qsub-exit ]; then + exitCode=$(cat ${tmpDir}/$id/qsub-exit) + else + exitCode=1 + fi exit $exitCode ''; -- cgit v1.2.3 From 1a75f5789073c0a655fd20ba269e9509d773b1fc Mon Sep 17 00:00:00 2001 From: Justin Bedo Date: Fri, 23 Nov 2018 14:41:37 +1100 Subject: qsub: update to check job_state field --- lib/qsub.nix | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/qsub.nix b/lib/qsub.nix index 12981f9..d6a34fe 100644 --- a/lib/qsub.nix +++ b/lib/qsub.nix @@ -36,7 +36,7 @@ cp -r $TMPDIR ${tmpDir}/$id set > ${tmpDir}/$id/nix-set - until qstat ''${id%%.} 2>&1 | grep "Unknown Job" > /dev/null ; do + until qstat -f ''${id%%.} 2>&1 | grep "\(Unknown Job\|job_state = C\)" > /dev/null ; do sleep 60 done cat ${tmpDir}/$id/qsub-stderr >&2 -- cgit v1.2.3