-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrunit
executable file
·38 lines (33 loc) · 983 Bytes
/
runit
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/bin/bash
#
# runit - run a script on random subsets of nodes through GNU parallel.
#
# USAGE: runit nodelist script n N [N...]
#   nodelist  file listing all nodes that may be used, one per line
#             (handed to `parallel --sshloginfile` after sub-sampling)
#   script    command executed on the nodes; parallel appends the job
#             index, so it is invoked as `script n i` for i = 1..N
#   n         python jobs per node; passed through to the script, which
#             decides how many python interpreters are actually started
#   N         number of nodes to run on; one run is done per given N
#
# For every N (values larger than the node list are skipped) N nodes are
# drawn at random, the script is started N times across them with one job
# slot per node, the number of output lines containing "ok" / "fail" is
# printed, and all remaining output lines are stored in
#   $PWD/log/<script>.<n>.<N>.<date>.log

usage() {
  printf 'USAGE: %s nodelist script n N [N...]\n' "${0##*/}" >&2
  exit 2
}

die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

(( $# >= 4 )) || usage

nodelist=$1 # list of all nodes to run on
script=$2
n=$3
shift 3

[[ -r "$nodelist" ]] || die "cannot read node list: $nodelist"
nnodes=$(wc -l < "$nodelist") || die "cannot count nodes in $nodelist"

# A failing `module load` is only a warning: parallel may already be on
# PATH without the module system. What matters is checked right after.
if ! module load parallel; then
  printf '%s: warning: "module load parallel" failed\n' "${0##*/}" >&2
fi
command -v parallel > /dev/null || die "GNU parallel not found in PATH"

# The node subset lives in a private temp file instead of next to the
# node list, so concurrent runs cannot clobber each other and no write
# access to the node list's directory is needed.
node_subset=$(mktemp) || die "mktemp failed"
raw_log=
cleanup() {
  rm -f -- "$node_subset"
  if [[ -n "$raw_log" ]]; then
    rm -f -- "$raw_log"
  fi
}
trap cleanup EXIT

for N in "$@"; do
  # Validate before any arithmetic test: arithmetic evaluation of an
  # arbitrary string treats it as an expression, not as a number.
  if [[ ! "$N" =~ ^[0-9]+$ ]]; then
    printf '%s: skipping N=%s: not a non-negative integer\n' \
      "${0##*/}" "$N" >&2
    continue
  fi
  # 10# forces base 10 so that values such as 08 are not read as octal.
  if (( 10#$N > nnodes )); then
    printf '%s: skipping N=%s: only %s nodes in %s\n' \
      "${0##*/}" "$N" "$nnodes" "$nodelist" >&2
    continue
  fi

  joblog=$PWD/log/${script}.${n}.${N}.$(date +'%F_%H-%M').log
  raw_log=${joblog}.tmp
  # dirname (rather than a fixed $PWD/log) also covers a script argument
  # that contains a directory part.
  mkdir -p -- "$(dirname -- "$joblog")" \
    || die "cannot create log directory for $joblog"

  shuf -n "$N" -- "$nodelist" > "$node_subset" \
    || die "cannot sample $N nodes from $nodelist"

  # parallel's exit status (number of failed jobs) is deliberately not
  # checked; success and failure are counted from the job output below.
  seq "$N" \
    | parallel --jobs 1 --env PATH \
      --sshdelay 0.001 \
      --workdir "$PWD" \
      --sshloginfile "$node_subset" \
      --bar \
      "$script" "$n" > "$raw_log"

  # NOTE(review): these are substring matches, so any output line that
  # merely contains "ok" or "fail" is counted and filtered from the log.
  printf 'OK: %s\n' "$(grep -c ok -- "$raw_log")"
  printf 'FAIL: %s\n' "$(grep -c fail -- "$raw_log")"
  grep -Ev 'ok|fail' -- "$raw_log" > "$joblog"

  rm -f -- "$raw_log"
  raw_log=
done