Commit efa67b1f authored by Christian Meesters

now for blast, too: the cleanup script

parent 90765be8
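The script below defines a helper, shrink_job, which watches a running multi-node Slurm job and returns nodes to the scheduler once no job step runs on them anymore; in the STAR case the shared-memory genome index has to be removed from the freed nodes first.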
#!/bin/bash
function shrink_job {
    # set up a dummy job step - without it the freed nodes will not become
    # available
    dummy_step="srun --cpu-bind=q --mem-bind=q -n 1 -N1 --exclusive -c $SLURM_CPUS_PER_TASK --jobid $SLURM_JOBID --mem-per-cpu=300 echo cleanup on $(hostname)"
    # loop until we are down to 2 running nodes
    while true; do
        # which hosts are we running on?
        HOSTLIST=$(scontrol show hostname "$SLURM_JOB_NODELIST" | paste -d, -s | tr ',' ' ')
        # transfer this into an array
        read -r -a HOSTLIST <<< "$HOSTLIST"
        # and which ones shall we keep? (those with steps still running)
        keep=()
        for host in $(squeue -u "$USER" --steps -j "$SLURM_JOB_ID" -h -o '%j %N' | grep -v extern | cut -f2 -d ' '); do
            keep+=("$host")
        done
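        # (the extern step, if present, spans every node of the allocation,
        # so it has to be filtered out, or no node would ever look idle)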
        set -x
        # in the case of STAR we need to clean up the then-empty hosts
        # (unload the shared-memory genome index), before we can hand
        # them back to Slurm!
        if [[ "STAR" = "$executable" ]]; then  # $executable is set elsewhere in the full script
            leftover=()
            for i in "${HOSTLIST[@]}"; do
                skip=
                for j in "${keep[@]}"; do
                    [[ $i == $j ]] && { skip=1; break; }
                done
                [[ -n $skip ]] || leftover+=("$i")
            done
            declare -p leftover  # debug: print the nodes about to be released
            # now loop over those leftover nodes and clear shm
            #for node in "${leftover[@]}"; do
            #    srun -w $node STAR --runThreadN 1 --genomeLoad Remove --genomeDir ${_arg_reference}
            #    ssh $node STAR --runThreadN 1 --genomeLoad Remove --genomeDir ${_arg_reference}
            #done
        fi
        set +x
        if [ ${#keep[@]} -lt ${#HOSTLIST[@]} ]; then
            # transform the array into a Slurm-readable nodelist - into a
            # separate variable, so the count in ${#keep[@]} below stays intact
            keep_list=$(echo "${keep[@]}" | tr ' ' ',')
            # and shrink the job
            scontrol update JobId="$SLURM_JOB_ID" NodeList="$keep_list"
            # sourcing the resize script which Slurm writes updates the SLURM_*
            # environment variables (e.g. SLURM_JOB_NODELIST) for the next iteration
            . slurm_job_${SLURM_JOB_ID}_resize.sh
            rm slurm_job_${SLURM_JOB_ID}_resize.*  # as a csh-file is written, too
        fi
        ### How many hosts should be left running before we stop shrinking?
        # (Otherwise, this would be an endless loop.)
        #
        # 1. the job master
        # 2. the node this script runs on (may be different from 1)
        #    - no others
        # Therefore:
        if [ ${#keep[@]} -le 2 ]; then
            $dummy_step
            break
        fi
        # wait a little while
        sleep 10
    done
    # let's finally exit
    exit
}
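A minimal usage sketch for context - assuming the function above is shipped as cleanup.sh and sourced into the batch script; the file names, resource requests and the blastn invocation are illustrative, not part of this commit:

#!/bin/bash
#SBATCH --nodes=4
#SBATCH --ntasks=4
#SBATCH --cpus-per-task=8

source cleanup.sh  # hypothetical file name for the script above

# one step per node; as the steps finish, their nodes fall idle
for chunk in query_chunk_*.fasta; do
    srun -N1 -n1 --exclusive blastn -query "$chunk" -db nt -out "${chunk%.fasta}.out" &
done

# run the monitor in the background: its final exit then only ends the
# background subshell, while the freed nodes are returned to Slurm
shrink_job &

# wait for the remaining steps (and the monitor) to finish
wait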