...
 
@@ -41,9 +41,15 @@ START=$(date +%s.%N)
module purge
# load the most current version of GNU parallel
module load tools/parallel
module load tools/parallel/20190822
#module load lang/Python/3.6.4-foss-2018a
module load lang/Python/3.7.4-GCCcore-8.3.0
#TODO: find a workaround for the bug in BLAST+ AND allow selecting the version by hand
module load bio/BLAST+/2.9.0-gompi-2019a
#module load bio/BLAST+/2.7.1-foss-2018a
module load lang/Python/3.6.4-foss-2018a
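After these loads it can be worth confirming which versions actually ended up active, in particular for BLAST+ because of the bug mentioned in the TODO; a quick check (the grep pattern is only an example):
module list 2>&1 | grep -iE 'parallel|python|blast'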
### setup variable for THIS script; giving absolute path if necessary
SCRIPT="$0"
@@ -444,7 +450,6 @@ if [[ $_arg_test == "off" ]] && [ ! -d "$DATABASE" ]; then
exit 1
fi
#TODO: differentiate between blastn, blastx and blastp -- for now, all are treated the same
if [ "blastx" = "${_arg_executable,,}" ]; then
executable="blastx"
threads=2
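The TODO above asks for program-specific handling; a hypothetical way to differentiate the three programs once that is tackled (thread counts are illustrative placeholders, not the script's actual tuning):
case "${_arg_executable,,}" in
    blastx) executable="blastx"; threads=2 ;;
    blastp) executable="blastp"; threads=2 ;;
    blastn) executable="blastn"; threads=1 ;;
    *)      error "unsupported executable: $_arg_executable"; exit 1 ;;
esac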
@@ -545,7 +550,7 @@ if [ -z "$SLURM_JOB_ID" ] && [[ $_arg_test == "off" ]]; then
echo "removing directory $JOBTAG"
rm -r $JOBTAG
else
echo "So you want to continue regardless? (e.g. scratch files already existing) ([y]/n)"
echo "So, you want to continue regardless (using the existing scratch files)? ([y]/n)"
echo -n '>'
read ENTER
if [[ ${ENTER,,} = 'n' || ${ENTER,,} == 'no' ]] ; then
@@ -572,11 +577,13 @@ FASTA="$FASTAPATH/$FASTAID"
### set up the blast and splitup executables; check that they exist
allowed_executables="blastx blastp blastn"
if [[ ! $allowed_executables =~ (^|[[:space:]])"$_arg_executable"($|[[:space:]]) ]]; then
# BLASTEXE=$(which $_arg_executable)
#else
if [[ ! $allowed_executables =~ (^| [[:space:]])"$_arg_executable"($| ) ]]; then
BLASTEXE=$(which $_arg_executable)
else
error "$_arg_executable ought to be one of [$allowed_executables]"
exit 1
fi
export _arg_executable
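The bracketed expression only matches $_arg_executable when it appears as a whole word in the space-separated list, so a partial name such as blast is rejected; a quick illustration of the same test:
allowed="blastx blastp blastn"
[[ $allowed =~ (^|[[:space:]])blastp($|[[:space:]]) ]] && echo "accepted"   # whole word: matches
[[ $allowed =~ (^|[[:space:]])blast($|[[:space:]]) ]] || echo "rejected"    # substring only: no match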
### how large is the reference directory?
_arg_ramdisk=$(du -shL --block-size=1M "$_arg_database" | cut -f1 )M
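du with -L and --block-size=1M reports the dereferenced total as a number of MiB followed by the path, so cut -f1 keeps just the number and the trailing M turns it into a memory string SLURM understands. One could sanity-check the result like this (the 190000 MiB node limit is an assumed example, not a value from the script):
size_mb=${_arg_ramdisk%M}
(( size_mb > 190000 )) && warning "reference of ${_arg_ramdisk} may not fit into the node-local ramdisk"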
@@ -665,40 +672,30 @@ RAMDISK=$JOBDIR/ramdisk
HOSTLIST=$(scontrol show hostname $SLURM_JOB_NODELIST | paste -d, -s | tr ',' ' ')
QUEUE=''
#myhost=$(hostname -f)
stagefile=/localscratch/$SLURM_JOB_ID/dummy_stagein.sh
rstagefile=/localscratch/$SLURM_JOB_ID/stagein.sh
source "${SCRIPT_PATH}"/stage_in.sh
stage_in_writer
chmod +x $stagefile
# distribute the generated stage-in script to the nodes
sbcast $stagefile $rstagefile
rm $stagefile
stagefile=$rstagefile
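stage_in_writer (sourced from stage_in.sh) generates a small per-node copy script; sbcast then places it under /localscratch on every allocated node in a single collective transfer, and each node runs its local copy via the srun call in the loop below. As a rough idea of what such a generated script might contain (purely hypothetical, the real content comes from stage_in.sh, and the paths would be baked in by the writer):
#!/bin/bash
# copy every reference file into this node's ramdisk (sketch only)
for f in /path/to/reference/*; do
    dd bs=4096 if="$f" of="/localscratch/$SLURM_JOB_ID/ramdisk/$(basename "$f")"
done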
# slurm itself would not require this loop, but since we already run
# asynchronous tasks, we keep track of them with the queue
for HOST in $HOSTLIST; do
if [ -L ${DATABASEPATH} ]; then
warning "If the reference directory is a link, fast stage-in is not possible."
for fname in ${DATABASEPATH}/*; do
eval "ssh $HOST cp -L $fname ${RAMDISK}/$(basename $fname)" &
PID=$!
queue $PID
done
else
for fname in ${DATABASEPATH}/*; do
if [ -L "$fname" ]; then
eval "ssh $HOST cp -L $fname ${RAMDISK}/$(basename $fname)" &
PID=$!
queue $PID
else
eval "ssh $HOST dd bs=4096 if=$fname of=${RAMDISK}/$(basename $fname)" &
PID=$!
queue $PID
fi
done
fi
# TODO: check for dereferencing of links before enabling
# TODO: check performance before re-enabling
#sbcast $FILE $RAMDISK/$(basename $FILE)
srun -w $HOST -N1 -n1 -c1 --mem-per-cpu=5000M $stagefile &
queue $!
done
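queue collects the PIDs of the background srun tasks so that the wait loop further down can poll them; a minimal sketch of the kind of helpers this assumes (the script defines its own versions elsewhere, the names here are illustrative):
queue() {                        # remember a background PID
    QUEUE="$QUEUE $1"
}
prune_queue() {                  # keep only PIDs that are still alive
    local still_running=""
    for pid in $QUEUE; do
        kill -0 "$pid" 2>/dev/null && still_running="$still_running $pid"
    done
    QUEUE=$still_running
}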
#DATABASE=$RAMDISK/$DATABASE
DATABASE=$RAMDISK #/$(basename $DATABASEPATH)
WORKDIR=$PWD/$BLASTDIR/$SLURM_JOB_NAME
# this script must never write its output to a user's $HOME
if [[ "$WORKDIR" == *'home'* ]]; then
eror "Cowardly refusing to operate in a home directory."
error "Cowardly refusing to operate in a home directory."
fi
# set path names to ease maintenance
SPLITFILEDIR=scratch
@@ -709,9 +706,8 @@ if [ ! -d "$WORKDIR/$SPLITFILEDIR" ]; then
mkdir -p "$WORKDIR/output" || exit 1;
cd "$WORKDIR"
echo "executing scratch generator on $FASTA ($_arg_splitup_per_queryfile entries per file)"
eval "${SCRIPT_PATH}/splitter.py $FASTA $_arg_splitup_per_queryfile" & # splitup queryfile
PID=$!
queue $PID
"${SCRIPT_PATH}/splitter.py $FASTA $_arg_splitup_per_queryfile" & # splitup queryfile
queue $!
fi
# wait until the copy and a possible scratch generation are finished
@@ -720,6 +716,13 @@ while [[ ! -z "$(echo $QUEUE| tr -d ' ')" ]]; do
sleep 5
done
DATABASE=$(find $RAMDISK -name "*${DBSUFFIX}" -print -quit)
#DATABASE=$RAMDISK/db${DBSUFFIX} #/$(basename $DATABASEPATH)
if [[ -z $DATABASE ]]; then
error "Unable to recognize database, please get in touch with hpc@uni-mainz.de"
exit 1
fi
cd "$WORKDIR"
# the degree of parallelism has to be calculated so as not to oversaturate the nodes with srun processes.
@@ -728,15 +731,6 @@ if [[ -z "$SLURM_CPUS_PER_TASK" ]]; then
declare -i SLURM_CPUS_PER_TASK=1
fi
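From these values the number of concurrent srun steps can be derived; a sketch of one plausible formula (the script's actual calculation lies outside this excerpt):
# concurrent blast invocations the allocation can sustain
parallelism=$(( SLURM_JOB_NUM_NODES * SLURM_CPUS_ON_NODE / SLURM_CPUS_PER_TASK ))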
if [[ -z $DATABASE ]]; then
error "Unable to recognize database, please get in touch with hpc@uni-mainz.de"
fi
# see whether we find a file in the db
tmp=$(find $DATABASE -type f -print -quit)
# remove the 2nd suffix
DATABASE=${tmp%.*}
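${tmp%.*} drops the final dot-suffix so that BLAST is pointed at the database basename rather than at an individual index file; for example (file name illustrative):
tmp=/localscratch/12345/ramdisk/nr.pin   # an index file as found above
echo "${tmp%.*}"                         # -> /localscratch/12345/ramdisk/nr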
### a temporary script to conduct the alignment
cmdfile=/localscratch/$SLURM_JOB_ID/dummy.sh
cmdfilewriter
@@ -749,6 +743,12 @@ sbcast $cmdfile $newcmd
rm $cmdfile
cmdfile=$newcmd
echo "command file:"
cat $newcmd
echo
ls /localscratch/$SLURM_JOBID/ramdisk
### append a finishing token to the samples
samples+=('done')
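The 'done' entry is a sentinel that tells whichever loop consumes samples that no further query chunks will follow; a minimal sketch of such a consumer (the loop body is assumed, not taken from the script):
for sample in "${samples[@]}"; do
    [[ $sample == 'done' ]] && break   # sentinel reached: no more query chunks
    # otherwise dispatch $sample to the generated command file
done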
......