Commit 64b3185a authored by Christian Meesters

so far so ungood

parent 0ad2386c
...@@ -43,7 +43,13 @@ module purge ...@@ -43,7 +43,13 @@ module purge
# load the most current version of GNU parallel # load the most current version of GNU parallel
module load tools/parallel module load tools/parallel
module load lang/Python/3.6.4-foss-2018a #module load lang/Python/3.6.4-foss-2018a
# Python interpreter module for this job (presumably needed by the helper
# scripts shipped alongside this wrapper - TODO confirm).
module load lang/Python/3.7.4-GCCcore-8.3.0
# TODO: find a solution for the bug in BLAST+ and a way to select the BLAST+
#       version by hand
# BLAST+ build that is currently active.
module load bio/BLAST+/2.9.0-gompi-2019a
# Alternative BLAST+ build, currently disabled.
#module load bio/BLAST+/2.7.1-foss-2018a
### setup variable for THIS script; giving absolute path if necessary ### setup variable for THIS script; giving absolute path if necessary
SCRIPT="$0" SCRIPT="$0"
...@@ -433,6 +439,16 @@ done ...@@ -433,6 +439,16 @@ done
FASTA=$_arg_fasta FASTA=$_arg_fasta
DATABASE=$_arg_database DATABASE=$_arg_database
### checking db for integrity
# Determine which FASTA suffix the reference uses and remember it in DBSUFFIX.
# `compgen -G` succeeds when the glob matches at least one path, so the check
# keeps working when several files match or when the path contains whitespace
# (a bare `[ -e <glob> ]` fails with "too many arguments" in those cases).
if compgen -G "${DATABASE}*.fa" > /dev/null; then
  DBSUFFIX=".fa"
elif compgen -G "${DATABASE}*.fasta" > /dev/null; then
  DBSUFFIX=".fasta"
else
  # Without a recognisable FASTA file the database cannot be identified.
  error "no file '.fa' or '.fasta' found in ${DATABASE} - unable to proceed reliably"
  exit 1
fi
### check if query & database exist ### check if query & database exist
if [[ $_arg_test == "off" ]] && [ ! -e "$FASTA" ]; then if [[ $_arg_test == "off" ]] && [ ! -e "$FASTA" ]; then
error "FASTA input was: '$FASTA' - no such file!" error "FASTA input was: '$FASTA' - no such file!"
...@@ -545,7 +561,7 @@ if [ -z "$SLURM_JOB_ID" ] && [[ $_arg_test == "off" ]]; then ...@@ -545,7 +561,7 @@ if [ -z "$SLURM_JOB_ID" ] && [[ $_arg_test == "off" ]]; then
echo "removing directory $JOBTAG" echo "removing directory $JOBTAG"
rm -r $JOBTAG rm -r $JOBTAG
else else
echo "So you want to continue regardless? (e.g. scratch files already existing) ([y]/n)" echo "So, you want to continue regardless (using the existing scratch files)? ([y]/n)"
echo -n '>' echo -n '>'
read ENTER read ENTER
if [[ ${ENTER,,} = 'n' || ${ENTER,,} == 'no' ]] ; then if [[ ${ENTER,,} = 'n' || ${ENTER,,} == 'no' ]] ; then
...@@ -572,11 +588,13 @@ FASTA="$FASTAPATH/$FASTAID" ...@@ -572,11 +588,13 @@ FASTA="$FASTAPATH/$FASTAID"
### setup blast and splitup executable; check if exist ### setup blast and splitup executable; check if exist
allowed_executables="blastx blastp blastn" allowed_executables="blastx blastp blastn"
if [[ ! $allowed_executables =~ (^|[[:space:]])"$_arg_executable"($|[[:space:]]) ]]; then if [[ ! $allowed_executables =~ (^| [[:space:]])"$_arg_executable"($| ) ]]; then
# BLASTEXE=$(which $_arg_executable) BLASTEXE=$(which $_arg_executable)
#else else
error "$_arg_executable ought to be one of [$allowed_executables]" error "$_arg_executable ought to be one of [$allowed_executables]"
exit 1
fi fi
export _arg_executable
### which is the reference directory size? ### which is the reference directory size?
_arg_ramdisk=$(du -shL --block-size=1M "$_arg_database" | cut -f1 )M _arg_ramdisk=$(du -shL --block-size=1M "$_arg_database" | cut -f1 )M
...@@ -665,40 +683,30 @@ RAMDISK=$JOBDIR/ramdisk ...@@ -665,40 +683,30 @@ RAMDISK=$JOBDIR/ramdisk
HOSTLIST=$(scontrol show hostname $SLURM_JOB_NODELIST | paste -d, -s | tr ',', ' ') HOSTLIST=$(scontrol show hostname $SLURM_JOB_NODELIST | paste -d, -s | tr ',', ' ')
QUEUE='' QUEUE=''
# Prepare the stage-in script: it is generated once on job-local scratch and
# then distributed under its final name.
#myhost=$(hostname -f)
# Temporary name for the generated script and the name it gets after sbcast.
stagefile=/localscratch/$SLURM_JOB_ID/dummy_stagein.sh
rstagefile=/localscratch/$SLURM_JOB_ID/stagein.sh
# stage_in.sh lives next to this script; it presumably defines
# stage_in_writer, which is expected to write $stagefile - verify in stage_in.sh.
source "${SCRIPT_PATH}"/stage_in.sh
stage_in_writer
chmod +x $stagefile
# distribute the stagewriter
# (sbcast transfers the file to the nodes allocated to this job)
sbcast $stagefile $rstagefile
# The temporary copy is no longer needed; from here on $stagefile refers to
# the broadcast script.
rm $stagefile
stagefile=$rstagefile
# we would not need this loop with regard to slurm, but as we have
# asynchronous tasks already, we keep track with the queue
for HOST in $HOSTLIST; do for HOST in $HOSTLIST; do
if [ -L ${DATABASEPATH} ]; then srun -w $HOST -N1 -n1 -c1 --mem-per-cpu=5000M $stagefile &
warning "If the reference directory is a link, fast stage-in is not possible." queue $!
for fname in ${DATABASEPATH}/*; do
eval "ssh $HOST cp -L $fname ${RAMDISK}/$(basename $fname)" &
PID=$!
queue $PID
done
else
for fname in ${DATABASEPATH}/*; do
if [ -L "$fname" ]; then
eval "ssh $HOST cp -L $fname ${RAMDISK}/$(basename $fname)" &
PID=$!
queue $PID
else
eval "ssh $HOST dd bs=4096 if=$fname of=${RAMDISK}/$(basename $fname)" &
PID=$!
queue $PID
fi
done
fi
# TODO: check for dereferencing links, before enabling
# TODO: check for performance, before re-enabling
#sbcast $FILE $RAMDISK/$(basename $FILE)
done done
#DATABASE=$RAMDISK/$DATABASE
DATABASE=$RAMDISK #/$(basename $DATABASEPATH)
WORKDIR=$PWD/$BLASTDIR/$SLURM_JOB_NAME WORKDIR=$PWD/$BLASTDIR/$SLURM_JOB_NAME
# this script may never output to a user's $HOME # this script may never output to a user's $HOME
if [[ *"$WORKDIR"* = 'home' ]]; then if [[ *"$WORKDIR"* = 'home' ]]; then
eror "Cowardly refusing to operate in a home directory." error "Cowardly refusing to operate in a home directory."
fi fi
# set path names to ease maintance # set path names to ease maintance
SPLITFILEDIR=scratch SPLITFILEDIR=scratch
...@@ -709,9 +717,8 @@ if [ ! -d "$WORKDIR/$SPLITFILEDIR" ]; then ...@@ -709,9 +717,8 @@ if [ ! -d "$WORKDIR/$SPLITFILEDIR" ]; then
mkdir -p "$WORKDIR/output" || exit 1; mkdir -p "$WORKDIR/output" || exit 1;
cd "$WORKDIR" cd "$WORKDIR"
echo "executing scratch generator on $FASTA ($_arg_splitup_per_queryfile entries per file)" echo "executing scratch generator on $FASTA ($_arg_splitup_per_queryfile entries per file)"
eval "${SCRIPT_PATH}/splitter.py $FASTA $_arg_splitup_per_queryfile" & # splitup queryfile "${SCRIPT_PATH}/splitter.py $FASTA $_arg_splitup_per_queryfile" & # splitup queryfile
PID=$! queue $!
queue $PID
fi fi
# wait until the copy and a possible scratch generation are finished # wait until the copy and a possible scratch generation are finished
...@@ -720,6 +727,16 @@ while [[ ! -z "$(echo $QUEUE| tr -d ' ')" ]]; do ...@@ -720,6 +727,16 @@ while [[ ! -z "$(echo $QUEUE| tr -d ' ')" ]]; do
sleep 5 sleep 5
done done
# Locate the staged reference inside the ramdisk: take the first entry whose
# name ends in the suffix detected earlier (DBSUFFIX). Tracing is switched on
# around the lookup so the listing and the find call show up in the job log.
set -x
ls "$RAMDISK"/*
DATABASE=$(find "$RAMDISK" -name "*${DBSUFFIX}" -print -quit)
set +x
#DATABASE=$RAMDISK/db${DBSUFFIX} #/$(basename $DATABASEPATH)
# An empty result means nothing with the expected suffix was found in the
# ramdisk; continuing would run BLAST without a database.
if [[ -z "$DATABASE" ]]; then
  error "Unable to recognize database, please get in touch with hpc@uni-mainz.de"
  exit 1
fi
cd "$WORKDIR" cd "$WORKDIR"
# calculating the degree of parallelism is necessary in order not to oversaturate with srun processes. # calculating the degree of parallelism is necessary in order not to oversaturate with srun processes.
...@@ -728,15 +745,6 @@ if [[ -z "$SLURM_CPUS_PER_TASK" ]]; then ...@@ -728,15 +745,6 @@ if [[ -z "$SLURM_CPUS_PER_TASK" ]]; then
declare -i SLURM_CPUS_PER_TASK=1 declare -i SLURM_CPUS_PER_TASK=1
fi fi
if [[ -z $DATABASE ]]; then
error "Unable to recognize database, please get in touch with hpc@uni-mainz.de"
fi
# see whether we find a file in the db
tmp=$(find $DATABASE -type f -print -quit)
# remove the 2nd suffix
DATABASE=${tmp%.*}
### a temporary script to conduct the alignment ### a temporary script to conduct the alignment
cmdfile=/localscratch/$SLURM_JOB_ID/dummy.sh cmdfile=/localscratch/$SLURM_JOB_ID/dummy.sh
cmdfilewriter cmdfilewriter
...@@ -749,6 +757,12 @@ sbcast $cmdfile $newcmd ...@@ -749,6 +757,12 @@ sbcast $cmdfile $newcmd
rm $cmdfile rm $cmdfile
cmdfile=$newcmd cmdfile=$newcmd
# Debug output: print the broadcast command file and list the ramdisk content.
# SLURM_JOB_ID is used (instead of the legacy alias SLURM_JOBID) to stay
# consistent with the other /localscratch paths in this script.
echo "command file:"
cat "$newcmd"
echo
ls "/localscratch/$SLURM_JOB_ID/ramdisk"
### append a finishing token to the samples ### append a finishing token to the samples
samples+=('done') samples+=('done')
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment