Commit 6b266777 authored by Christian Meesters's avatar Christian Meesters

fixed, see issue 27 - was bc and db issue

parent efa67b1f
#######################################
# Write the per-query BLAST worker script to "$cmdfile".
#
# The here-document is expanded in two stages: unescaped expansions
# (e.g. $BLASTEXE, ${JOBDIR}) are substituted while the file is written,
# whereas escaped ones (\$1, \$tmp_out, ...) are written literally and are
# only evaluated when the generated script itself runs.
#
# Globals (read): cmdfile, SCRIPT_PATH, JOBDIR, BLASTEXE,
#                 SLURM_CPUS_PER_TASK, DATABASE, BLASTPARAMS
# Arguments: none
# Outputs:   creates or overwrites "$cmdfile"
# Returns:   exit status of writing the file
#######################################
function cmdfilewriter()
{
# "$cmdfile" is quoted: an unquoted target containing whitespace makes the
# redirection fail with "ambiguous redirect".
# $BLASTPARAMS is intentionally left unquoted in the generated command line,
# because it carries several separate options.
cat <<EOF > "$cmdfile"
#!/bin/bash
module load bio/BLAST+
# are we done?
source "${SCRIPT_PATH}/cleanup.sh"
if [ "\$1" = "done" ]; then
shrink_job
exit
fi
outfname=\$(basename "\${1%.fasta}")
tmp_out="${JOBDIR}/\$outfname"
trap 'rm -f -- "\$tmp_out"' EXIT
START_BLAST=\$(date +%s)
$BLASTEXE -num_threads $SLURM_CPUS_PER_TASK -db "$DATABASE" $BLASTPARAMS -query "\$1" -out "\$tmp_out"
END_BLAST=\$(date +%s)
elapsed=\$(bc <<< "scale=1; \$((\$END_BLAST - \$START_BLAST))/60")
echo "Elapsed: \$elapsed"
# compress, when done
gzip "\$tmp_out"
# copy back, when ready
mv "\${tmp_out}.gz" "./output/\${outfname}.gz"
EOF
}
......@@ -30,8 +30,8 @@
#set -x
#set -e
#PS4='Line ${LINENO}: '
# abort the script as soon as a command exits with a non-zero status
set -e
# prefix for xtrace (set -x) output; the single quotes defer the expansion
# of ${LINENO} until each trace line is printed
PS4='Line ${LINENO}: '
# to measure the execution time independent of SLURM
# (seconds since the epoch, with nanosecond fraction)
START=$(date +%s.%N)
......@@ -49,9 +49,9 @@ module load bio/BLAST+/2.7.1-foss-2018a
module load lang/Python/3.6.4-foss-2018a
### setup variables for THIS script
# invocation path of this script, exactly as given in $0
SCRIPT="$0"
SCRIPT_VERSION="0.4"
# Directory part of $0, exported so that child processes can see it.
# "$0" is quoted so that a path containing whitespace stays one argument;
# the assignment is kept separate from `export` so that a failing command
# substitution is not masked by export's own exit status.
SCRIPT_PATH=$(dirname "$0")
export SCRIPT_PATH
# TODO: delete the following 3 functions, once sbcast is working
function queue {
......@@ -428,10 +428,36 @@ if [ ! -e "$FASTA" ]; then
exit 1
fi
#if [[ ! -e "$DATABASE" ]]; then
# echo "$DATABASE : not found!"
# exit 1
#fi
#TODO: differentiate between blastn,x,p -- for now, all are equal
# Map the requested executable (compared case-insensitively) to its
# canonical name and to the thread count used for this program.
# Anything else is reported, the help text is printed and the script exits
# with status 2.
case "${_arg_executable,,}" in
  blastx)
    executable="blastx"
    threads=2
    ;;
  blastn)
    executable="blastn"
    threads=8
    ;;
  blastp)
    executable="blastp"
    threads=2
    ;;
  *)
    error "executable '$_arg_executable' not recognized."
    print_help
    exit 2
    ;;
esac
# Source blast_wrap.sh once for every accepted executable. Outside of a SLURM
# job (no SLURM_JOB_ID) it is looked up next to this script, inside a job
# below SCRIPT_PATH. The paths are quoted so that directories containing
# whitespace do not get split into several words.
if [ -z "$SLURM_JOB_ID" ]; then
  source "$(dirname "$0")/blast_wrap.sh"
else
  source "${SCRIPT_PATH}/blast_wrap.sh"
fi
### prepare filepath and -names for creating working folder
# directory part of the query file; "$FASTA" is quoted so that a path with
# whitespace or glob characters reaches dirname as a single argument
FASTAPATH=$(dirname "$FASTA")
......@@ -525,16 +551,22 @@ fi
### size of the reference database: first column of the du summary
### (symlinks dereferenced), with an "M" appended
_arg_ramdisk="$(du -shL --block-size=1M "$_arg_database" | cut -f1)M"
### path of THIS script; prefixed with the current directory unless it
### already starts with a slash
SCRIPT="$0"
case "$SCRIPT" in
  /*) ;;
  *) SCRIPT="${PWD}/${SCRIPT}" ;;
esac
### check if this script is on node by checking env-variable $SLURM_JOB_ID, else send it to SLURM with given parameters and exit
if [ -z "$SLURM_JOB_ID" ]; then
submit_call="sbatch --no-requeue -o ${JOBTAG}_%j.out -J $JOBTAG -p $_arg_queue -A $_arg_assoc -t $_arg_runlimit -N $_arg_nodes -n $((64 * $_arg_nodes / $_arg_blast_threads)) --mem=$_arg_mem --ramdisk=${_arg_ramdisk} -c $_arg_blast_threads --reservation=$_arg_reservation $SCRIPT --partition $_arg_queue --account $_arg_assoc --nodes $_arg_nodes --time $_arg_runlimit --reservation=$_arg_reservation --threads $_arg_blast_threads --splitup $_arg_splitup_per_queryfile --blastparams=\"$BLASTPARAMS\" --executable=$_arg_executable $FASTA $DATABASE"
export SCRIPT_PATH=$(dirname $0)
submit_statement="sbatch --no-requeue -o ${JOBTAG}_%j.out -J $JOBTAG -p $_arg_queue -A $_arg_assoc -t $_arg_runlimit -N $_arg_nodes -n $((64 * $_arg_nodes / $threads)) --mem=$_arg_mem --ramdisk=${_arg_ramdisk} -c $threads"
script_statement="$SCRIPT --partition $_arg_queue --account $_arg_assoc --nodes $_arg_nodes --time $_arg_runlimit --reservation=$_arg_reservation --threads $_arg_blast_threads --splitup $_arg_splitup_per_queryfile --blastparams=\"$BLASTPARAMS\" --executable=$_arg_executable $FASTA $DATABASE"
if [ -n "$_arg_reservation" ]; then
submit_statement="${submit_statement} --reservation=${_arg_reservation}"
fi
# paste them together
submit_call="${submit_statement} ${script_statement}"
if [[ $_arg_test == "on" ]]; then
echo "Just testing - this command would be submitted:"
echo $submit_call
......@@ -616,58 +648,34 @@ if [[ -z $DATABASE ]]; then
error "Unable to recognize database, please get in touch with hpc@uni-mainz.de"
fi
# see whether we find a typical file in the db
# NOTE(review): this result is overwritten by the next assignment to tmp
# before it is ever read -- looks like a leftover; confirm and drop.
tmp=$(find $DATABASE -name '*00*hr')
# see whether we find a file in the db: print the first regular file found
# below $DATABASE and stop searching right away (-quit)
tmp=$(find $DATABASE -type f -print -quit)
# strip the last dot-separated suffix from that path; the remainder replaces
# the previous value of DATABASE
DATABASE=${tmp%.*}
#export -f run_blast
# Generate the per-query worker script "$cmd" line by line.
# Single-quoted echo arguments are written verbatim and are only expanded
# when the generated script runs; the double-quoted printf arguments are
# expanded right here, while the file is being written.
cmd=$PWD/cmd_file.sh
# start from a clean file
rm -f $cmd
echo '#!/bin/bash' > $cmd
#echo 'sleep 300' >> $cmd
echo 'set -x' >> $cmd
echo 'module load bio/BLAST+' >> $cmd
#echo 'echo $(pwd)' >> $cmd
# the generated script changes into the directory it is stored in
echo 'cd $(dirname $(realpath $0))' >> $cmd
#TODO: write output to local host, copy back, when ready
# output name: basename of the query file with a trailing "fasta" replaced
# by "xml"
echo 'outfname=$(basename ${1%fasta}xml)' >> $cmd
# temporary output goes here:
# printf emits no newline, so the following echo completes the same line:
# tmp_out=<value of JOBDIR>/$outfname
printf "tmp_out=$JOBDIR/" >> $cmd
echo '$outfname' >> $cmd
echo 'trap "rm -f $tmp_out" EXIT' >> $cmd
echo 'START_BLAST=$(date +%s.%N)' >> $cmd
# first half of the BLAST command line, expanded now; the echo below adds
# the per-query arguments to the same line
# NOTE(review): the expanded text is used as the printf FORMAT string, so a
# '%' or backslash in any of these variables would be interpreted by printf.
printf "$BLASTEXE -num_threads $SLURM_CPUS_PER_TASK -db $DATABASE $BLASTPARAMS " >> $cmd
echo '-query $1 -out $tmp_out' >> $cmd
# marks the end of this run
echo 'END_BLAST=$(date +%s.%N)' >> $cmd
# NOTE(review): in the generated script the text ((END_BLAST-START_BLAST))
# sits inside double quotes without a '$', so it is handed to bc literally
# and bc never sees the two timestamps -- confirm.
echo 'elapsed=$(bc <<< "scale=1; ((END_BLAST-START_BLAST))/60")' >> $cmd
# the \" sequences are written as-is into the generated script
echo 'echo \"Elapsed: \"$elapsed' >> $cmd
# compress, when done
echo 'gzip $tmp_out' >> $cmd
# copy back, when ready
echo 'mv ${tmp_out}.gz ./output/${outfname}.gz' >> $cmd
chmod +x $cmd
# remove the generated script when this script exits; because of the double
# quotes $cmd is expanded now, when the trap is installed
trap "rm -f $cmd" EXIT
### a temporary script to conduct the alignment
# cmdfilewriter is defined outside this section; presumably it writes the
# worker script to $cmdfile -- verify against its definition
cmdfile=/localscratch/$SLURM_JOB_ID/dummy.sh
cmdfilewriter
# define our srun-dropin for the subsequent call to be distributed among the reserved nodes
#srun="srun -n 1 -N1 --exclusive -c $SLURM_CPUS_PER_TASK --jobid $SLURM_JOBID --mem-per-cpu=$((SLURM_MEM_PER_NODE / SLURM_NTASKS))"
# likewise the correct number of semaphores
#parallel="parallel --no-notice -j $(($SLURM_CPUS_ON_NODE / $SLURM_CPUS_PER_TASK * $SLURM_JOB_NUM_NODES)) "
chmod +x $cmdfile
#HOSTLIST=$(scontrol show hostname $SLURM_JOB_NODELIST | paste -d, -s )
#parallel --controlmaster --workdir --sshdelay 0.2 $PWD -S $HOSTLIST -j $(($SLURM_JOB_NUM_NODES*$SLURM_CPUS_ON_NODE/$SLURM_CPUS_PER_TASK)) --env run_blast run_blast ::: $(find -type f -name 'group*.fasta')
# hand the worker script to sbcast under a second name
# NOTE(review): this path uses $SLURM_JOBID while cmdfile above uses
# $SLURM_JOB_ID -- confirm both are set to the same value.
newcmd=/localscratch/$SLURM_JOBID/dummy_wrapper.sh
sbcast $cmdfile $newcmd
# srun prefix for one task on one node; memory per CPU is the node memory
# divided by the number of CPUs on the node
srun="srun --cpu-bind=q --mem-bind=q -n 1 -N1 --exclusive -c $SLURM_CPUS_PER_TASK --jobid $SLURM_JOBID --mem-per-cpu=$((SLURM_MEM_PER_NODE / SLURM_CPUS_ON_NODE))"
# drop the original file and continue with the sbcast destination path
rm $cmdfile
cmdfile=$newcmd
### append a finishing token to the samples
# NOTE(review): samples is not passed to either parallel call below; both
# take their arguments from find -- confirm where the token is consumed.
samples+=('done')
# both -j and -P are set to $SLURM_NTASKS
parallel="parallel --no-notice -j $SLURM_NTASKS -P $SLURM_NTASKS "
# run $cmd once per group*.fasta file found below the current directory
$parallel $cmd ::: $(find $(pwd) -type f -name 'group*.fasta')
# NOTE(review): identical to the srun assignment a few lines above
srun="srun --cpu-bind=q --mem-bind=q -n 1 -N1 --exclusive -c $SLURM_CPUS_PER_TASK --jobid $SLURM_JOBID --mem-per-cpu=$((SLURM_MEM_PER_NODE / SLURM_CPUS_ON_NODE))"
# run the sbcast copy through srun, once per group*.fasta file
$parallel "$srun" "$cmdfile" ::: $(find $(pwd) -type f -name 'group*.fasta')
wait
# trace the remaining commands
set -x
# count the digit sequences that occur in the file listing of only one of
# output/ and scratch/ (comm -3 suppresses the lines common to both)
n_unfinished_files=$(comm -3 <(cd output && find .| grep -o '[0-9]*' |sort ) <(cd scratch && find . | grep -o '[0-9]*' |sort )|wc -l)
if [ $n_unfinished_files -eq 0 ] && [[ $_arg_compress == "on" ]] && [ $XMLOUT -eq 1 ]; then
# shrink the allocation, such that only the minimum necessary is accounted for
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment