Commit 574ecf9b authored by Christian Meesters

simpler sanity checking - not yet perfect

parent c3403b0b
......@@ -635,53 +635,31 @@ if [[ -z "$SLURM_CPUS_PER_TASK" ]]; then
declare -i SLURM_CPUS_PER_TASK=1
fi
# Derive the BLAST database prefix from the index files found below $DATABASE.
# Which index file type is searched for depends on the executable name in
# $BLASTEXE; the result overwrites $DATABASE.
# NOTE: This check is kind of redundant, better check whether we can get rid of it.
# get *nal file from database to obtain a database prefix for blast
if [[ $BLASTEXE =~ .*blastn.* ]]; then
# executable name contains "blastn": use the first '*.nhr' file find reports
tmp=$(find $DATABASE -name '*.nhr' | head -n1)
# is there more than 1 suffix (or more than 2 splittable items)
# NOTE(review): the expansion below deletes every character of $tmp that is
# not a '.', then appends a space - so $suffixes holds a run of dots, not a
# number. The numeric '-gt' test that follows presumably wants the dot count
# (e.g. the length of that string); confirm the intent before relying on it.
suffixes="${tmp//[^\.]} "
if [ $suffixes -gt 2 ]; then
# drop the last '.xxx' component of the file name
DATABASE=${tmp%.*}
else
# keep the file name as found, including its extension
DATABASE=$tmp
fi
# this will not be there, if makeblastdb did not split the reference up
#if [[ -z "${tmp// }" ]]; then
# tmp=$(find $DATABASE -name '*.nhr'| head -n1)
# DATABASE=${tmp%.00.nhr}
#else
# # define database for blast as the full directory + prefix (without *nal* suffix)
# DATABASE=${tmp%.*}
#fi
elif [[ $BLASTEXE =~ .*blastp.* ]]; then
# executable name contains "blastp": first '*.phr' file in sort order,
# with the '.phr' extension removed
tmp=$(find $DATABASE -name '*.phr' | sort | head -n1)
DATABASE=${tmp%.phr}
elif [[ $BLASTEXE =~ .*blast.* ]]; then
# any other executable whose name contains "blast": same handling as the
# blastp branch above.
# NOTE(review): this also catches programs that may expect a different
# database type than '*.phr' implies - verify against the supported tools.
tmp=$(find $DATABASE -name '*.phr' | sort | head -n1)
DATABASE=${tmp%.phr}
# this will not be there, if makeblastdb did not split the reference up
#if [[ -z "${tmp// }" ]]; then
# tmp=$(find $DATABASE -name '*.nhr'| head -n1)
# DATABASE=${tmp%.paa}
#else
# # define database for blast as the full directory + prefix (without *nal* suffix)
# DATABASE=${tmp%.*}
#fi
fi
# An empty $DATABASE means no index file was found in the branch taken above.
# 'error' is not defined in this part of the file - presumably it reports the
# message and aborts; confirm against its definition.
if [[ -z $DATABASE ]]; then
error "Unable to recognize database, please get in touch with hpc@uni-mainz.de"
fi
# TODO: check whether this function can be re-enabled
# (disabled variant of run_blast: loads the bio/BLAST+ module and writes an
# uncompressed '.out' file directly into ./output)
#function run_blast() {
#module load bio/BLAST+
#
#$BLASTEXE -num_threads $SLURM_CPUS_PER_TASK -db $RAMDISK/$(basename $DATABASE) -query ./$1 $BLASTPARAMS -out ./output/$(basename ${1%fasta}out)
#}
# see whether we find a typical file in the db
# NOTE(review): there is no 'head -n1' here, so if several files below
# $DATABASE match '*00*hr', $tmp holds all of them separated by newlines and
# the prefix derived from it is unusable. If nothing matches, $tmp - and thus
# $DATABASE - ends up empty.
tmp=$(find $DATABASE -name '*00*hr')
# remove the 2nd suffix
# NOTE(review): ${tmp%.*} strips only the last '.xxx' component, so a name such
# as 'db.00.nhr' becomes 'db.00' - confirm that this is the prefix BLAST is
# meant to receive.
DATABASE=${tmp%.*}
#######################################
# Run one BLAST search for a single query file and store the gzip-compressed
# result as ./output/<name>.xml.gz relative to the directory of this script.
#
# Globals read:
#   BLASTEXE             BLAST executable (left unquoted on purpose, see below)
#   BLASTPARAMS          extra BLAST options, word-split on purpose
#   SLURM_CPUS_PER_TASK  passed to -num_threads
#   RAMDISK, DATABASE    the database is looked up as $RAMDISK/<basename of DATABASE>
#   JOBDIR               directory that receives the temporary result file
# Globals written (kept global, as before, so callers can still read them):
#   outfname, tmp_out, START_BLAST, END_BLAST, elapsed
# Arguments:
#   $1 - query file; a relative path is resolved against the script directory,
#        an absolute path is used unchanged
# Side effects:
#   changes the working directory to the directory containing $0 and replaces
#   any EXIT trap that was installed before
# Returns:
#   0 on success, 2 if $1 is missing, 1 if the script directory cannot be
#   entered, otherwise the exit status of the failing BLAST/gzip/mv command
#######################################
function run_blast() {
  if [[ -z "${1:-}" ]]; then
    echo "run_blast: missing query file argument" >&2
    return 2
  fi

  # Prefixing an absolute path with './' would silently point to a file that
  # does not exist, so only relative paths get the prefix.
  local query
  case "$1" in
    /*) query=$1 ;;
    *) query=./$1 ;;
  esac

  cd "$(dirname "$(realpath "$0")")" || return 1

  #TODO: check for xml extension
  outfname=$(basename "${1%fasta}xml")
  tmp_out=$JOBDIR/$outfname
  # Single quotes: $tmp_out is expanded (and properly quoted) when the trap
  # fires, so paths containing whitespace are removed correctly.
  trap 'rm -f -- "$tmp_out"' EXIT

  local rc=0
  START_BLAST=$(date +%s.%N)
  # $BLASTEXE and $BLASTPARAMS stay unquoted: both may carry several
  # whitespace-separated words that must become separate arguments.
  # shellcheck disable=SC2086
  $BLASTEXE -num_threads "$SLURM_CPUS_PER_TASK" \
    -db "$RAMDISK/$(basename "$DATABASE")" \
    -query "$query" $BLASTPARAMS -out "$tmp_out" || rc=$?
  END_BLAST=$(date +%s.%N)
  # elapsed wall-clock time in minutes
  elapsed=$(bc <<< "scale=1; (($END_BLAST-$START_BLAST))/60")
  #TODO: enable logging in verbose mode
  #echo "Elapsed: $elapsed"

  # Never publish the output of a failed run: a truncated result in ./output
  # would be indistinguishable from a complete one.
  if ((rc != 0)); then
    rm -f -- "$tmp_out"
    return "$rc"
  fi

  #compress when done
  gzip -- "$tmp_out" || return
  mv -- "${tmp_out}.gz" "./output/${outfname}.gz"
}
# (exporting run_blast is currently disabled)
#export -f run_blast
# Path of the command script, located in the current working directory.
cmd="${PWD}/cmd_file.sh"
......@@ -700,7 +678,7 @@ echo '$outfname' >> $cmd
# Append the body of the per-query command script to $cmd.
# Single-quoted text is written verbatim, so its $variables and $(...) are
# evaluated only when $cmd itself runs; double-quoted text is expanded now.
echo 'trap "rm -f $tmp_out" EXIT' >> $cmd
echo 'START_BLAST=$(date +%s.%N)' >> $cmd
# printf emits no trailing newline, so the '-query ...' text appended next
# continues the same BLAST command line inside $cmd.
# NOTE(review): the expanded variables are used as the printf format string;
# a '%' or backslash in $BLASTPARAMS would be interpreted by printf.
printf "$BLASTEXE -num_threads $SLURM_CPUS_PER_TASK -db $DATABASE $BLASTPARAMS " >> $cmd
# NOTE(review): the two '-query' lines below look like the before ('./$1') and
# after ('$1') versions of one changed line - only one of them should remain
# in the actual script.
echo '-query ./$1 -out $tmp_out' >> $cmd
echo '-query $1 -out $tmp_out' >> $cmd
# marks the end of this run
echo 'END_BLAST=$(date +%s.%N)' >> $cmd
# elapsed time in minutes, computed with bc when $cmd runs
echo 'elapsed=$(bc <<< "scale=1; (($END_BLAST-$START_BLAST))/60")' >> $cmd
......@@ -713,6 +691,11 @@ chmod +x $cmd
# Remove the generated command script when this shell exits. Because of the
# double quotes, $cmd is expanded now, at the time the trap is installed.
trap "rm -f $cmd" EXIT
# define our srun-dropin for the subsequent call to be distributed among the reserved nodes
# NOTE(review): --mem-per-cpu is computed as SLURM_MEM_PER_NODE / SLURM_NTASKS;
# confirm that this is the intended per-CPU value.
srun="srun -n 1 -N1 --exclusive -c $SLURM_CPUS_PER_TASK --jobid $SLURM_JOBID --mem-per-cpu=$((SLURM_MEM_PER_NODE / SLURM_NTASKS))"
# likewise the correct number of semaphores
# NOTE(review): neither $srun nor $parallel is referenced in the lines visible
# here; the launch below calls 'parallel' directly.
parallel="parallel --no-notice -j $(($SLURM_CPUS_ON_NODE / $SLURM_CPUS_PER_TASK * $SLURM_JOB_NUM_NODES)) "
# comma-separated list of the host names expanded from $SLURM_JOB_NODELIST
HOSTLIST=$(scontrol show hostname $SLURM_JOB_NODELIST | paste -d, -s )
#parallel --controlmaster --workdir --sshdelay 0.2 $PWD -S $HOSTLIST -j $(($SLURM_JOB_NUM_NODES*$SLURM_CPUS_ON_NODE/$SLURM_CPUS_PER_TASK)) --env run_blast run_blast ::: $(find -type f -name 'group*.fasta')
# Run $cmd once per 'group*.fasta' file found below the current directory,
# spread over the hosts in $HOSTLIST, with
# SLURM_CPUS_ON_NODE / SLURM_CPUS_PER_TASK jobs per host.
# NOTE(review): the unquoted $(find ...) is word-split, so paths containing
# whitespace would be passed as several arguments.
parallel --workdir $PWD --sshdelay 0.1 -S $HOSTLIST -j $(($SLURM_CPUS_ON_NODE / $SLURM_CPUS_PER_TASK)) $cmd ::: $(find $(pwd) -type f -name 'group*.fasta')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment