Commit 5d9b50ab authored by Christian Meesters

Merge branch 'devel' into 'master'

Devel

See merge request !16
parents af3b0e6c 150a061e
# easyconfig file for the ZDV taskfarm script
name = 'parallel_BLAST'
version = '0.4'
version = '0.5'
homepage = 'https://version.zdv.uni-mainz.de'
homepage = 'https://gitlab.rlp.net/hpc-jgu-lifescience/seq-analysis/'
description = 'script to accelerate NCBI BLAST runs on Mogon'
......
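For context, an easyconfig like the one above is normally built and installed with EasyBuild's eb command; a minimal sketch, assuming the updated file is saved as parallel_BLAST-0.5.eb and that an EasyBuild module is available on the cluster (both names are assumptions):

module load tools/EasyBuild        # module name is an assumption
eb parallel_BLAST-0.5.eb --robot   # --robot lets EasyBuild resolve missing dependencies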
@@ -3,8 +3,8 @@ function cmdfilewriter()
{
cat <<EOF > $cmdfile
#!/bin/bash
module load bio/BLAST+
module purge
module load bio/BLAST+/2.9.0-gompi-2019a
# are we done?
source ${SCRIPT_PATH}/cleanup.sh
if [ \$1 = "done" ]; then
......
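The cmdfilewriter above relies on here-document quoting rules: unescaped expansions such as ${SCRIPT_PATH} are substituted when the command file is written, while escaped ones like \$1 stay literal and are only evaluated when the generated script runs. A minimal standalone sketch of the same pattern (file name and contents are made up):

cmdfile=/tmp/cmd.sh                    # hypothetical target file
cat <<EOF > $cmdfile
#!/bin/bash
echo "written by: $USER"               # expanded now, while the file is written
echo "first argument: \$1"             # stays literal, expanded when cmd.sh runs
EOF
bash $cmdfile done                     # prints the writing user and "done"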
@@ -16,7 +16,6 @@ function shrink_job {
keep+=($host)
done
set -x
# in the case of STAR we need to cleanup the empty hosts,
# before we can leave them over to slurm!
if [[ "STAR" = "$executable" ]]; then
@@ -35,7 +34,6 @@ function shrink_job {
# ssh $node STAR --runThreadN 1 --genomeLoad Remove --genomeDir ${_arg_reference}
#done
fi
set +x
if [ ${#keep[@]} -lt ${#HOSTLIST[@]} ]; then
# now transform to a SLURM-readable nodelist
......
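The hunk ends where the kept hosts are turned into a SLURM-readable nodelist; the exact transformation is not shown in this excerpt, but one common way to compact a list of hostnames (an illustration, not necessarily what shrink_job does) is scontrol:

keep=(node0101 node0102 node0107)                       # hypothetical kept hosts
nodelist=$(IFS=,; scontrol show hostlist "${keep[*]}")  # join with commas, then compact
echo "$nodelist"                                        # prints a range expression such as node010[1-2,7]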
name = 'parallel_QATools'
version = '0.1'
version = '0.2'
homepage = 'https://gitlab.rlp.net/hpc-jgu-lifescience/seq-analysis'
@@ -11,13 +11,15 @@ easyblock = 'Binary'
sources = ['./parallel_QATools/QAWrapper',
'./parallel_QATools/cleanup.sh',
'./parallel_QATools/fastqc_wrap.sh',
'./parallel_QATools/samtools_wrap.sh',
]
unpack_sources = False
files_to_copy = ['QAWrapper',
'cleanup.sh',
'fastqc_wrap.sh']
'fastqc_wrap.sh',
'samtools_wrap.sh']
postinstallcmds = ['mv %(installdir)s/parallel_QATools/* %(installdir)s && rmdir %(installdir)s/parallel_QATools']
@@ -26,5 +28,6 @@ moduleclass = 'bio'
sanity_check_commands = ['bash -n']
sanity_check_paths = { 'files' : ['QAWrapper',
'cleanup.sh',
'fastqc_wrap.sh'] ,
'fastqc_wrap.sh',
'samtools_wrap.sh'] ,
'dirs' : [],}
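The sanity_check_commands entry uses bash -n, which only parses a script and reports syntax errors without executing anything; the same check can be reproduced by hand on any of the copied files, for example:

bash -n samtools_wrap.sh && echo "syntax OK"   # non-zero exit (and an error message) on a parse error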
@@ -23,6 +23,7 @@ else
_arg_queue="parallel"
_arg_constraint="broadwell"
fi
_arg_executable="fastqc" # default
_arg_reservation=""
_arg_dependency=""
_arg_tag=""
@@ -57,6 +58,7 @@ print_help ()
printf "\\t\\t\\t%s\\n" "- possible arguments: fastqc"
printf "\\t\\t\\t%s\\n" "- check is case insensitive"
printf "\\t\\t\\t%s\\n" "- defaults to 'fastqc'"
printf "\\t\\t\\t%s\\n" "- 'samtools' is possible and will run 'samtools flagstat' on '.bam' files"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "-l,--runlimit" "runlimit default is 300 min"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "-p,--partition" "SLURM partition"
printf "\\t\\t\\t%s\\n" "- default is 'parallel' on Mogon II and 'nodeshort' on Mogon I"
@@ -89,6 +91,7 @@ credits()
echo "- v0.1 -- 08. Jan. 2019 -- release of the implementation for SLURM, supporting the"
echo " ability to run fastqc accross nodes."
echo "- v0.1.1 -- 28. Jan. 2019 -- minor bug fix: resource allocation improved"
echo "- v0.3 -- 02. Apr. 2019 -- added support for 'samtools flagstat'"
}
# function to redirect simple error messages to stderr
@@ -276,28 +279,40 @@ fi
if [ "fastqc" = "${_arg_executable,,}" ]; then
executable="fastqc"
threads=1
samples=$(find $(realpath $_arg_inputdir) -type f \( -iname \*.gz -o -iname \*.fastq -o -iname \*.fq \) )
JOBTAG="QA_on_$(basename $_arg_inputdir)"
if [ -z "$SLURM_JOB_ID" ]; then
source $(dirname $SCRIPT)/fastqc_wrap.sh
else
source ${SCRIPT_PATH}/fastqc_wrap.sh
fi
elif [ "samtools" = "${_arg_executable,,}" ]; then
executable="samtools"
threads=4
samples=$(find $(realpath $_arg_inputdir) -type f \( -iname \*.bam \))
JOBTAG="QA_on_mapped_files_$(basename $_arg_inputdir)"
if [ -z "$SLURM_JOB_ID" ]; then
source $(dirname $SCRIPT)/samtools_wrap.sh
else
source ${SCRIPT_PATH}/samtools_wrap.sh
fi
else
error "executable '$_arg_executable' not recognized."
print_help
exit 2
fi
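The dispatch above compares against "${_arg_executable,,}", the bash 4 expansion that lower-cases the value, which is what makes the executable check case-insensitive. A standalone illustration with a made-up input:

_arg_executable="SAMtools"                    # hypothetical user input
if [ "samtools" = "${_arg_executable,,}" ]; then
    echo "recognized as samtools"             # matches regardless of the input's case
fi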
### what shall be our job tag? - overwrite if given by user
if [[ -z "${_arg_tag/ /}" ]]; then
_arg_tag="$JOBTAG"
fi
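The emptiness test above removes a space from the user-supplied tag before checking it with -z, so an empty or single-space tag falls back to the generated JOBTAG; the output-directory check further down uses ${var// } instead, which strips all spaces. A quick illustration (values are made up):

_arg_tag=" "                                                  # a single blank
[ -z "${_arg_tag/ /}" ] && echo "tag treated as unset"        # first space removed -> empty
_arg_outdir="   "                                             # several blanks
[ -z "${_arg_outdir// }" ] && echo "outdir treated as unset"  # all spaces removed -> empty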
### special case for jgu-cbdm
if [[ "$cluster" = "mogon" && "$_arg_assoc" = "jgu-cbdm" ]]; then
_arg_queue="andrade,${_arg_queue}"
fi
### what shall be our job tag?
JOBTAG="QA_on_$(basename $_arg_inputdir)"
if [[ -z "${_arg_tag/ /}" ]]; then
_arg_tag="$JOBTAG"
fi
### check if output directory already exists
if [[ -z "${_arg_outdir// }" ]]; then
_arg_outdir=$(dirname $_arg_inputdir)/${JOBTAG}
@@ -327,9 +342,10 @@ if [ "$cluster" = "mogon2" ] && [ -z "$SLURM_JOB_ID" ]; then
fi
fi
samples=$(find $(realpath $_arg_inputdir) -type f \( -iname \*.gz -o -iname \*.fastq -o -iname \*.fq \) )
JOBTAG="QA_on_$(basename $_arg_inputdir)"
### overwrite the number of threads, if really desired by the user
if [ -n "${_arg_threads}" ]; then
threads=$_arg_threads
fi
INPUTDIR=$_arg_inputdir
@@ -349,31 +365,32 @@ if [ -z "$SLURM_JOB_ID" ]; then
nnodes=$(printf %.0f $(echo "$nsamples / $ntasks_per_node" | bc -l))
fi
else
if [ $nsamples -lt 40 ]; then
if [ ${_arg_constraint} == "broadwell" ]; then
ntasks_per_node=40
else
ntasks_per_node=64
fi
### adjust ntasks for threads
ntasks_per_node=$((ntasks_per_node / threads))
### calculate the number of nodes to use
nnodes=$(printf %.0f $(echo "$nsamples / $ntasks_per_node " | bc -l))
if [ $nnodes -eq 1 ] && [ $nsamples -lt 40 ]; then
_arg_queue="smp"
_arg_constraint="broadwell" # overwrite, if set to skylake
ntasks_per_node=$nsamples
else
if [ ${_arg_constraint} == "broadwell" ]; then
ntasks_per_node=40
else
ntasks_per_node=64
fi
# how many nodes do we need?
nnodes=$(printf %.0f $(echo "$nsamples / $ntasks_per_node" | bc -l))
_arg_queue="parallel"
fi
fi
### overwrite our calculation, if really desired by the user
if [ -n "${_arg_nodes}" ]; then
nodes=$_arg_nodes
nnodes=$_arg_nodes
fi
# safe guards
if [ $nnodes -eq 0 ]; then
nnodes=1
fi
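As a worked example of the arithmetic above (numbers chosen purely for illustration): with 90 input files, threads=4 and 40 tasks on a broadwell node, ntasks_per_node becomes 40/4=10 and the bc/printf combination rounds 90/10 to 9 nodes; with only 3 files it rounds down to 0, which is why the safeguard promotes nnodes to 1. The rounding can be checked in isolation:

nsamples=90; ntasks_per_node=$((40 / 4))
printf '%.0f\n' $(echo "$nsamples / $ntasks_per_node" | bc -l)   # -> 9
nsamples=3
printf '%.0f\n' $(echo "$nsamples / $ntasks_per_node" | bc -l)   # -> 0, caught by the nnodes=1 guard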
#TODO: set threads, when a threaded tool becomes available
submit_statement="sbatch --no-requeue -o ${_arg_outdir}/${JOBTAG}_%j.out -J $JOBTAG -p $_arg_queue -c ${threads:=1} -A $_arg_assoc -t $_arg_runlimit -N $nnodes -C ${_arg_constraint} --ntasks-per-node $ntasks_per_node"
submit_statement="sbatch --no-requeue -o ${_arg_outdir}/${JOBTAG}_%j.out -J $JOBTAG -p $_arg_queue -c ${threads:=1} -A $_arg_assoc -t $_arg_runlimit -N $nnodes -C ${_arg_constraint} --ntasks-per-node $ntasks_per_node "
if [ -n "$_arg_dependency" ]; then
submit_statement="${submit_statement} --dependency=afterany:${_arg_dependency}"
@@ -408,7 +425,7 @@ if [ -z "$SLURM_JOB_ID" ]; then
exit
else
jobid=$(eval $submit_statement)
echo ${jobid##* } # just report the numerical ID
echo ${jobid##* }:$nsamples:${_arg_outdir} # just report the numerical ID
exit
fi
fi
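sbatch replies with a line of the form "Submitted batch job 1234567"; ${jobid##* } keeps only the last whitespace-separated word, i.e. the numeric job ID, and the new code appends the sample count and output directory to it. In isolation (values are made up):

jobid="Submitted batch job 1234567"          # what $(eval $submit_statement) would capture
nsamples=90; _arg_outdir=/path/to/outdir
echo ${jobid##* }                            # 1234567
echo ${jobid##* }:$nsamples:${_arg_outdir}   # 1234567:90:/path/to/outdir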
......
cmdfilewriter() {
cat <<EOF >$cmdfile
#!/bin/bash
# are we done?
source ${SCRIPT_PATH}/cleanup.sh
if [ \$1 = "done" ]; then
shrink_job
exit
fi
module load bio/SAMtools/1.9
input=\$1
# which is the sample?
sample=\$(basename \$input)
outfile=$_arg_outdir/\${sample%.*}.log
samtools flagstat -@ ${threads:=1} \$input > \$outfile
EOF
}
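Run outside the taskfarm, the command file generated above boils down to a plain samtools flagstat call with the configured thread count; roughly (paths and thread count are placeholders):

module load bio/SAMtools/1.9
samtools flagstat -@ 4 /path/to/sample.bam > /path/to/outdir/sample.log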