Commit 5d9b50ab authored by Christian Meesters

Merge branch 'devel' into 'master'

Devel

See merge request !16
parents af3b0e6c 150a061e
# easyconfig file for the ZDV taskfarm script
name = 'parallel_BLAST'
version = '0.4'
version = '0.5'
homepage = 'https://version.zdv.uni-mainz.de'
homepage = 'https://gitlab.rlp.net/hpc-jgu-lifescience/seq-analysis/'
description = 'script to accelerate NCBI BLAST runs on Mogon'
@@ -19,7 +19,7 @@
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL CHRISTIAN MEESTERS OR THE JGU BE LIABLE
# ARE DISCLAIMED. IN NO EVENT SHALL CHRISTIAN MEESTERS OR THE JGU MAINZ BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
@@ -43,15 +43,11 @@ module purge
# load the most current version of GNU parallel
module load tools/parallel
# load the most current version of BLAST+
#module load bio/BLAST+ # do not rely on most recent version
module load bio/BLAST+/2.7.1-foss-2018a
module load lang/Python/3.6.4-foss-2018a
### set up variables for THIS script, giving an absolute path if necessary
SCRIPT="$0"
SCRIPT_VERSION="0.4"
SCRIPT_VERSION="0.5"
# TODO: delete the following 3 functions, once sbcast is working
function queue {
@@ -102,6 +98,10 @@ error() {
(>&2 echo "ERROR: $1")
}
warning() {
(>&2 echo "WARNING: $1")
}
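The body of the queue helper used for the stage-in copies later on is elided in this hunk. A minimal sketch of such a PID-throttling queue, under the assumption that it merely caps the number of concurrent background jobs (MAX_JOBS and the array name are hypothetical, not the script's actual code):
MAX_JOBS=8
_pid_queue=()
function queue {
    _pid_queue+=("$1")                    # remember the background PID
    if [ "${#_pid_queue[@]}" -ge "$MAX_JOBS" ]; then
        wait "${_pid_queue[0]}"           # block until the oldest job finishes
        _pid_queue=("${_pid_queue[@]:1}") # drop it from the queue
    fi
}
# usage: start a job in the background, then register its PID
sleep 1 &
queue $!
wait                                      # drain the remaining jobs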
# THE DEFAULTS INITIALIZATION - POSITIONALS
_positionals=()
_arg_leftovers=()
@@ -112,10 +112,11 @@ _arg_queue=nodeshort
_arg_assoc=$(sacct -nu $USER -o Account | tail -n1)
declare -i _arg_nodes=1
_arg_reservation=''
declare _arg_mem=1G
declare _memory_request=115500M
declare _arg_mem=0
declare -i _arg_blast_threads=1
_arg_blast_params=''
declare -i _arg_splitup_per_queryfile=20
declare -i _arg_splitup_per_queryfile=0
declare _arg_ramdisk=40G
_arg_blastdir='.'
_arg_executable='blastx'
@@ -126,26 +127,26 @@ print_help ()
{
echo "This script's help msg"
printf 'Usage: %s [-l|--runlimit <arg>] [-p|--partition <arg>] [-s|--splitup <arg>] [-N|--nodes <arg>] [--executable <arg>] [-m|--mem <arg>] [--blastparams <string>] [-r|--ramdisk <arg>] [--blastdir <arg>] [--(no-)test] [-h|--help] <FASTA> <DATABASE>\n' "$(basename $0)\n"
printf 'HINT: The FASTA and DATABASE items need to be full paths to files.'
printf "\t%s\n" "<FASTA>: path to the query FASTA file"
printf "\t%s\n" "<DATABASE>: path to the database file"
printf "\t%s\n" "-l,--runlimit: runlimit default is 300 min, queue will be nodeshort, if <= 300 (default)"
printf "\t%s\n" "-p,--partition: queue (default is nodeshort)"
printf "\t%s\n" "-A,--account: queue (default is the last submit account; an error is triggered if none specified nor can be deduced)"
printf "\t%s\n" "-N,--nodes: number of nodes (1 is the default)"
printf "\t%s\n" "--reservation: reservation to use (none is the default)"
printf "\t%s\n" "--time: time in minutes (300 is the default)"
printf "\t%s\n" "-m,--mem: memory which is required per node (defaults to 115500 M, but should be min. 242500 M for blastn)"
printf "\t%s\n" "-r,--ramdisk: ramdisk size in units of GiB (default is 40 GiB)"
printf "\t%s\n" "-t,--threads: blast threads (default is 1)"
printf "\t%s\n" "--blastparams: blast parameters (default is -outfmt 5 (for xml output))"
printf "\t%s\n" "-s,--splitup: No. of FASTA sequences per query file (default is 20)"
printf "\t%s\n" "--blastdir: output directory (default is composition of input names)"
printf "\t%s\n" "--executable: choose executable (currently only from NCBI-BLAST, default: blastx)"
printf "\t%s\n" "--compress: if set, the output files will be merged and compressed (time consuming!, defaultt: off)"
printf "\t%s\n" "--test,--no-test: dry run, testing only (off by default)"
printf "\t%s\n" "--credits,--version: Prints credits and a brief version history and exits"
printf "\t%s\n" "-h,--help: Prints help"
printf 'HINT: The FASTA and DATABASE items need to be full paths to files.\n'
printf "\\t\\033[1m%s\\033[0m\\t\\t%s\\n" "<FASTA>" "path to the query FASTA file"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "<DATABASE>" "path to the database file"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "-l,--runlimit" "runlimit default is 300 min, queue will be nodeshort, if <= 300 (default)"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "-p,--partition" "queue (default is nodeshort)"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "-A,--account" "SLURM account (default is the last submit account; an error is triggered if none specified nor can be deduced)"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "-N,--nodes" "number of nodes (1 is the default)"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "--reservation" "reservation to use (none is the default)"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "--time" "time in minutes (300 is the default)"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "-m,--mem" "memory which is required per node (defaults to 115500 M, but should be min. 242500 M for blastn, omit the unit for submitting)"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "-r,--ramdisk" "ramdisk size in units of GiB (default is 40 GiB)"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "-t,--threads" "blast threads (default is 1)"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "--blastparams" "blast parameters (default is -outfmt 6 (for blank tabulated output))"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "-s,--splitup" "No. of FASTA sequences per query file (default is 20)"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "--blastdir" "output directory (default is composition of input names)"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "--executable" "choose executable (currently only from NCBI-BLAST, default: blastx)"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "--compress" "if set, the output files will be merged and compressed (time consuming!, defaultt: off)"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "--test,--no-test" "dry run, testing only (off by default)"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "--credits,--version" "Prints credits and a brief version history and exits"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "-h,--help" "Prints help"
echo
echo -e "\e[3mWARNINGS:\e[0m"
echo -e "\e[3m- BLAST parameters:\e[0m"
@@ -200,6 +201,12 @@ credits()
echo "- v0.3.2 -- 16. Jan. 2019 -- hot fix for new ramdisk and slurmstepd support"
echo "- v0.4 -- 06. Mar. 2019 -- refactored version:"
echo " - executables now pluggable"
echo "- v0.5 -- 17. Aug. 2019 -- fix: parser did not work for '--mem'-arg properly"
echo " update: - clearer UI"
echo " - better default memory settings"
echo " - faster stage-in for reference data"
echo " - automerge for -outfmt=6"
echo " - -outfmt=6 is now the default"
echo
echo "Current version is: $SCRIPT_VERSION"
echo
@@ -423,11 +430,16 @@ FASTA=$_arg_fasta
DATABASE=$_arg_database
### check if query & database exist
if [ ! -e "$FASTA" ]; then
if [[ $_arg_test == "off" ]] && [ ! -e "$FASTA" ]; then
error "FASTA input was: '$FASTA' - no such file!"
exit 1
fi
if [[ $_arg_test == "off" ]] && [ ! -d "$DATABASE" ]; then
error "DATABASE input was: '$DATABASE' - no such directory!"
exit 1
fi
#TODO: differentiate between blastn,x,p -- for now, all are equal
if [ "blastx" = "${_arg_executable,,}" ]; then
executable="blastx"
@@ -471,9 +483,26 @@ DB=${DATABASEID%.*}
JOBTAG="BLAST_${FA}_VS_${DB}"
# how many entries are there in the FASTA file?
echo "Checking input file"
nentries=$(grep -c '^>' "$FASTA")
# try to set the split size to a value that yields roughly 10,000 split files
if [ $_arg_splitup_per_queryfile -ne 0 ]; then # the user requested an explicit split size
nsplits=$((nentries / _arg_splitup_per_queryfile))
if [ $nsplits -gt 50000 ]; then
error "There would be more than '$nsplits' files in scratch."
exit 1
elif [ $nsplits -gt 15000 ]; then
warning "There will be '$nsplits' files in scratch."
fi
else # infer the value
    _arg_splitup_per_queryfile=$((nentries / 10000))
    # guard against small inputs: ensure at least one sequence per split file
    if [ $_arg_splitup_per_queryfile -eq 0 ]; then
        _arg_splitup_per_queryfile=1
    fi
fi
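A worked example of the inference above (illustrative numbers, not part of the commit):
nentries=1500000                     # sequences counted in the FASTA file
splitup=$((nentries / 10000))        # -> 150 sequences per split file
nsplits=$((nentries / splitup))      # -> ~10,000 split files in scratch
echo "$splitup sequences per file, $nsplits files"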
# default values, see:
# https://www.ncbi.nlm.nih.gov/books/NBK279675/
DEFAULT_BLASTPARAMS='-outfmt 5'
DEFAULT_BLASTPARAMS='-outfmt 6'
# sanity check: '-outfmt' in blast parameters?
if [[ "$_arg_blastparams" =~ "outfmt" ]]; then
BLASTPARAMS=$_arg_blastparams
@@ -483,8 +512,10 @@ fi
# test whether the output is xml or not
if [[ "$BLASTPARAMS" =~ '-outfmt 5' ]]; then
    XMLOUT=1
    OUTOUT=0
elif [[ "$BLASTPARAMS" =~ '-outfmt 6' ]]; then
    XMLOUT=0
    OUTOUT=1
fi
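A more defensive variant of this detection would extract the numeric format value instead of matching fixed strings; a sketch (an editor's suggestion, not the script's code):
re='-outfmt[[:space:]]+([0-9]+)'
if [[ "$BLASTPARAMS" =~ $re ]]; then
    outfmt="${BASH_REMATCH[1]}"      # the number following '-outfmt'
else
    outfmt=6                         # fall back to the script's default
fi
XMLOUT=0; OUTOUT=0
[ "$outfmt" -eq 5 ] && XMLOUT=1
[ "$outfmt" -eq 6 ] && OUTOUT=1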
# TODO: port to M2
@@ -550,15 +581,52 @@ fi
### what is the size of the reference directory?
_arg_ramdisk=$(du -sL --block-size=1M "$_arg_database" | cut -f1 )M
if [[ ! $SCRIPT == /* ]]; then
SCRIPT="$PWD/$SCRIPT";
fi
# which cluster are we on?
cluster=$(sacctmgr show cluster -p| tail -n1| cut -f1 -d '|')
# if the cluster is Mogon I, set the memory default accordingly:
if [ "$cluster" == "mogon" ]; then
if [ $_arg_mem -ne 0 ]; then # user tries to select a non-default memory
allowed_mem_setting="115500 242500 497500"
if [[ ! $allowed_mem_settings =~ (^|[[:space:]])"_arg_mem"($|[[:space:]]) ]]; then
error "Memory selection out to be one of [$allowed_mem_settings]"
fi
else # set a default memory
if [ "$_arg_executable" == "blastn" ]; then
_memory_request="242500M"
else
_memory_request="115500M"
fi
fi
else
if [ $_arg_mem -ne 0 ]; then # user tries to select a non-default memory
allowed_mem_setting="115500 242500 497500"
if [[ ! $allowed_mem_settings =~ (^|[[:space:]])"_arg_mem"($|[[:space:]]) ]]; then
error "Memory selection out to be one of [$allowed_mem_settings]"
fi
else # set a default memory
if [ "$_arg_executable" == "blastn" ]; then
_memory_request="246000M"
else
_memory_request="120000M"
fi
fi
fi
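The whitespace-delimited membership test can be exercised standalone; a small sketch with an example value:
allowed_mem_settings="115500 242500 497500"
_arg_mem=242500                      # example user request (MiB, no unit)
if [[ $allowed_mem_settings =~ (^|[[:space:]])"$_arg_mem"($|[[:space:]]) ]]; then
    echo "ok: ${_arg_mem}M is an allowed request"
else
    echo "rejected: ${_arg_mem}M"
fi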
### check whether this script already runs on a node (env variable $SLURM_JOB_ID is set); otherwise submit it to SLURM with the given parameters and exit
if [ -z "$SLURM_JOB_ID" ]; then
export SCRIPT_PATH=$(dirname $0)
submit_statement="sbatch --no-requeue -o ${JOBTAG}_%j.out -J $JOBTAG -p $_arg_queue -A $_arg_assoc -t $_arg_runlimit -N $_arg_nodes -n $((64 * $_arg_nodes / $threads)) --mem=$_arg_mem --ramdisk=${_arg_ramdisk} -c $threads"
submit_statement="sbatch --no-requeue -o ${JOBTAG}_%j.out -J $JOBTAG -p $_arg_queue -A $_arg_assoc -t $_arg_runlimit -N $_arg_nodes -n $((64 * $_arg_nodes / $threads)) --mem=$_memory_request --ramdisk=${_arg_ramdisk} -c $threads"
script_statement="$SCRIPT --partition $_arg_queue --account $_arg_assoc --nodes $_arg_nodes --time $_arg_runlimit --reservation=$_arg_reservation --threads $_arg_blast_threads --splitup $_arg_splitup_per_queryfile --blastparams=\"$BLASTPARAMS\" --executable=$_arg_executable $FASTA $DATABASE"
if [ -n "$_arg_reservation" ]; then
@@ -598,18 +666,35 @@ RAMDISK=$JOBDIR/ramdisk
HOSTLIST=$(scontrol show hostname $SLURM_JOB_NODELIST | paste -d, -s | tr ',' ' ')
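scontrol show hostname expands SLURM's compact nodelist notation, one name per line; piping through paste and tr turns it into a plain word list. An illustrative run (hypothetical node names):
scontrol show hostname "node[01-03]"                     # node01, node02, node03 - one per line
scontrol show hostname "node[01-03]" | paste -d, -s | tr ',' ' '
# -> "node01 node02 node03"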
QUEUE=''
for HOST in $HOSTLIST; do
# when copying dereference putative links!
eval "ssh $HOST cp -Lr $DATABASEPATH $RAMDISK/. &"
PID=$!
queue $PID
# commented out because of a bug in slurm 16.05, see TODO item
if [ -L ${DATABASEPATH} ]; then
warning "If the reference directory is a link, fast stage-in is not possible."
for fname in ${DATABASEPATH}/*; do
eval "ssh $HOST cp -L $fname ${RAMDISK}/$(basename $fname)" &
PID=$!
queue $PID
done
else
for fname in ${DATABASEPATH}/*; do
if [ -L "$fname" ]; then
eval "ssh $HOST cp -L $fname ${RAMDISK}/$(basename $fname)" &
PID=$!
queue $PID
else
eval "ssh $HOST dd bs=4096 if=$fname of=${RAMDISK}/$(basename $fname)" &
PID=$!
queue $PID
fi
done
fi
# TODO: check for dereferencing links, before enabling
# TODO: check for performance, before re-enabling
#sbcast $FILE $RAMDISK/$(basename $FILE)
done
#DATABASE=$RAMDISK/$DATABASE
DATABASE=$RAMDISK/$(basename $DATABASEPATH)
DATABASE=$RAMDISK #/$(basename $DATABASEPATH)
WORKDIR=$PWD/$BLASTDIR/$SLURM_JOB_NAME
# this script may never output to a user's $HOME
@@ -700,6 +785,24 @@ if [ $n_unfinished_files -eq 0 ] && [[ $_arg_compress == "on" ]] && [ $XMLOUT -e
#rm ./output/group_*.xml &
#rm -rf ./scratch &
wait
elif [ $n_unfinished_files -eq 0 ] && [[ $_arg_compress == "on" ]] && [ $OUTOUT -eq 1 ]; then
# shrink the allocation, such that only the minimum necessary is accounted for
#scontrol update job=$SLURM_JOB_ID NumNodes=1
pwd
# merge all tabular output files
STARTC=$(date +%s.%N)
outfile="${JOBTAG}.out"
# concatenate the compressed split outputs into one file
for split_file in ./output/group_*gz; do
    zcat $split_file >> $outfile
done
pigz -p 16 $outfile &
ENDC=$(date +%s.%N)
elapsedc=$(bc <<< "scale=1; (($ENDC-$STARTC))/60")
rm -rf $WORKDIR/$SPLITFILEDIR &
wait
fi
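Plain concatenation is only valid for the tabular format: -outfmt 6 rows are independent of each other, whereas -outfmt 5 yields one XML document per split file that cannot simply be appended. A small self-contained demonstration of the merge step (illustrative, not part of the commit):
printf 'q1\tsubjA\t98.2\n' | gzip > part1.gz
printf 'q2\tsubjB\t87.5\n' | gzip > part2.gz
for part in part*.gz; do zcat "$part" >> merged.tsv; done
cat merged.tsv                        # two independent rows, order irrelevant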
# marks the end of this run
@@ -3,8 +3,8 @@ function cmdfilewriter()
{
cat <<EOF > $cmdfile
#!/bin/bash
module load bio/BLAST+
module purge
module load bio/BLAST+/2.9.0-gompi-2019a
# are we done?
source ${SCRIPT_PATH}/cleanup.sh
if [ \$1 = "done" ]; then
@@ -16,7 +16,6 @@ function shrink_job {
keep+=($host)
done
set -x
# in the case of STAR we need to clean up the empty hosts,
# before we can hand them back to slurm!
if [[ "STAR" = "$executable" ]]; then
@@ -35,7 +34,6 @@ function shrink_job {
# ssh $node STAR --runThreadN 1 --genomeLoad Remove --genomeDir ${_arg_reference}
#done
fi
set +x
if [ ${#keep[@]} -lt ${#HOSTLIST[@]} ]; then
# now transform to a SLURM-readable nodelist
name = 'parallel_QATools'
version = '0.1'
version = '0.2'
homepage = 'https://gitlab.rlp.net/hpc-jgu-lifescience/seq-analysis'
@@ -11,13 +11,15 @@ easyblock = 'Binary'
sources = ['./parallel_QATools/QAWrapper',
'./parallel_QATools/cleanup.sh',
'./parallel_QATools/fastqc_wrap.sh',
'./parallel_QATools/samtools_wrap.sh',
]
unpack_sources = False
files_to_copy = ['QAWrapper',
'cleanup.sh',
'fastqc_wrap.sh']
'fastqc_wrap.sh',
'samtools_wrap.sh']
postinstallcmds = ['mv %(installdir)s/parallel_QATools/* %(installdir)s && rmdir %(installdir)s/parallel_QATools']
@@ -26,5 +28,6 @@ moduleclass = 'bio'
sanity_check_commands = ['bash -n']
sanity_check_paths = { 'files' : ['QAWrapper',
'cleanup.sh',
'fastqc_wrap.sh'] ,
'fastqc_wrap.sh',
'samtools_wrap.sh'] ,
'dirs' : [],}
@@ -23,6 +23,7 @@ else
_arg_queue="parallel"
_arg_constraint="broadwell"
fi
_arg_executable="fastqc" # default
_arg_reservation=""
_arg_dependency=""
_arg_tag=""
@@ -57,6 +58,7 @@ print_help ()
printf "\\t\\t\\t%s\\n" "- possible arguments: fastqc"
printf "\\t\\t\\t%s\\n" "- check is case insensitive"
printf "\\t\\t\\t%s\\n" "- defaults to 'fastqc'"
printf "\\t\\t\\t%s\\n" "- 'samtools' is possible and will run 'samtools flagstat' on '.bam' files"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "-l,--runlimit" "runlimit default is 300 min"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "-p,--partition" "SLURM partition"
printf "\\t\\t\\t%s\\n" "- default is 'parallel' on Mogon II and 'nodeshort' on Mogon I"
@@ -89,6 +91,7 @@ credits()
echo "- v0.1 -- 08. Jan. 2019 -- release of the implementation for SLURM, supporting the"
echo " ability to run fastqc accross nodes."
echo "- v0.1.1 -- 28. Jan. 2019 -- minor bug fix: resource allocation improved"
echo "- v0.3 -- 02. Apr. 2019 -- added support for 'samtools flagstat'"
}
# function to redirect simple error messages to stderr
@@ -276,28 +279,40 @@ fi
if [ "fastqc" = "${_arg_executable,,}" ]; then
executable="fastqc"
threads=1
samples=$(find $(realpath $_arg_inputdir) -type f \( -iname \*.gz -o -iname \*.fastq -o -iname \*.fq \) )
JOBTAG="QA_on_$(basename $_arg_inputdir)"
if [ -z "$SLURM_JOB_ID" ]; then
source $(dirname $SCRIPT)/fastqc_wrap.sh
else
source ${SCRIPT_PATH}/fastqc_wrap.sh
fi
elif [ "samtools" = "${_arg_executable,,}" ]; then
executable="samtools"
threads=4
samples=$(find $(realpath $_arg_inputdir) -type f \( -iname \*.bam \))
JOBTAG="QA_on_mapped_files_$(basename $_arg_inputdir)"
if [ -z "$SLURM_JOB_ID" ]; then
source $(dirname $SCRIPT)/samtools_wrap.sh
else
source ${SCRIPT_PATH}/samtools_wrap.sh
fi
else
error "executable '$_arg_executable' not recognized."
print_help
exit 2
fi
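Hypothetical invocations of the wrapper after this change (the script name and paths are placeholders, and the input directory is assumed to be the positional argument):
./QAWrapper --executable fastqc   /path/to/fastq_dir    # FastQC on *.fastq / *.fq / *.gz
./QAWrapper --executable samtools /path/to/bam_dir      # 'samtools flagstat' on *.bam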
### what shall be our job tag? - overwrite if given by user
if [[ -z "${_arg_tag/ /}" ]]; then
_arg_tag="$JOBTAG"
fi
### special case for jgu-cbdm
if [[ "$cluster" = "mogon" && "$_arg_assoc" = "jgu-cbdm" ]]; then
_arg_queue="andrade,${_arg_queue}"
fi
### check if output directory already exists
if [[ -z "${_arg_outdir// }" ]]; then
_arg_outdir=$(dirname $_arg_inputdir)/${JOBTAG}
@@ -327,9 +342,10 @@ if [ "$cluster" = "mogon2" ] && [ -z "$SLURM_JOB_ID" ]; then
fi
fi
### overwrite the number of threads, if really desired by the user
if [ -n "${_arg_threads}" ]; then
threads=$_arg_threads
fi
INPUTDIR=$_arg_inputdir
@@ -349,31 +365,32 @@ if [ -z "$SLURM_JOB_ID" ]; then
nnodes=$(printf %.0f $(echo "$nsamples / $ntasks_per_node" | bc -l))
fi
else
if [ $nsamples -lt 40 ]; then
if [ ${_arg_constraint} == "broadwell" ]; then
ntasks_per_node=40
else
ntasks_per_node=64
fi
### adjust ntasks for threads
ntasks_per_node=$((ntasks_per_node / threads))
### calculate the number of nodes to use
nnodes=$(printf %.0f $(echo "$nsamples / $ntasks_per_node " | bc -l))
if [ $nnodes -eq 1 ] && [ $nsamples -lt 40 ]; then
_arg_queue="smp"
_arg_constraint="broadwell" # overwrite, if set to skylake
ntasks_per_node=$nsamples
else
if [ ${_arg_constraint} == "broadwell" ]; then
ntasks_per_node=40
else
ntasks_per_node=64
fi
# how many nodes do we need?
nnodes=$(printf %.0f $(echo "$nsamples / $ntasks_per_node" | bc -l))
_arg_queue="parallel"
fi
fi
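A worked example of the allocation arithmetic above (illustrative values, not part of the commit):
nsamples=100; threads=4
ntasks_per_node=$((40 / threads))    # broadwell: 40 cores -> 10 tasks per node
nnodes=$(printf %.0f "$(echo "$nsamples / $ntasks_per_node" | bc -l)")   # -> 10
echo "$nnodes nodes with $ntasks_per_node tasks each"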
### overwrite our calculation, if really desired by the user
if [ -n "${_arg_nodes}" ]; then
nodes=$_arg_nodes
nnodes=$_arg_nodes
fi
# safeguards
if [ $nnodes -eq 0 ]; then
nnodes=1
fi
#TODO: set threads, when a threaded tool becomes available
submit_statement="sbatch --no-requeue -o ${_arg_outdir}/${JOBTAG}_%j.out -J $JOBTAG -p $_arg_queue -c ${threads:=1} -A $_arg_assoc -t $_arg_runlimit -N $nnodes -C ${_arg_constraint} --ntasks-per-node $ntasks_per_node"
submit_statement="sbatch --no-requeue -o ${_arg_outdir}/${JOBTAG}_%j.out -J $JOBTAG -p $_arg_queue -c ${threads:=1} -A $_arg_assoc -t $_arg_runlimit -N $nnodes -C ${_arg_constraint} --ntasks-per-node $ntasks_per_node "
if [ -n "$_arg_dependency" ]; then
submit_statement="${submit_statement} --dependency=afterany:${_arg_dependency}"
@@ -408,7 +425,7 @@ if [ -z "$SLURM_JOB_ID" ]; then
exit
else
jobid=$(eval $submit_statement)
echo ${jobid##* } # just report the numerical ID
echo ${jobid##* }:$nsamples:${_arg_outdir} # report the numerical job ID, the sample count and the output directory
exit
fi
fi
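The submit branch now prints "jobid:nsamples:outdir"; a calling script can split that result like this (a sketch, the example value is made up):
result="1234567:96:/project/QA_on_run42"
IFS=: read -r jobid nsamples outdir <<< "$result"
echo "job $jobid processes $nsamples samples into $outdir"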
cmdfilewriter() {
cat <<EOF >$cmdfile
#!/bin/bash
# are we done?
source ${SCRIPT_PATH}/cleanup.sh
if [ \$1 = "done" ]; then
shrink_job
exit
fi
module load bio/SAMtools/1.9
input=\$1
# which is the sample?
sample=\$(basename \$input)
outfile=$_arg_outdir/\${sample%.*}.log
samtools flagstat -@ ${threads:=1} \$input > \$outfile
EOF
}
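The generated command file is meant to be called once per sample and once with "done" to trigger the cleanup; hypothetical usage (paths are placeholders):
bash "$cmdfile" /path/to/sample1.bam   # writes sample1.log via 'samtools flagstat'
bash "$cmdfile" done                   # shrinks the job allocation and exits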