Commit ed3b2c3e authored by Christian Meesters

first modularization: splitter script now outsourced and addressed by variable

parent e24c6be6
......@@ -532,49 +532,6 @@ if [[ ! $SCRIPT == /* ]]; then
SCRIPT="$PWD/$SCRIPT";
fi
### check if this script is on node by checking env-variable $LSB_JOBID, else send it to LSF with given parameters and exit
# Inline Python helper that splits a FASTA file into chunk files below ./scratch.
# Arguments of the Python program: <fasta_file> <records_per_output_file>.
# It writes scratch/group_1.fasta, scratch/group_2.fasta, ... holding at most
# <records_per_output_file> records each; the scratch directory must already exist.
# NOTE: the caller passes this text to python -c through eval inside double
# quotes, so the Python code must not contain double quotes, backslashes,
# backticks or dollar signs.
SPLITUPSCRIPT=$(cat <<'EOF'
# dummy line to introduce a line break
import os
import subprocess
import sys

try:
    from Bio import SeqIO
except ImportError:
    # pip.main is not available in pip 10 and later, so install through the
    # supported command line entry point of the running interpreter instead
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'biopython'])
    from Bio import SeqIO

fasta_file = sys.argv[1]
# number of sequences per scratch output file
nlines = int(sys.argv[2])
if nlines < 1:
    sys.exit('Number of sequences per output file must be at least 1.')
print('Going to split: >%s<' % fasta_file)
print('Going to split in %s lines per output file.' % nlines)


def write_group(batch, group):
    # write one batch of records to scratch/group_<group>.fasta and
    # return the number of written records together with the file name
    filename = 'group_%s.fasta' % group
    with open(os.path.join('scratch', filename), 'w') as handle:
        count = SeqIO.write(batch, handle, 'fasta')
    return count, filename


group = 0
batch = []
count = 0
filename = None
# the with block makes sure the input file is closed again
with open(fasta_file) as fasta_handle:
    for entry in SeqIO.parse(fasta_handle, 'fasta'):
        batch.append(entry)
        if len(batch) == nlines:
            group += 1
            count, filename = write_group(batch, group)
            batch = []
# take care of the rest
if batch:
    group += 1
    count, filename = write_group(batch, group)
if group == 0:
    # an empty input used to end in a NameError, fail with a clear message
    sys.exit('No sequences found in >%s<' % fasta_file)
print('Wrote %s records to %s' % (count, filename))
EOF
)
### Check whether this script is already running inside a SLURM job (i.e. $SLURM_JOB_ID is set); otherwise submit it to SLURM with the given parameters and exit
if [ -z "$SLURM_JOB_ID" ]; then
submit_call="sbatch --no-requeue -o ${JOBTAG}_%j.out -J $JOBTAG -p $_arg_queue -A $_arg_assoc -t $_arg_runlimit -N $_arg_nodes -n $((64 * $_arg_nodes / $_arg_blast_threads)) --mem=$_arg_mem --ramdisk=${_arg_ramdisk} -c $_arg_blast_threads --reservation=$_arg_reservation $SCRIPT --partition $_arg_queue --account $_arg_assoc --nodes $_arg_nodes --time $_arg_runlimit --reservation=$_arg_reservation --threads $_arg_blast_threads --splitup $_arg_splitup_per_queryfile --blastparams=\"$BLASTPARAMS\" --executable=$_arg_executable $FASTA $DATABASE"
......@@ -636,7 +593,7 @@ if [ ! -d "$WORKDIR/$SPLITFILEDIR" ]; then
mkdir -p "$WORKDIR/output" || exit 1;
cd "$WORKDIR"
echo "executing scratch generator on $FASTA ($_arg_splitup_per_queryfile entries per file)"
eval "python -c \"$SPLITUPSCRIPT\" $FASTA $_arg_splitup_per_queryfile" & # splitup queryfile
eval "${SCRIPT_PATH}/splitter.py $FASTA $_arg_splitup_per_queryfile" & # splitup queryfile
PID=$!
queue $PID
fi
......
File mode changed from 100644 to 100755
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment