...
 
Commits (18)
......@@ -12,14 +12,16 @@ easyblock = 'Binary'
sources = ['./parallel_BLAST/LA_Wrapper',
'./parallel_BLAST/cleanup.sh',
'./parallel_BLAST/blast_wrap.sh',
'./parallel_BLAST/splitter.py']
'./parallel_BLAST/splitter.py',
'./parallel_BLAST/stage_in.sh']
unpack_sources = False
files_to_copy = ['LA_Wrapper',
'cleanup.sh',
'blast_wrap.sh',
'splitter.py']
'splitter.py',
'stage_in.sh']
postinstallcmds = ['mv %(installdir)s/parallel_BLAST/* %(installdir)s && rmdir %(installdir)s/parallel_BLAST']
......
This diff is collapsed.
......@@ -4,7 +4,10 @@ function cmdfilewriter()
cat <<EOF > $cmdfile
#!/bin/bash
module purge
#TODO: find a solution for the bug in BLAST+ AND to select the version by hand
module load bio/BLAST+/2.9.0-gompi-2019a
#module load bio/BLAST+/2.7.1-foss-2018a
# are we done?
source ${SCRIPT_PATH}/cleanup.sh
if [ \$1 = "done" ]; then
......@@ -16,15 +19,20 @@ cat <<EOF > $cmdfile
tmp_out=${JOBDIR}/\$outfname
trap "rm -f \$tmp_out" EXIT
START_BLAST=\$(date +%s)
$BLASTEXE -num_threads $SLURM_CPUS_PER_TASK -db $DATABASE $BLASTPARAMS -query \$1 -out \$tmp_out
$_arg_executable -num_threads $SLURM_CPUS_PER_TASK -db $DATABASE $BLASTPARAMS -query \$1 -out \$tmp_out
success=\$?
END_BLAST=\$(date +%s)
elapsed=\$(bc <<< "scale=1; \$((\$END_BLAST - \$START_BLAST))/60")
echo "Elapsed: \$elapsed"
#echo "Elapsed for '\$1': \$elapsed"
# compress, when done
gzip \$tmp_out
# copy back, when ready
mv \${tmp_out}.gz ./output/\${outfname}.gz
# only proceed, when ready
if [ \$success -eq 0 ]; then
# compress, when done
gzip \$tmp_out
# copy back, when ready
mv \${tmp_out}.gz ./output/\${outfname}.gz
fi
# we only consider the blast exit code for the total exit code
exit \$success
EOF
}
#!/usr/bin/env python
# dummy line to introduce a line break
import pip
# takes little time if the dependency is already satisfied
pip.main(['install', 'biopython'])
from Bio import SeqIO
import sys
import os
......@@ -22,7 +17,7 @@ batch = list()
for pos, entry in enumerate(record_iter):
if pos == 0:
group += 1
filename = 'group_%5d.fasta' % group
filename = 'group_%05d.fasta' % group
handle = open(os.path.join('scratch', filename), 'w')
if (pos % nlines == 0 and pos != 0):
count = SeqIO.write(batch, handle, 'fasta')
......@@ -30,10 +25,10 @@ for pos, entry in enumerate(record_iter):
handle.close()
batch = list()
group += 1
filename = 'group_%s.fasta' % group
filename = 'group_%05d.fasta' % group
handle = open(os.path.join('scratch', filename), 'w')
batch.append(entry)
# write the remaining records (the last batch) to the current file
count = SeqIO.write(batch, handle, 'fasta')
print('Wrote %s records to %s' % (count, filename))
#print('Wrote %s records to %s' % (count, filename))
handle.close()
......@@ -4,12 +4,9 @@ cat <<EOF > $stagefile
#!/bin/bash
target=/localscratch/$SLURM_JOB_ID/ramdisk
cd \$target
for fname in \$(find ${DATABASEPATH} -type f ); do
#suffix=\${fname#*.}
outfile=\$(basename \${fname})
cp -L \$fname \$outfile
done
parallel -j 4 cp {} {/} ::: \$(find -L ${DATABASEPATH} -type f )
cd -
wait
EOF
}
......
......@@ -534,7 +534,7 @@ if [ $_arg_paired -eq 1 ]; then
if [[ ${samples[0]} == *"_R1"* || ${samples[0]} == *"_R2"* ]]; then
first='_R1'
second='_R2'
elif [[ ${samples[0]} == *"_1"* ]]; then
elif [[ ${samples[0]} == *"_1"* ]] || [[ ${samples[0]} == *"_2"* ]]; then
first='_1'
second='_2'
else
......