...
 
Commits (5)
......@@ -140,7 +140,7 @@ print_help ()
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "-r,--ramdisk" "ramdisk size in units of GiB (default is 40 GiB)"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "-t,--threads" "blast threads (default is 1)"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "--blastparams" "blast parameters (default is -outfmt 6 (for blank tabulated output))"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "-s,--splitup" "No. of FASTA sequences per query file (default is 20)"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "-s,--splitup" "No. of FASTA sequences per query file (default is to generate ~5000 files)"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "--blastdir" "output directory (default is composition of input names)"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "--executable" "choose executable (currently only from NCBI-BLAST, default: blastx)"
printf "\\t\\033[1m%s\\033[0m\\t%s\\n" "--compress" "if set, the output files will be merged and compressed (time consuming!, default: off)"
......@@ -175,10 +175,14 @@ credits()
{
echo "The original implementation (2013/2014) was written by Christoph Martin (ZDV, UNIX group)"
echo "Benjamin Rieger (Institut für Molekulargenetik) contributed a perl implementation"
echo "of a format conform splitting of FASTA files."
echo "of a format conform splitting of FASTA files, which is not used anymore."
echo "The original implementation was an LSF chain job. It was eventually adopted and maintained by"
echo "Christian Meesters (ZDV, HPC group) from 2017 onwards."
echo
echo "I am particularly grateful for their feedback to:"
echo "- Lukas Hellman (AG Hankeln)"
echo "- Benjamin Rieger (NGS Facility)"
echo
echo "History of the re-implementation:"
echo "- v0.1 -- 27. Sep. 2017 -- release of the re-implementation for SLURM supporting the"
echo " ability to compute across nodes."
......@@ -201,7 +205,7 @@ credits()
echo "- v0.3.2 -- 16. Jan. 2019 -- hot fix for new ramdisk and slurmstepd support"
echo "- v0.4 -- 06. Mar. 2019 -- refactored version:"
echo " - executables now pluggable"
echo "- v0.5 -- 17. Aug. 2019 -- fix: parser did not work for '--mem'-arg properly"
echo "- v0.5 -- 21. Aug. 2019 -- fix: parser did not work for '--mem'-arg properly"
echo " update: - clearer UI"
echo " - better default memory settings"
echo " - faster stage-in for reference data"
......@@ -494,10 +498,10 @@ if [ $_arg_splitup_per_queryfile -ne 0 ]; then # the user thinks differently?
error "There would be more than '$nsplits' files in scratch."
exit 1
elif [ $nsplits -gt 15000 ]; then
warning "There will be '$nsplits' files in scratch."
warning "There will be '$nsplits' files in scratch -- resulting in poor performance."
fi
else # infer the value
_arg_splitup_per_queryfile=$((nentries / 10000))
_arg_splitup_per_queryfile=$((nentries / 5000))
fi
# default values, see:
......
......@@ -22,7 +22,7 @@ batch = list()
for pos, entry in enumerate(record_iter):
if pos == 0:
group += 1
filename = 'group_%s.fasta' % group
filename = 'group_%5d.fasta' % group
handle = open(os.path.join('scratch', filename), 'w')
if (pos % nlines == 0 and pos != 0):
count = SeqIO.write(batch, handle, 'fasta')
......