Commit fc11b360 authored by Christian Meesters

outsourced splitting script

parent 1994a1dd
#!/usr/bin/env python
# dummy line to introduce a line break
"""Split a FASTA file into numbered chunk files below ``scratch/``.

Usage: SCRIPT FASTA_FILE RECORDS_PER_FILE

The records of FASTA_FILE are written, in input order, to
``scratch/group_1.fasta``, ``scratch/group_2.fasta``, ... with at most
RECORDS_PER_FILE records in each file.
"""
import os
import subprocess
import sys


def _import_seqio():
    """Return the ``Bio.SeqIO`` module, installing Biopython if it is missing.

    Raises:
        subprocess.CalledProcessError: if the ``pip install`` run fails.
        ImportError: if Biopython still cannot be imported afterwards.
    """
    try:
        from Bio import SeqIO
    except ImportError:
        # pip offers no supported in-process API (``pip.main`` is gone since
        # pip 10), so run pip as a subprocess of the current interpreter.
        subprocess.check_call(
            [sys.executable, '-m', 'pip', 'install', 'biopython'])
        import importlib
        # Make the freshly installed package visible to the import system;
        # ``invalidate_caches`` does not exist on Python 2.
        invalidate = getattr(importlib, 'invalidate_caches', None)
        if invalidate is not None:
            invalidate()
        from Bio import SeqIO
    return SeqIO


def iter_batches(records, size):
    """Yield consecutive lists of at most ``size`` items taken from ``records``.

    Every yielded list holds exactly ``size`` items except possibly the last
    one; nothing is yielded for an empty input.

    Raises:
        ValueError: if ``size`` is smaller than 1 (raised on first iteration).
    """
    if size < 1:
        raise ValueError(
            'number of sequences per file must be >= 1, got %s' % size)
    batch = []
    for record in records:
        batch.append(record)
        if len(batch) == size:
            yield batch
            batch = []
    # take care of the rest
    if batch:
        yield batch


def split_fasta(fasta_file, nlines, out_dir='scratch'):
    """Write the records of ``fasta_file`` to ``out_dir/group_<n>.fasta`` files.

    Args:
        fasta_file: path of the FASTA file to split.
        nlines: number of sequences per output file.
        out_dir: directory receiving the output files; created if absent.

    Returns:
        ``(count, filename)`` for the last file written, or ``None`` when the
        input contained no records (no file is written in that case).

    Raises:
        ValueError: if ``nlines`` is smaller than 1.
    """
    seqio = _import_seqio()
    try:
        os.makedirs(out_dir)
    except OSError:
        # An already existing directory is fine; anything else is an error.
        if not os.path.isdir(out_dir):
            raise

    last_written = None
    with open(fasta_file) as source:
        batches = iter_batches(seqio.parse(source, 'fasta'), nlines)
        for group, batch in enumerate(batches, 1):
            filename = 'group_%s.fasta' % group
            with open(os.path.join(out_dir, filename), 'w') as handle:
                count = seqio.write(batch, handle, 'fasta')
            last_written = (count, filename)
    return last_written


def main(argv=None):
    """Command-line entry point; ``argv`` defaults to ``sys.argv[1:]``."""
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 2:
        sys.exit('usage: %s FASTA_FILE RECORDS_PER_FILE' % sys.argv[0])
    fasta_file = args[0]
    # number of sequences per scratch output file
    nlines = int(args[1])
    print('Going to split: >%s<' % fasta_file)
    print('Going to split in %s lines per output file.' % nlines)

    last_written = split_fasta(fasta_file, nlines)
    if last_written is None:
        # Keep a non-zero exit status for empty input, but with a clear
        # message instead of a NameError traceback.
        sys.exit('No FASTA records found in %s; no output written.'
                 % fasta_file)
    print('Wrote %s records to %s' % last_written)


if __name__ == '__main__':
    main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment