splitter.py 1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
#!/usr/bin/env python

from Bio import SeqIO
import sys
import os

def iter_batches(records, batch_size):
    """Yield consecutive lists of at most ``batch_size`` items from ``records``.

    Every yielded list is full except possibly the last one. Nothing is
    yielded for an empty input, so callers never create an empty output file.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1 (raised when the
            generator is first advanced).
    """
    if batch_size < 1:
        raise ValueError(
            'batch_size must be a positive integer, got %r' % (batch_size,))
    batch = []
    for record in records:
        batch.append(record)
        if len(batch) == batch_size:
            yield batch
            batch = []
    # take care of the rest
    if batch:
        yield batch


def split_fasta(fasta_file, nlines, out_dir='scratch'):
    """Split ``fasta_file`` into FASTA files of ``nlines`` records each.

    Output files are named ``group_00001.fasta``, ``group_00002.fasta``, ...
    and are written into ``out_dir``, which is created if it is missing.

    Args:
        fasta_file: path of the FASTA file to read.
        nlines: number of sequences (records, not text lines) per output file.
        out_dir: directory receiving the output files.

    Returns:
        The number of output files written (0 for an input without records).

    Raises:
        ValueError: if ``nlines`` is smaller than 1.
    """
    # Validate eagerly so a bad value never touches the filesystem.
    if nlines < 1:
        raise ValueError(
            'nlines must be a positive integer, got %r' % (nlines,))
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    written = 0
    # Both handles are managed by ``with`` so they are closed even when
    # parsing or writing fails part-way through.
    with open(fasta_file) as fasta_handle:
        record_iter = SeqIO.parse(fasta_handle, 'fasta')
        for group, batch in enumerate(iter_batches(record_iter, nlines), 1):
            filename = 'group_%05d.fasta' % group
            with open(os.path.join(out_dir, filename), 'w') as handle:
                SeqIO.write(batch, handle, 'fasta')
            written += 1
    return written


def main(argv=None):
    """Command-line entry point: ``splitter.py <fasta_file> <sequences_per_file>``.

    Args:
        argv: argument vector including the program name; defaults to
            ``sys.argv``.

    Exits with an explanatory message (non-zero status) when arguments are
    missing or the per-file count is not a positive integer.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        prog = argv[0] if argv else 'splitter.py'
        sys.exit('Usage: %s <fasta_file> <sequences_per_file>' % prog)

    fasta_file = argv[1]
    # number of sequences per scratch output file
    try:
        nlines = int(argv[2])
    except ValueError:
        sys.exit('Number of sequences per file must be an integer, got %r'
                 % (argv[2],))
    if nlines < 1:
        sys.exit('Number of sequences per file must be at least 1, got %d'
                 % nlines)

    print('Going to split: >%s<' % fasta_file)
    print('Going to split in %s lines per output file.' % nlines)

    split_fasta(fasta_file, nlines)


if __name__ == '__main__':
    main()