I was trying to write an iterator class in Python that can do a loop for a txt file, in while I would like to group all lines with identical value in the second column:
1 | A |
2 | A |
3 | B |
4 | B |
5 | B |
6 | C |
7 | C |
8 | C |
9 | C |
10 | D |
11 | D |
12 | D |
So I would like my iterator to return four list/tuple one at a time:
[[1,A],[2,A]]
[[3,B],[4,B],[5,B]]
[[6,C],[7,C],[8,C],[9,C]]
[[10,D],[11,D],[12,D]]
Here is my code:
#%% Iterator
class sequence(object):
def __init__(self, filePath):
self.file = open(filePath, 'r')
self.last = []
def __iter__(self):
return self
def __next__(self):
self.trunk = [self.last]
stop_checker = False
while not stop_checker:
line = self.file.readline()
if line: # a solid line
line = line.strip('\n').split('\t')
# Check if current line contains a difference contigs
if self.trunk == [[]]: # empty trunk, add a new line to it, read next
self.trunk=[line]
elif self.trunk[-1][1] == line[1]: # contig names matched:
self.trunk.append(line)
else: # First encounter of a difference contigs, reture th lastt trunk
self.last = line
return self.trunk
else:
raise StopIteration
return self.trunk
a = sequence('tst.txt')
for i in a:
print(i)
However, the iterator stops before return the last list, and the result is:
[['1', 'A'], ['2', 'A']]
[['3', 'B'], ['4', 'B'], ['5', 'B']]
[['6', 'C'], ['7', 'C'], ['8', 'C'], ['9', 'C']]