This is my code for the problem set week 6 DNA. When I test with the small.csv it works correctly but when testing with the large.csv it seems to incorrectly count the repeating sequence. Can anyone help me find the error in my code? I am very new to this.
import csv
import sys
if len(sys.argv) != 3:
sys.exit("Usage: python dna.py STRcounts DNASequence")
check = True
STRlist = []
Humanlist = []
# copy person list
with open(sys.argv[1],"r") as STR:
readSTR = csv.reader(STR)
for row in readSTR:
if check:
STRlist.append(row)
check = False
else:
Humanlist.append(row)
Slist = STRlist[0]
Slist.remove("name")
# print(Humanlist)
# print(Slist)
seq=[]
# copy sequence
with open(sys.argv[2],"r") as text:
readtext = csv.reader(text)
for i in readtext:
seq = i
text = seq[0]
# print(text)
# create dictionary for STR
STRdict = {}
for STR in Slist:
STRdict[STR] = 0
for STR in Slist:
for letter in range(len(text)):
if STR == text[letter:letter+len(STR)]:
STRdict[STR] += 1
check = False
for human in range(len(Humanlist)):
for STR in range(len(Slist)):
if str(STRdict[Slist[STR]]) == str(Humanlist[human][STR+1]):
check = True
else:
check = False
break
if check:
print(Humanlist[human][0])
break
if not check:
print("no match")