I am trying to Create index by whoosh of 150 MB file. But it is showing the error list index out of range: I have quote the line which is responsible of error. That is for x in range(len(id)):
. Logically Index record will be equivalent to ID number of the documents.
from whoosh import index
from whoosh.fields import Schema,ID, TEXT,NUMERIC
from whoosh import index
from whoosh.index import create_in
id = []
body = []
Score = []
count=0
doc_path='C:/Users/Abhi/Desktop/My_Experiments_with_truth/extracted_xml.txt'
with open(doc_path,'r+',encoding="utf8") as line:
for f in line:
count=count+1
if f.startswith('Id : '):
a = f.replace('Id : ','')
id.append(a)
#print(a)
elif f.startswith('body : '):
b = f.replace('body : ','')
body.append(b)
#print(b)
elif f.startswith('Score :'):
c = f.replace('Score :','')
Score.append(c)
#print(c)
if not os.path.exists("index"):
os.mkdir("index")
#design the Schema
schema=Schema(id_details=ID(stored=True),body_details=TEXT(stored=True),Score_details=NUMERIC(stored=True))
print(schema)
#creation of the index
ix = index.create_in("index", schema)
writer = ix.writer()
#Opening writer
for x in range(len(id)):
writer.add_document(id_details=id[x],body_details=body[x],Score_details=Score[x])
writer.commit()
print("Index created")