1.I was trying to write a python code to get all contents of files in each subfolder and create a index for each content (file contents). All the contents for each file can be get successfully. However, when I run the code, it always shows an error message Exception: This file is already closed.
2.Here is the code for building an index for each content, could someone explain to me why this thing could happened? The traceback:
python-input-49-38a47b2f8c0c> in <module>
39 print(searcher)
40
---> 41 writers.commit(optimize=True)
42
43 # from whoosh.query import *
~/.local/lib/python3.8/site-packages/whoosh/writing.py in commit(self, mergetype, optimize, merge)
928 else:
929 # Close segment files
--> 930 self._close_segment()
931 # Write TOC
932 self._commit_toc(finalsegments)
~/.local/lib/python3.8/site-packages/whoosh/writing.py in _close_segment(self)
841 def _close_segment(self):
842 if not self.perdocwriter.is_closed:
--> 843 self.perdocwriter.close()
844 if not self.fieldwriter.is_closed:
845 self.fieldwriter.close()
~/.local/lib/python3.8/site-packages/whoosh/codec/whoosh3.py in close(self)
265 for writer in self._colwriters.values():
266 writer.finish(self._doccount)
--> 267 self._cols.save_as_files(self._storage, self._column_filename)
268
269 # If vectors were written, close the vector writers
~/.local/lib/python3.8/site-packages/whoosh/filedb/compound.py in save_as_files(self, storage, name_fn)
295
296 def save_as_files(self, storage, name_fn):
--> 297 for name, blocks in self._readback():
298 f = storage.create_file(name_fn(name))
299 for block in blocks():
~/.local/lib/python3.8/site-packages/whoosh/filedb/compound.py in _readback(self)
276
277 yield (name, gen)
--> 278 temp.close()
279 self._tempstorage.delete_file(self._tempname)
280
~/.local/lib/python3.8/site-packages/whoosh/filedb/structfile.py in close(self)
121
122 if self.is_closed:
--> 123 raise Exception("This file is already closed")
124 if self.onclose:
125 self.onclose(self)
Exception: This file is already closed
import os
import codecs
import whoosh
from whoosh.index import create_in
from whoosh.fields import *
from whoosh.qparser import QueryParser
schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT,textdata=TEXT(stored=True))
ix = create_in("folder", schema)
filelist = []
for root, dirs, files in os.walk("./test_result"):
for file in files:
#append the file name to the list
filelist.append(os.path.join(root,file))
#print all the file names
writer = ix.writer()
i = 0
for name in filelist:
i = i +1
with codecs.open (name, "r",encoding='utf-8',
errors='ignore') as myfile:
text=myfile.read()
# print ("adding document "+name)
writer.add_document(title="document "+name, path="folder",content=text,textdata=text)
myfile.close()
print(text)
searcher = ix.searcher()
print(searcher)
writers.commit(optimize=True)