I am new to this multiprocessing concept. I am trying to implement multiprocessing to a spelling function to make it run faster. I tried as below but did not get results in previous order, token
here is the huge list of tokenized sentences.
from spellchecker import SpellChecker
from wordsegment import load, segment
from timeit import default_timer as timer
from multiprocessing import Process, Pool, Queue, Manager
def text_similarity_spellings(self, token):
"""Uses spell checker to separate incorrect spellings and correct them"""
spell = SpellChecker()
unknown_words = [list(spell.unknown(word)) for word in token]
known_words = [list(spell.known(word)) for word in token]
load()
segmented = [[segment(word) for word in sub] for sub in unknown_words]
flat_list = list(self.unpacker(segmented))
new_list = [[known_words[x], flat_list[x]] for x in range(len(known_words))]
new_list = list(self.unpacker(new_list))
newlist = [sorted(set(mylist), key=lambda x: mylist.index(x)) for mylist in new_list]
return newlist
def run_all(self):
tread_vta = Manager().list()
processes = []
arg_split = np.array_split(np.array(token),10)
arg_tr_cl = []
finds = []
trdclean1 = []
for count, k in enumerate(arg_split):
arg_tr_cl.append((k, [], tread_vta, token[t]))
for j in range(len(arg_tr_cl)):
p = Process(target= self.text_similarity_spellings, args=arg_tr_cl[j])
p.start()
processes.append(p)
for p in processes:
p.join()
Can anyone suggest me a better way to apply multiprocessing to a specific function and get results in correct order?