My coworker and I are running exactly the same code with the same libraries, yet his code works and mine doesn't. We've gotten stuck trying to figure out what is wrong. Any help would be greatly appreciated. The code and error are below.
Code:
import os
# Set the MALLET_HOME environment variable for this process (the real path is
# elided as 'C:...' in this post).
# NOTE(review): this is a raw string, so '\\' stays as two literal backslashes,
# whereas the non-raw string on the next line collapses '\\' to one backslash;
# the value also ends with a forward slash. Unclear whether this mismatch
# contributes to the failure -- TODO confirm against the coworker's setup.
os.environ.update({'MALLET_HOME':r'C:...\\mallet-2.0.8/'})
# Path to the MALLET launcher; per the traceback, gensim prepends this string
# to the command line it runs through the shell.
mallet_path = 'C:...\\mallet-2.0.8\\bin\\mallet'
# Passing corpus= makes the constructor train immediately: per the traceback,
# __init__ -> train() -> convert_input() runs `mallet import-file ...` via
# check_output(shell=True), and that command is what exits with status 1.
# NOTE(review): gensim, corpus and id2word are not defined in this snippet;
# presumably they come from earlier notebook cells -- verify.
ldamallet = gensim.models.wrappers.LdaMallet(mallet_path, corpus=corpus, num_topics=10, id2word=id2word)
Output and Error:
---------------------------------------------------------------------------
CalledProcessError Traceback (most recent call last)
<ipython-input-79-6122457c60e1> in <module>
----> 1 ldamallet = gensim.models.wrappers.LdaMallet(mallet_path, corpus=corpus, num_topics=10, id2word=id2word)
C:\ProgramData\Anaconda3\lib\site-packages\gensim\models\wrappers\ldamallet.py in __init__(self, mallet_path, corpus, num_topics, alpha, id2word, workers, prefix, optimize_interval, iterations, topic_threshold, random_seed)
129 self.random_seed = random_seed
130 if corpus is not None:
--> 131 self.train(corpus)
132
133 def finferencer(self):
C:\ProgramData\Anaconda3\lib\site-packages\gensim\models\wrappers\ldamallet.py in train(self, corpus)
270
271 """
--> 272 self.convert_input(corpus, infer=False)
273 cmd = self.mallet_path + ' train-topics --input %s --num-topics %s --alpha %s --optimize-interval %s '\
274 '--num-threads %s --output-state %s --output-doc-topics %s --output-topic-keys %s '\
C:\ProgramData\Anaconda3\lib\site-packages\gensim\models\wrappers\ldamallet.py in convert_input(self, corpus, infer, serialize_corpus)
259 cmd = cmd % (self.fcorpustxt(), self.fcorpusmallet())
260 logger.info("converting temporary corpus to MALLET format with %s", cmd)
--> 261 check_output(args=cmd, shell=True)
262
263 def train(self, corpus):
C:\ProgramData\Anaconda3\lib\site-packages\gensim\utils.py in check_output(stdout, *popenargs, **kwargs)
1916 error = subprocess.CalledProcessError(retcode, cmd)
1917 error.output = output
-> 1918 raise error
1919 return output
1920 except KeyboardInterrupt:
CalledProcessError: Command 'C:\mallet-2.0.8\bin\mallet import-file --preserve-case --keep-sequence --remove-stopwords --token-regex "\S+" --input C:\Users\CST~1.JEO\AppData\Local\Temp\84f7e0_corpus.txt --output C:\Users\CST~1.JEO\AppData\Local\Temp\84f7e0_corpus.mallet' returned non-zero exit status 1.