The task is kinda simple - extract an image from Word document. Since I got a lot of files like more than 10000. I decide to implement multiprocessing for this task.
import docx2txt
import os
from multiprocessing import Pool
ABS_PATH = os.path.dirname(os.path.realpath(__file__))
def extract_image(docs):
for root, dirs, filenames in os.walk(docs):
for f in filenames:
directory = os.path.join(ABS_PATH, "images/")
docx2txt.process("%s%s" % (docs, f), directory)
def get_docs():
source = os.path.join(ABS_PATH, 'docs/')
return source
if __name__ == "__main__":
docs = get_docs()
pool = Pool()
pool.map(extract_image, docs)
pool.close()
pool.join()
I expect that images will be extracted to docs folder, but I get:
PermissionError: [Errno 13] Permission denied: '\pagefile.sys'