0

This python script does some data stuff on a product list coming from a sqlite table...the commented out for loop works as expected but the Multiprocessing loop does not work at all... i can see the processess being fired but the script just halts. any help ?

import sqlite3 as lite
import sys
import pandas as pd
import datetime
from datetime import date
import time
from Levenshtein import *
import multiprocessing as mp
import copy

    def getProducts():
        con = None
        try:
            con = lite.connect('pm.db', check_same_thread=False)
            con.row_factory = lite.Row
            cur = con.cursor()
            cur.execute("SELECT  * FROM products" )
            rows = cur.fetchall()

        except lite.Error, e:
            print "Error %s:" % e.args[0]
            sys.exit(1)

        finally:
            if con:
                con.close()
        return rows

    def test_mp(row):
        print row

    dictArray = []
    counter = 0

    rows = getProducts()
    #for row in rows:
        #counter += 1
        #print 'product {count} from {max}'.format(count=counter, max=len(rows))
        #dictArray.extend(test_mp(row))

    pool = mp.Pool(10)
    for ret in pool.imap(test_mp, rows):
        print 'Done processing product'
        dictArray.extend(ret)

    pool.terminate()
Hakim
  • 1,242
  • 1
  • 10
  • 22

1 Answers1

0

This is how i fixed it.. apparently the array of sqliteRows rows is not playing well inside the pool.imap function...i used another row factory to create a generic dict.

def dict_factory(cursor, row):
    d = {}
    for idx, col in enumerate(cursor.description):
        d[col[0]] = row[idx]
    return d

in getProducts:

        con.row_factory = dict_factory

credits go to How can I get dict from sqlite query?

Community
  • 1
  • 1
Hakim
  • 1,242
  • 1
  • 10
  • 22