I am trying to fetch Rally data by using its python library pyral. Sequentially the same code works, but its slow. I thought of using python multiprocess package, however my pool.apply method gets stuck and never executes. I tried running it in Pycharm IDE as well as the windows cmd prompt.
import pandas as pd
from pyral import Rally
from multiprocessing import Pool, Manager
from pyral.entity import Project
def process_row(sheetHeaders: list, item: Project, L: list):
print('processing row : ' + item.Name) ## this print never gets called
row = ()
for header in sheetHeaders:
row.append(process_cell(header, item))
L.append(row)
def process_cell(attr, item: Project):
param = getattr(item, attr)
if param is None:
return None
try:
if attr == 'Owner':
return param.__getattr__('Name')
elif attr == 'Parent':
return param.__getattr__('ObjectID')
else:
return param
except KeyError as e:
print(e)
# Projects
# PortfolioItem
# User Story
# Hierarchical Req
# tasks
# defects
# -------------MAIN-----------------
def main():
# Rally connection
rally = Rally('rally1.rallydev.com', apikey='<my_key>')
file = 'rally_data.xlsx'
headers = {
'Project': ['Name', 'Description', 'CreationDate', 'ObjectID', 'Parent', 'Owner', 'State'],
}
sheetName = 'Project'
sheetHeaders = headers.get(sheetName)
p = Pool(1)
result = rally.get(sheetName, fetch=True, pagesize=10)
with Manager() as manager:
L = manager.list()
for item in result:
print('adding row for : ' + item.Name)
p.apply_async(func=process_row, args=(sheetHeaders, item, L)) ## gets stuck here
p.close()
p.join()
pd.DataFrame(L).to_excel(file, sheet_name=sheetName)
if __name__ == '__main__':
main()
Also tried without Manager list without any difference in the outcome
def main():
# Rally connection
rally = Rally('rally1.rallydev.com', apikey='<key>')
file = 'rally_data.xlsx'
headers = {
'Project': ['Name', 'Description', 'CreationDate', 'ObjectID', 'Parent', 'Owner', 'State'],
}
sheetName = 'Project'
sheetHeaders = headers.get(sheetName)
result = rally.get(sheetName, fetch=True, pagesize=10)
async_results = []
with Pool(50) as p:
for item in result:
print('adding row for : ' + item.Name)
async_results.append(p.apply_async(func=process_row, args=(sheetHeaders, item)))
res = [r.get() for r in async_results]
pd.DataFrame(res).to_excel(file, sheet_name=sheetName)