I am exploring a data structure which get expands to sub-elements and resolves to a final element. But I only want to store top two levels.
Example: Lets say I start with New York which breaks into Bronx, Kings, New York, Queens, and Richmond as counties but then finally somehow they resolve to USA.
I am not sure if this is a good example but just to make it clear here is more clear explanation of the problem.
A (expands to) B,C,D -> B (expands to) K,L,M -> K resolves to Z
I initially wrote it in series of for loops and then use the recursion but in recursion I am loosing some of the elements that get expand and due to that I don't drill down each of the expanded element. I have put the both recursive version and non-recursive. I am looking for some advise on building this data structure, and what is the best way to do.
I call a data base query for every element in the expanded version which returns a list of items. Go until it resolves to single element. With out recursion I don't loose drilling all the way till the final element that others resolve to. But with recursion its not the same. I am also new to python so hopefully this is not a bad question to ask in a site like this.
returnCategoryQuery
is a method that returns list of items by calling the database query.
With out recursion
#Dictionary to save initial category with the rest of cl_to
baseCategoryTree = {};
#categoryResults = [];
# query get all the categories a category is linked to
categoryQuery = "select cl_to from categorylinks cl left join page p on cl.cl_from = p.page_id where p.page_namespace=14 and p.page_title ='";
cursor = db.cursor(cursors.SSDictCursor);
for key, value in idTitleDictionary.iteritems():
for startCategory in value[0]:
#print startCategory + "End of Query";
categoryResults = [];
try:
categoryRow = "";
baseCategoryTree[startCategory] = [];
print categoryQuery + startCategory + "'";
cursor.execute(categoryQuery + startCategory + "'");
done = False;
while not done:
categoryRow = cursor.fetchone();
if not categoryRow:
done = True;
continue;
categoryResults.append(categoryRow['cl_to']);
for subCategoryResult in categoryResults:
print startCategory.encode('ascii') + " - " + subCategoryResult;
for item in returnCategoryQuery(categoryQuery + subCategoryResult + "'"):
print startCategory.encode('ascii') + " - " + subCategoryResult + " - " + item;
for subItem in returnCategoryQuery(categoryQuery + item + "'"):
print startCategory.encode('ascii') + " - " + subCategoryResult + " - " + item + " - " + subItem;
for subOfSubItem in returnCategoryQuery(categoryQuery + subItem + "'"):
print startCategory.encode('ascii') + " - " + subCategoryResult + " - " + item + " - " + subItem + " - " + subOfSubItem;
for sub_1_subOfSubItem in returnCategoryQuery(categoryQuery + subOfSubItem + "'"):
print startCategory.encode('ascii') + " - " + subCategoryResult + " - " + item + " - " + subItem + " - " + subOfSubItem + " - " + sub_1_subOfSubItem;
for sub_2_subOfSubItem in returnCategoryQuery(categoryQuery + sub_1_subOfSubItem + "'"):
print startCategory.encode('ascii') + " - " + subCategoryResult + " - " + item + " - " + subItem + " - " + subOfSubItem + " - " + sub_1_subOfSubItem + " - " + sub_2_subOfSubItem;
except Exception, e:
traceback.print_exc();
With Recursion
def crawlSubCategory(subCategoryList):
level = 1;
expandedList = [];
for eachCategory in subCategoryList:
level = level + 1
print "Level " + str(level) + " " + eachCategory;
#crawlSubCategory(returnCategoryQuery(categoryQuery + eachCategory + "'"));
for subOfEachCategory in returnCategoryQuery(categoryQuery + eachCategory + "'"):
level = level + 1
print "Level " + str(level) + " " + subOfEachCategory;
expandedList.append(crawlSubCategory(returnCategoryQuery(categoryQuery + subOfEachCategory + "'")));
return expandedList;
#Dictionary to save initial category with the rest of cl_to
baseCategoryTree = {};
#categoryResults = [];
# query get all the categories a category is linked to
categoryQuery = "select cl_to from categorylinks cl left join page p on cl.cl_from = p.page_id where p.page_namespace=14 and p.page_title ='";
cursor = db.cursor(cursors.SSDictCursor);
for key, value in idTitleDictionary.iteritems():
for startCategory in value[0]:
#print startCategory + "End of Query";
categoryResults = [];
try:
categoryRow = "";
baseCategoryTree[startCategory] = [];
print categoryQuery + startCategory + "'";
cursor.execute(categoryQuery + startCategory + "'");
done = False;
while not done:
categoryRow = cursor.fetchone();
if not categoryRow:
done = True;
continue;
categoryResults.append(categoryRow['cl_to']);
#crawlSubCategory(categoryResults);
except Exception, e:
traceback.print_exc();
#baseCategoryTree[startCategory].append(categoryResults);
baseCategoryTree[startCategory].append(crawlSubCategory(categoryResults));