In these situations, you can collect all of the URLs in the parse_all_scholarships
method into a list. Then, instead of firing off a request for each of the URLs in a for loop, create your item for the current university and assign the uni name,
pop just one of the links off your list of URLs, and send it in a request with the parse_scholarship_detail
method as the callback, passing along the item and the remaining list of URLs in the cb_kwargs
parameter.
Then, inside the parse_scholarship_detail
method, you can collect all the necessary information for that specific scholarship, add it to the item you passed in the cb_kwargs, and then pop another URL off the list and issue the same kind of request you yielded from the parse_all_scholarships
method, with the same item and the remaining URLs.
This creates a chain of requests, one per scholarship page, each of which adds its scholarship to the university item in turn. At the end of the callback, add an if statement that checks whether the list of URLs is empty; if it is, then and only then should you yield the university item.
For example:
import scrapy
class UniversityItem(scrapy.Item):
    """Scraped record holding one university and the scholarships it offers."""

    # Value passed as ``uni=`` when the item is created.
    uni = scrapy.Field()

    # Starts as an empty list; one dict per scholarship is appended to it.
    scholarships = scrapy.Field()
class university_spider(scrapy.Spider):
    """Scrape scholarships grouped by university.

    The detail pages of one university are requested one after another
    (a request chain) so that every scholarship can be appended to the same
    ``UniversityItem``. The item is yielded once, after the last detail page
    of that university has been processed.
    """

    name = "test_scholarship_spider"
    start_urls = [
        "https://search.studyaustralia.gov.au/scholarship/search-results.html?pageno=1",
    ]

    def parse(self, response):
        """Follow each search result to the university's scholarship listing.

        Yields one request per result block that has a listing link; the
        university name is forwarded to the callback through ``cb_kwargs``.
        """
        for div in response.css("div.sr_p.brd_btm"):
            full_scholarship_detail_url = div.xpath('.//div[@class="rs_cnt"]/a/@href').get()
            if full_scholarship_detail_url:
                uni_name = div.css("h2 a::text").get()
                yield response.follow(
                    url=full_scholarship_detail_url,
                    callback=self.parse_all_scholarships,
                    cb_kwargs={"uni": uni_name},
                )

    def parse_all_scholarships(self, response, uni=None):
        """Start the request chain for one university.

        Collects the scholarship detail links on the page, creates the item
        for ``uni`` and yields either the first detail request or, when the
        page lists no scholarships, the (empty) item itself.
        """
        hrefs = response.xpath("//div/h3/a/@href").getall()
        # Resolve to absolute URLs now, because later steps of the chain
        # (including the errback) have no response to resolve them against.
        # dict.fromkeys drops repeated links while keeping page order.
        links = list(dict.fromkeys(response.urljoin(href) for href in hrefs))
        item = UniversityItem(uni=uni, scholarships=[])
        yield self._next_request_or_item(item, links)

    def parse_scholarship_detail(self, response, item=None, links=None):
        """Add one scholarship to ``item`` and continue the chain.

        ``item`` is the university item being built and ``links`` the detail
        URLs still to visit; both arrive through ``cb_kwargs``. Yields the
        next detail request, or the finished item when no links remain.
        """
        scholarship = {"name": response.css('h1::text').get()}
        scholarship['eligibility_requirements'] = "multiple requirements that will be scrapped using selectors."
        scholarship['application_process'] = "multiple processes that will be scrapped using selectors."
        item["scholarships"].append(scholarship)
        yield self._next_request_or_item(item, links)

    def skip_failed_scholarship(self, failure):
        """Errback: skip a detail page that failed and continue the chain.

        Without this, one failed detail request would end the chain and the
        whole university item would never be yielded.
        """
        request = failure.request
        self.logger.warning(
            "Skipping scholarship page %s: %r", request.url, failure.value
        )
        kwargs = request.cb_kwargs
        yield self._next_request_or_item(kwargs["item"], kwargs["links"])

    def _next_request_or_item(self, item, links):
        """Return a request for the next pending link, or ``item`` if none remain."""
        if links:
            return scrapy.Request(
                url=links.pop(),
                callback=self.parse_scholarship_detail,
                errback=self.skip_failed_scholarship,
                cb_kwargs={"item": item, "links": links},
                # A URL already seen elsewhere in the crawl would otherwise be
                # dropped by the duplicate filter, silently ending the chain.
                dont_filter=True,
            )
        return item
Partial Output:
{
"uni": "Deakin College",
"scholarships": [
{
"name": "Deakin College International Scholarship - Vietnam",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
},
{
"name": "Deakin College International Scholarship - China, Korea and Japan",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
},
{
"name": "Deakin College International Scholarships",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
}
]
},
{
"uni": "Melbourne Polytechnic",
"scholarships": [
{
"name": "International Online Scholarship",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
},
{
"name": "International Higher Education Scholarship",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
},
{
"name": "Pathway to Victoria Scholarship",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
}
]
},
{
"uni": "Griffith College",
"scholarships": [
{
"name": "Diploma of Health Care Scholarship",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
},
{
"name": "Welcome to Queensland Scholarship",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
},
{
"name": "Accommodation Scholarship",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
}
]
},
{
"uni": "Charles Darwin University",
"scholarships": [
{
"name": "Australia Awards",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
},
{
"name": "Menzies International Academic Support Package",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
},
{
"name": "2023 CDU International College Pathway Scholarship",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
},
{
"name": "CDU Global Merit Scholarship 2023-2024",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
},
{
"name": "Dili International School Scholarship",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
},
{
"name": "Vice-Chancellor\u2019s International High Achievers Scholarships (VCIHAS)",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
}
]
},
{
"uni": "James Cook University",
"scholarships": [
{
"name": "Tom and Dorothy Cook Scholarships in Public Health and Tropical Medicine",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
},
{
"name": "International Merit Stipend",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
},
{
"name": "International Excellence Scholarship",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
},
{
"name": "JCU Postgraduate Research Scholarships",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
},
{
"name": "JCU Vice Chancellor's International Student Scholarship",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
},
{
"name": "HEB Memorial Bursary",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
},
{
"name": "John and Janice King Bursary",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
}
]
},
{
"uni": "Western Sydney University",
"scholarships": [
{
"name": "Vice-Chancellor's Academic Excellence Undergraduate Scholarships",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
},
{
"name": "Western Sydney International Scholarships \u2013 Undergraduate",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
},
{
"name": "China Scholarship Council Research Scholarships",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
},
{
"name": "Vice-Chancellor's Academic Excellence Postgraduate Scholarships",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
},
{
"name": "Western Sydney International Scholarships \u2013 Postgraduate",
"eligibility_requirements": "multiple requirements that will be scrapped using selectors.",
"application_process": "multiple processes that will be scrapped using selectors."
}
]
},