0

I'm attempting to scrape the data at https://www.publix.com/savings/weekly-ad/view-all, but upon opening the webpage you have to click the "Choose a Store" button, select the correct store, and then click the button to switch into List View. Would I be able to automate this process using chromedp and then scrape with Colly?

This is what I have so far; it obviously doesn't work because of the dynamic nature of the webpage:

// ScrapeDeals visits the Publix weekly-ad page and, for every savings card
// found in the returned HTML, passes the product name and sale details to
// g.UpdateInventory.
//
// NOTE(review): Colly only sees the HTML returned by the HTTP request; it does
// not execute JavaScript. If the deal cards are rendered client-side after a
// store is chosen, this collector will match nothing and the rendered HTML has
// to be obtained some other way (e.g. a headless browser) first.
func (g *GroceryStore) ScrapeDeals() {
    // AllowedDomains is matched against the request's hostname, so it must be
    // a bare domain without a scheme; "https://www.publix.com" would cause
    // every Visit to be rejected as a forbidden domain.
    c := colly.NewCollector(colly.AllowedDomains("www.publix.com"))

    // Hierarchy of values in the HTML. Several classes on one element are
    // chained with dots; a space in a selector means "descendant element".
    /*
        Name:
        div.p-card.p-savings-card.p-card--interactive
            div.content-wrapper
                div.top-section
                    div.title-wrapper
                        span.p-text.paragraph-md.normal.context--default.color--null.line-clamp.title
                            TEXT

        Sale Details:
        div.p-card.p-savings-card.p-card--interactive
            div.content-wrapper
                div.top-section
                    span.p-savings-badge.savings-badge.bogo
                        div.p-savings-badge__text
                            span.p-text.paragraph-sm.strong.context--default.color--null
                                TEXT
    */
    const (
        cardSelector = "div.p-card.p-savings-card.p-card--interactive"
        nameSelector = "div.top-section div.title-wrapper " +
            "span.p-text.paragraph-md.normal.context--default.color--null.line-clamp.title"
        // NOTE(review): the "bogo" class limits matches to badges carrying
        // that class — confirm whether other badge variants should be scraped.
        saleSelector = "div.top-section span.p-savings-badge.savings-badge.bogo " +
            "div.p-savings-badge__text " +
            "span.p-text.paragraph-sm.strong.context--default.color--null"
    )

    c.OnHTML(cardSelector, func(e *colly.HTMLElement) {
        e.ForEach("div.content-wrapper", func(_ int, wrapper *colly.HTMLElement) {
            // Scoped per wrapper so values from one wrapper never leak into
            // the next; the last matching element wins, as before.
            name := ""
            saleDetails := ""
            wrapper.ForEach(nameSelector, func(_ int, el *colly.HTMLElement) {
                name = el.Text // name of product
            })
            wrapper.ForEach(saleSelector, func(_ int, el *colly.HTMLElement) {
                saleDetails = el.Text
            })
            g.UpdateInventory(name, saleDetails)
        })
    })

    // Visit and scrape deals.
    // NOTE(review): Visit returns an error that is dropped here because this
    // method has no error return; consider surfacing it to the caller.
    c.Visit("https://www.publix.com/savings/weekly-ad/view-all")
}

  • 1
    To me the page you're trying to visit is without any deals. Maybe it's better to simplify your question with a more famous web page that is easy to use for the repro steps. Thanks! – ossan Feb 14 '23 at 14:55

0 Answers0