While scraping this link enter link description here , i just want to scrape library links, but the code I wrote extracts all the links, I couldn't manage to filter it. (I'm parsing the urls for later use in github api
, so I only need the path parts , but I don't want to parse the links that don't work for me to avoid unnecessary operations, so I only need library links)
type repo struct {
Link string `json:"link"`
Name string `json:"name"`
}
allRepos := make([]repo, 0)
collector := colly.NewCollector(
colly.AllowedDomains("github.com"))
collector.OnHTML("ul", func(e *colly.HTMLElement) {
r := repo{}
r.Link = e.ChildAttr("a", "href")
url, _ := url.Parse(r.Link)
repos := repo{
Link: url.Path,
}
allRepos = append(allRepos, repos)
})
collector.OnRequest(func(r *colly.Request) {
fmt.Println("Visiting", r.URL.String())
})
// Sends HTTP requests to the server
collector.Visit("https://github.com/avelino/awesome-go/blob/main/README.md")
fmt.Println(allRepos)
enc := json.NewEncoder(os.Stdout)
enc.SetIndent("", "\t")
//githubApi := "https://api.github.com/repos"
for _, repos := range allRepos {
fmt.Println(repos.Link)
}