I am building a web application that retrieves the text inside nested tags by scraping with goquery.
I want to display the text together with the tag that contains it literally, as plain text, e.g. "<h1>Hello World!</h1>".
For each item (a *goquery.Selection) I tried both item.Text() and node, _ := item.Html() followed by fmt.Sprintf("%s", node), but my HTML template always renders the scraped text as HTML.
This is an example that scrapes The Verge and displays the result in the index.html template. [enter image description here]
I set the URL to theverge.com and the selector to class.
Below is the source code in Go:
Blockquote
,
// scrape fetches url, selects elements from the parsed document and returns
// the text of every match wrapped in <p> markup.
//
// tag, selector and value are combined into a CSS selector (goquery's Find
// takes CSS selectors, not XPath): tag is an element name, selector an
// attribute name and value the attribute value to match. Any of them may be
// empty; when all three are empty the whole "html" element is selected.
// Supplying only value is not a usable combination, and in that case the
// sentinel string "THISFORMATISNOTRIGHT" is returned instead of markup.
//
// Request, status and parse errors are forwarded to handleError.
func scrape(url, tag, selector, value string) string {
	res, e := http.Get(url)
	handleError(e)
	defer res.Body.Close()

	// Without this check an error page (404, 500, ...) would be parsed and
	// scraped as if it were the requested content.
	if res.StatusCode != http.StatusOK {
		handleError(fmt.Errorf("fetching %q: unexpected status %s", url, res.Status))
	}

	doc, err := goquery.NewDocumentFromReader(res.Body)
	handleError(err)

	query, ok := buildSelector(tag, selector, value)
	if !ok {
		return query
	}
	fmt.Println(query)

	container := ""
	doc.Find(query).Each(func(_ int, item *goquery.Selection) {
		// NOTE(review): the scraped text is inserted into markup as-is;
		// whether cleanString escapes it is not visible from here. To show
		// tags literally, use item.Html() and pass the result through
		// html.EscapeString (needs the "html" import) before wrapping it.
		container += "<p>\"" + cleanString(item.Text()) + "\"</p>" + "\n\n"
	})
	return "<p>\"" + container + "\"</p>"
}

// buildSelector turns the tag/attribute/value triple into a CSS selector; ok
// is false when the combination is unusable (only value was supplied).
func buildSelector(tag, selector, value string) (query string, ok bool) {
	hasTag, hasAttr, hasValue := tag != "", selector != "", value != ""
	switch {
	case hasTag && hasAttr && hasValue:
		return fmt.Sprintf("%s[%s=\"%s\"]", tag, selector, value), true
	case hasTag && hasAttr:
		return fmt.Sprintf("%s[%s]", tag, selector), true
	case hasAttr && hasValue:
		return fmt.Sprintf("*[%s=\"%s\"]", selector, value), true
	case hasAttr:
		return fmt.Sprintf("*[%s]", selector), true
	case hasTag:
		// value without an attribute name cannot be matched, so it is ignored.
		return tag, true
	case !hasValue:
		// Nothing supplied at all: select the whole document.
		return "html", true
	default:
		return "THISFORMATISNOTRIGHT", false
	}
}
How can I display an HTML tag and its inner text as plain text?