1

I am making a web application that retrieves the text inside nested tags by scraping pages with goquery.

I want to display the text together with the tag that contains it literally, as plain text, such as "<'h1'>Hello World!<'/h1'>".

I tried both item.Text() and node, _ := item.Html() followed by fmt.Sprintf("%s",node), where item is a *goquery.Selection, but my HTML template always renders the scraped text as HTML.

This is an example that scrapes the theverge site and displays the result in the index.html template. (Image: enter image description here)

I set the URL to theverge.com and the selector to class.

Below is the source code in Go:

Blockquote

,

// scrape downloads the page at url, selects elements with a CSS selector
// built from tag, selector (an attribute name) and value (that attribute's
// value), and returns the text of every match as HTML markup.
//
// Each match is rendered as `<p>"text"</p>` followed by two newlines, and the
// concatenation of all matches is wrapped in one more `<p>"..."</p>`.
//
// If only value is supplied (no tag and no selector) the arguments cannot
// form a selector and the literal string "THISFORMATISNOTRIGHT" is returned
// without contacting the network.
//
// NOTE(review): the scraped text is inserted into the markup without HTML
// escaping (unless cleanString, defined elsewhere, does it). If the result is
// rendered as raw HTML, any markup in the scraped text will be interpreted by
// the browser; escape it (e.g. html.EscapeString) or let html/template do so.
func scrape(url, tag, selector, value string) string {
	// Validate the arguments first so that malformed input is rejected
	// before paying for an HTTP round trip.
	query, ok := selectorFor(tag, selector, value)
	if !ok {
		return "THISFORMATISNOTRIGHT"
	}

	res, e := http.Get(url)
	handleError(e)
	if e != nil {
		// handleError is defined elsewhere; in case it returns instead of
		// terminating, stop here rather than dereferencing a nil response.
		return ""
	}
	defer res.Body.Close()

	doc, err := goquery.NewDocumentFromReader(res.Body)
	handleError(err)
	if err != nil {
		// Same reasoning as above: doc is not usable when parsing failed.
		return ""
	}

	fmt.Println(query)

	// Accumulate into a byte slice so the cost stays linear in the output
	// size instead of re-copying the whole string for every match.
	out := []byte("<p>\"")
	doc.Find(query).Each(func(_ int, item *goquery.Selection) {
		out = append(out, "<p>\""...)
		out = append(out, cleanString(item.Text())...)
		out = append(out, "\"</p>\n\n"...)
	})
	out = append(out, "\"</p>"...)
	return string(out)
}

// selectorFor builds the CSS selector used by scrape from an optional tag
// name, attribute name (selector) and attribute value. The boolean result is
// false when the combination cannot form a selector, which happens only when
// value is given without both tag and selector. A value given with a tag but
// no attribute name is ignored and the bare tag is used.
func selectorFor(tag, selector, value string) (string, bool) {
	switch {
	case tag != "" && selector != "" && value != "":
		return fmt.Sprintf("%s[%s=\"%s\"]", tag, selector, value), true
	case tag != "" && selector != "":
		return fmt.Sprintf("%s[%s]", tag, selector), true
	case selector != "" && value != "":
		return fmt.Sprintf("*[%s=\"%s\"]", selector, value), true
	case selector != "":
		return fmt.Sprintf("*[%s]", selector), true
	case tag != "":
		return tag, true
	case value == "":
		// Nothing was supplied at all: fall back to the whole document.
		return "html", true
	default:
		return "", false
	}
}

How can I display an HTML tag and its inner text as plain text?

JimB
  • 104,193
  • 13
  • 262
  • 255
QuavoHuncho
  • 51
  • 2
  • 4

0 Answers0