52
const puppeteer = require("puppeteer");

/**
 * Opens the qimai.cn ranking page (US, iPhone, genre 6014, 2019-03-19),
 * switches to the "free" list and logs the name of every app found in the
 * ranking table. Any error is logged instead of rethrown, and the browser
 * is closed whether or not the scrape succeeds.
 */
(async function main() {
    let browser;
    try {
        browser = await puppeteer.launch({headless: false});
        const page = await browser.newPage();
        // setUserAgent returns a promise; await it so the user agent is applied before navigating.
        await page.setUserAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36");

        await page.goto("https://www.qimai.cn/rank/index/brand/all/genre/6014/device/iphone/country/us/date/2019-03-19", {waitUntil: 'load', timeout: 0});
        await page.waitForSelector(".container");

        // waitForSelector resolves to the element handle (or rejects on timeout),
        // so click() is never called on a null handle as it could be with page.$.
        const freeButton = await page.waitForSelector('[href="/rank/index/brand/free/device/iphone/country/us/genre/6014/date/2019-03-19"]');
        await freeButton.click();

        // free list: wait until at least one app name has been rendered in the table
        await page.waitForSelector(".data-table > tbody > tr > td div.appname");

        // Read the names inside the page. A cell without a div.appname makes
        // querySelector return null (presumably the cause of the reported
        // "Cannot read property ... of null"), so such cells are skipped.
        const gameNames = await page.$$eval(
            ".data-table > tbody > tr > td",
            cells => cells
                .map(cell => cell.querySelector("div.appname"))
                .filter(nameEl => nameEl !== null)
                .map(nameEl => nameEl.innerText)
        );

        for (const gameName of gameNames) {
            console.log("Game Name: ", gameName);
        }

        console.log("-- bingo --");

    } catch (e) {
        console.log("our error", e);
    } finally {
        // Release the Chromium process even when a step above failed.
        if (browser) {
            await browser.close();
        }
    }
})();

I can't seem to get the text from <div class="appname">, and I'm getting this error:

TypeError: Cannot read property 'innerHTML' of null.

I have tried all ways, but it's not working.

This is the link to the website: https://www.qimai.cn/app/rank/appid/1451505313/country/us.

ggorlen
  • 44,755
  • 7
  • 76
  • 106
Koh Shuan Jin
  • 551
  • 1
  • 5
  • 8

8 Answers8

86

I use the "waitForSelector" method and, after that, get the text:

// Wait until an element matching the selector is present in the page
await page.waitForSelector('your selector')
// Query the page again to obtain a handle to that element
let element = await page.$('your selector')
// Run the callback in the page with the element as argument and return its textContent
let value = await page.evaluate(el => el.textContent, element)
Edhar Dowbak
  • 2,648
  • 1
  • 10
  • 13
  • 28
    Or just use the method on element: `let value = await element.evaluate(el => el.textContent)` – Arlo Feb 17 '21 at 18:30
  • 2
    You can also use `const element = await page.waitForSelector('your selector')` and drop the `let element = await page.$('your selector')` line as shown [here](https://stackoverflow.com/a/66461236/6243352). – ggorlen Mar 15 '23 at 14:34
52

using waitForSelector and evaluate this becomes pretty clean

// Wait for the element; the resolved value is the handle of the matched element.
const handle = await page.waitForSelector('your selector');
// Evaluate the callback in the browser context to read the element's textContent.
const text = await handle.evaluate(node => node.textContent);
Ulad Kasach
  • 11,558
  • 11
  • 61
  • 87
23

The easiest way I have found to retrieve values from DOM selections with Puppeteer and Jest is the `$eval` method.

Let's say I want the text value from a span.

// markup
<div class="target-holder">
    <span class="target">test</span>
</div>

// inside my e2e test file
// $eval runs the callback in the page against the first element matching the selector
const targetText = await page.$eval('.target-holder .target', (span) => span.innerText);

console.log(targetText); // test

Official documentation link: https://pptr.dev/#?product=Puppeteer&version=main&show=api-pageevalselector-pagefunction-args

Zsolt Meszaros
  • 21,961
  • 19
  • 54
  • 57
Sergiu Mare
  • 1,552
  • 15
  • 17
6

If you're getting elements by XPath, just use the code below.

<span class="toggleable"> Random text.</span> 
// right click on this element -> copy -> copy XPath

// $x resolves to an array of handles, one per node matching the XPath expression.
const element = await page.$x('//thecopiedxpath');
if (element.length === 0) {
  // Fail with a clear message instead of a TypeError on element[0].
  throw new Error('No element matched the XPath expression');
}
const textObject = await element[0].getProperty('textContent');
// jsonValue() is the public API for reading the property's value;
// _remoteObject is a private internal and may change between Puppeteer versions.
const text = await textObject.jsonValue();
console.log(text);

That will print the element's text, " Random text." (textContent keeps the leading space from the markup).

Gabriel Arruda
  • 487
  • 5
  • 10
3

If your goal is just to get the text, you can work around the problem by running plain JavaScript against the page's DOM.
Change this:

// Handles for every cell of the ranking table
const lis = await page.$$(".data-table > tbody > tr > td");

const appInfo = await page.$("a.icon");

for (const content of lis) {
  // content.$ resolves to null when the cell contains no div.appname ...
  const name = await content.$("div.appname");
  // ... and reading name.innerText on that null is what throws the TypeError
  const gameName = await page.evaluate(name => name.innerText, name);
  console.log("Game Name: ", gameName);
}

To this:

// Collect the app names inside the page in a single call.
const texts = await page.$$eval('.data-table > tbody > tr > td', (cells) =>
  cells
    // querySelector returns null for a cell without a div.appname;
    // drop those cells instead of dereferencing null.
    .map((cell) => cell.querySelector('div.appname'))
    .filter((nameEl) => nameEl !== null)
    .map((nameEl) => nameEl.innerText)
);

// And here is your game names.
// An array of strings is serializable, so it is returned from the page as-is;
// no JSON.stringify/JSON.parse round trip is needed.
console.log('Game names', texts);

N.B.: This code hasn't been tested on the actual HTML page, since no example was provided.
But you should get the idea of how to reimplement the Puppeteer logic with native DOM methods to achieve the goal.

Grynets
  • 2,477
  • 1
  • 17
  • 41
  • `$$eval` is easier than `document.querySelectorAll()` and `appInfo` is never used here. You can simplify this to `await page.$$eval(".data-table td", els => els.map(e => e.querySelector(".appname").textContent))`. – ggorlen Mar 15 '23 at 14:56
2

From the documentation:

// page.$ resolves to a handle for the first element matching the selector
const tweetHandle = await page.$('.tweet .retweets');
// evaluate runs the callback in the page with the element as its argument
expect(await tweetHandle.evaluate(node => node.innerText)).toBe('10');
Shimon S
  • 4,048
  • 2
  • 29
  • 34
1
//get the elements matching the xpath ($x resolves to an array of handles)
const getXpathOfRecordLabel = await page.$x('//div');
if (getXpathOfRecordLabel.length === 0) {
  // Fail with a clear message instead of a TypeError on [0].
  throw new Error('No element matched the XPath expression');
}

//get the property of textContent
const getTheProperty = await getXpathOfRecordLabel[0].getProperty(
  'textContent'
);

//get the value through the public jsonValue() API rather than the
//private _remoteObject field, which may change between Puppeteer versions
const getRecordName = await getTheProperty.jsonValue();
console.log(getRecordName);
Amir Sikandar
  • 161
  • 1
  • 5
-1

Changing the DOM through direct calls is not desirable with front-end frameworks such as Angular, because these frameworks need full control over the DOM in order to work properly. Manipulating the DOM directly may cause unwanted errors or behaviors.

Long story short, don't use:
await element.evaluate(el => el.textContent); for Angular and such front-end frameworks/libraries. Use this instead:

// clickCount: 3 issues a triple click on the field (presumably to select its current contents before typing — confirm)
await page.click("input[name=email]", {clickCount: 3})
// NOTE(review): this types into input[name=inputName], not the input[name=email] clicked above — confirm this is intended
await page.type("input[name=inputName]", "Input text")
Rzassar
  • 2,117
  • 1
  • 33
  • 55
  • I think you misunderstood the question. `await element.evaluate(el => el.textContent);` doesn't change anything, it just returns the text of a node, which is perfectly fine in Angular or any other framework. – ggorlen Mar 29 '23 at 13:23