4

I'm currently working with Puppeteer. I noticed that in Chrome DevTools I can get a selector for any element (Copy → Copy selector), and I was wondering whether it is possible to obtain a full CSS selector from an ElementHandle in Puppeteer as well.

For example, given the following html:

<body>
<h1>Main Title</h1>
</body>

Then, in Puppeteer I'm getting the h1 element:

// Look up the <h1> element on the page.
const myElement = await page.$("h1");

I would like to get the full css selector of myElement (body > h1)

angrykoala
  • 3,774
  • 6
  • 30
  • 55

2 Answers

4

Using this part of a userscript of mine:

// Prefix -> namespace URI map for XPath expressions that use namespaced
// element names (e.g. //svg:rect). Kept as a `var` so it stays a global
// binding when the userscript is loaded as a classic script.
var xpathNamespaceResolver = {
    svg: 'http://www.w3.org/2000/svg',
    mathml: 'http://www.w3.org/1998/Math/MathML'
};

/**
 * Return the first node (in document order) matching an XPath expression,
 * evaluated with document.body as the context node.
 *
 * @param {string} expression - XPath expression; the `svg:` and `mathml:`
 *     prefixes are resolved through xpathNamespaceResolver.
 * @returns {Node|undefined} The first match, or undefined when nothing matches.
 */
function getElementByXPath(expression) {
    // document.evaluate() wants a resolver *function* (or an object exposing
    // lookupNamespaceURI), not a plain prefix map: adapt the map here so that
    // prefixed expressions actually resolve instead of failing.
    const resolveNamespace = function (prefix) {
        return Object.prototype.hasOwnProperty.call(xpathNamespaceResolver, prefix)
            ? xpathNamespaceResolver[prefix]
            : null;
    };

    const matches = document.evaluate(
        expression,
        document.body,
        resolveNamespace,
        XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
        null
    );

    if (matches.snapshotLength > 0) {
        return matches.snapshotItem(0);
    }
    return undefined;
}

/**
 * Build an absolute CSS selector or XPath expression for a node by walking
 * from the node up through its ancestors (stopping at <body>) and describing
 * every element on the way: tag name plus all of its attributes.
 *
 * @param {string|Node} arg - Either a DOM node, or a piece of text; for text,
 *     the element is looked up with getElementByXPath() as the first element
 *     having a text node that contains that text.
 * @param {string} type - 'css' or 'xpath' (case-insensitive).
 * @returns {string|undefined} The selector / expression. '' when `type` is
 *     unknown or when no element could be resolved; undefined when `arg` is
 *     neither a node nor a string. An error is logged in each of these cases.
 */
function retrieveCssOrXpathSelectorFromTextOrNode(arg, type) {
    const ELEMENT_NODE = 1;
    // Values matching this can be written directly after '#' or '.'; anything
    // else (leading digit, ':', '.', spaces, ...) needs the attribute form.
    const SAFE_CSS_IDENT = /^-?[A-Za-z_\u00A0-\uFFFF][\w\u00A0-\uFFFF-]*$/;
    // Local on purpose: an undeclared assignment would leak a global and
    // throw in strict mode / ES modules.
    const selectorType = type.toLowerCase();

    // Quote a value as a single-quoted CSS string, escaping what would end it.
    function toCssString(value) {
        const escaped = value
            .replace(/\\/g, '\\\\')
            .replace(/'/g, "\\'")
            .replace(/[\n\r\f]/g, ch => '\\' + ch.charCodeAt(0).toString(16) + ' ');
        return "'" + escaped + "'";
    }

    // Quote a value as an XPath 1.0 string literal. XPath 1.0 has no escape
    // syntax, so a value holding both quote kinds is assembled with concat().
    function toXPathLiteral(value) {
        if (value.indexOf("'") === -1) {
            return "'" + value + "'";
        }
        if (value.indexOf('"') === -1) {
            return '"' + value + '"';
        }
        return "concat('" + value.split("'").join("', \"'\", '") + "')";
    }

    // CSS fragment for one attribute: #id, .class tokens, or [name='value'].
    function cssAttribute(attr) {
        if (!attr.value) {
            return '[' + attr.name + ']';
        }
        if (attr.name === 'id') {
            return SAFE_CSS_IDENT.test(attr.value)
                ? '#' + attr.value
                : '[id=' + toCssString(attr.value) + ']';
        }
        if (attr.name === 'class') {
            // Split on HTML whitespace and drop the empty tokens produced by
            // leading/trailing/repeated spaces.
            const tokens = attr.value.split(/[ \t\n\f\r]+/).filter(Boolean);
            if (tokens.length === 0) {
                return '[class]';
            }
            return tokens
                .map(token => SAFE_CSS_IDENT.test(token)
                    ? '.' + token
                    : '[class~=' + toCssString(token) + ']')
                .join('');
        }
        return '[' + attr.name + '=' + toCssString(attr.value) + ']';
    }

    // XPath predicate for one attribute: [@name='value'] or [@name].
    function xpathAttribute(attr) {
        return attr.value
            ? '[@' + attr.name + '=' + toXPathLiteral(attr.value) + ']'
            : '[@' + attr.name + ']';
    }

    // Concatenated description of all attributes of an element.
    function describeAttributes(element) {
        let described = '';
        if (element.hasAttributes()) {
            const attrs = element.attributes;
            for (let i = 0; i < attrs.length; i++) {
                described += selectorType === 'xpath'
                    ? xpathAttribute(attrs[i])
                    : cssAttribute(attrs[i]);
            }
        }
        return described;
    }

    let node;
    if (typeof arg === 'string') { // text from within the page
        node = getElementByXPath('//*[text()[contains(.,' + toXPathLiteral(arg) + ')]]');
    } else if (typeof arg === 'object') { // node argument, let's do some 'duck typing'
        if (arg && arg.nodeType) {
            node = arg;
        } else {
            console.error("ERROR expected node, get object");
            return;
        }
    } else {
        console.error("ERROR expected node or string argumument");
        return;
    }

    if (selectorType !== 'css' && selectorType !== 'xpath') {
        console.error('ERROR unknown type ' + type);
        return '';
    }

    // Walk up from the node, outermost element first in `steps`. The walk
    // ends at <body>, or earlier when the chain runs out of elements
    // (detached subtree, or the Document above <html>), so it never calls
    // element-only APIs on a non-element node.
    const steps = [];
    for (let current = node; current && current.nodeType === ELEMENT_NODE; current = current.parentNode) {
        const name = current.nodeName.toLowerCase();
        // Links additionally get a predicate on their text in XPath mode.
        const txt = (name === 'a' && selectorType === 'xpath')
            ? '[text()=' + toXPathLiteral(String(current.innerText)) + ']'
            : '';
        steps.unshift({ name, attrs: describeAttributes(current), txt });
        if (name === 'body') {
            break;
        }
    }

    if (steps.length === 0) {
        console.error('ERROR no matching node');
        return '';
    }

    if (selectorType === 'css') {
        return steps.map(step => step.name + step.attrs).join(' > ');
    }
    return '//' + steps.map(step => step.name + step.txt + step.attrs).join('/');
}


/**
 * Debug helper: log both the CSS selector and the XPath expression that
 * retrieveCssOrXpathSelectorFromTextOrNode() produces for `arg`.
 *
 * Declared as a function (rather than assigned to an undeclared name) so it
 * also works in strict mode and ES modules.
 *
 * @param {string|Node} arg - Node, or text contained in the wanted element.
 * @returns {undefined} Nothing; the results are only written to the console.
 */
function x(arg) {
    console.log("CSS\n" + retrieveCssOrXpathSelectorFromTextOrNode(arg, 'css'));
    console.log("XPath\n" + retrieveCssOrXpathSelectorFromTextOrNode(arg, 'xpath'));
}

Usage :

// x() logs both selectors itself and returns nothing, so wrapping the call
// in console.log() would only print an extra "undefined".
x(node);

Output on the selected textArea node from this page in Chrome Dev Tools :

CSS
body.question-page.new-topbar[style] > div.container._full  > div#content.snippet-hidden > div[itemscope][itemtype='http://schema.org/Question'] > div.inner-content.clearfix > div#mainbar[role='main'][aria-label='question and answers'] > div#answers > div#answer-49596712.answer[data-answerid='49596712'][itemscope][itemtype='http://schema.org/Answer'] > div.post-layout > div.answercell.post-layout--right > div.inline-editor[style] > form.inline-post[action='/posts/49596712/edit-submit/dbac2e78-20ec-4f98-86ee-60c3a57fb791'][method='post'][data-post-params='{"is_suggested_edit":false,"post_type":2,"owner":1,"PostId":49596712}'] > div#post-editor-49596712.post-editor.js-post-editor > div[style='position: relative;'] > div.wmd-container > textarea#wmd-input-49596712.wmd-input.processed[name='post-text'][cols='92'][rows='15'][tabindex='81'][data-min-length]

XPath
//body[@class='question-page new-topbar'][@style]/div[@class='container _full ']/div[@id='content'][@class='snippet-hidden']/div[@itemscope][@itemtype='http://schema.org/Question']/div[@class='inner-content clearfix']/div[@id='mainbar'][@role='main'][@aria-label='question and answers']/div[@id='answers']/div[@id='answer-49596712'][@class='answer'][@data-answerid='49596712'][@itemscope][@itemtype='http://schema.org/Answer']/div[@class='post-layout']/div[@class='answercell post-layout--right']/div[@class='inline-editor'][@style]/form[@class='inline-post'][@action='/posts/49596712/edit-submit/dbac2e78-20ec-4f98-86ee-60c3a57fb791'][@method='post'][@data-post-params='{"is_suggested_edit":false,"post_type":2,"owner":1,"PostId":49596712}']/div[@id='post-editor-49596712'][@class='post-editor js-post-editor']/div[@style='position: relative;']/div[@class='wmd-container']/textarea[@id='wmd-input-49596712'][@class='wmd-input processed'][@name='post-text'][@cols='92'][@rows='15'][@tabindex='81'][@data-min-length]
Gilles Quénot
  • 173,512
  • 41
  • 224
  • 223
  • After a bit of tinkering I got this to work. Would it be possible to get a bit of explanation on how this code works or where is this from? – angrykoala Apr 01 '18 at 09:57
  • It's my own code, intended to be used as a userscript. It iterates over each parent node to build both the XPath and the CSS selector (absolute notation) – Gilles Quénot Apr 01 '18 at 10:02
  • Do you mean it's broken ? If you made changes, can you show me ? – Gilles Quénot Apr 01 '18 at 10:09
  • Well I got errors with console.log calling the function before declaring it, I simply moved the logs to the end of x – angrykoala Apr 01 '18 at 10:13
  • 1
    Created a new repository in github, and updated my post with sample output https://github.com/sputnick-dev/retrieveCssOrXpathSelectorFromTextOrNode (fixed script) – Gilles Quénot Apr 01 '18 at 10:52
0

Just made some tests with: https://www.npmjs.com/package/puppeteer-element2selector

Works pretty well so far. I get nice-looking unique selectors for ElementHandles:

.product-headline
.has-zoom .description-container:nth-child(1)

It uses https://github.com/antonmedv/finder under the surface.

musemind
  • 1,027
  • 9
  • 8