1

I have this HTML in a string. I want to modify the src attributes so that each keeps its existing value but has a string appended to the end of it.

HTML before modification

<html>
  <head>
    <title>LDRA Testbed Results</title>
  </head>
  <script> 
    var AddTxt = true;
    var TBpublish = true;
  </script>
  <frameset cols="206,*" marginwidth="0" marginheight="0"
FRAMEBORDER="no" BORDERCOLOR="#DEDEEF">
     <frame src="./App_xyz_P_tbresults/App_xyz_P.lfm.htm" name="lFrame" NORESIZE>
     <frame src="htm_idx/frontpage.htm" name="rFrame" FRAMEBORDER="no">
  </frameset> 
</html>

HTML after modification

<html>
   <head>
     <title>LDRA Testbed Results</title>
   </head>
   <script> 
      var AddTxt = true; 
      var TBpublish = true;
   </script>
   <frameset cols="206,*" marginwidth="0" marginheight="0" FRAMEBORDER="no" BORDERCOLOR="#DEDEEF">
      <frame src="./App_xyz_P_tbresults/App_xyz_P.lfm.htm:A:1" name="lFrame" NORESIZE>
      <frame src="htm_idx/frontpage.htm:A:1" name="rFrame" FRAMEBORDER="no">
    </frameset> 
</html>

How can I do that in Node.js?

Current code: this is how I'm currently parsing the HTML using the parser.

// Parse the chunk once, then append `:${project_name}:${type}` to every src/href
// attribute found on the listed tags. Each modified element's markup is collected
// in `modifiedHTML`; the parsed (and now modified) document stays in `html`.
// NOTE(review): `parse`, `chunk`, `project_name` and `type` come from the
// surrounding code; `parse` is presumably node-html-parser's parser — confirm.
const html = parse(chunk.toString());
    const suffix = `:${project_name}:${type}`;
    const modifiedHTML = [];

    // Tags whose src/href attributes should be rewritten.
    const elements = [
      "a",
      "area",
      "base",
      "body",
      "frame",
      "iframe",
      "img",
      "input",
      "link",
      "script",
      "audio",
      "embed",
      "source",
      "track",
      "video"
    ];

    // Attribute names that carry a URL to be suffixed.
    const urlAttributes = ["src", "href"];

    for (const tagName of elements) {
      // querySelectorAll yields every match exactly once. The previous version
      // called querySelector in a counted loop, which kept returning (and
      // re-suffixing) the first match while never reaching the later ones.
      for (const element of html.querySelectorAll(tagName)) {
        for (const attribute of Object.keys(element.attributes)) {
          if (urlAttributes.includes(attribute)) {
            const currentValue = element.attributes[attribute];
            element.setAttribute(attribute, `${currentValue}${suffix}`);
          }
        }
        modifiedHTML.push(element.outerHTML);
      }
    }

    // NOTE(review): if the whole rewritten document is needed rather than the
    // individual elements, serializing `html` (e.g. html.toString()) should
    // provide it — confirm against the parser's documentation.
    console.log(modifiedHTML);

Output:

[
[0]   '<frame src="./App_xyz_P_tbresults/App_xyz_P.lfm.htm:TEST:static_analysis_report1:TEST:static_analysis_report1" name="lFrame" NORESIZE></frame>',
[0]   '<frame src="./App_xyz_P_tbresults/App_xyz_P.lfm.htm:TEST:static_analysis_report1:TEST:static_analysis_report1:TEST:static_analysis_report1" name="lFrame" NORESIZE></frame>'
[0] ]

1 Answer

0

It is not a trivial task to parse HTML in JavaScript. I didn't check your attempt but you should really use an existing HTML parser instead of doing your own.

You can use for example jsdom to achieve what you want:

// Parse an HTML string with jsdom, append ':A:1' to every src/href attribute in
// the document, and print the serialized result.
const jsdom = require("jsdom");
const { JSDOM } = jsdom;

const html = `<html>
  <head>
    <title>LDRA Testbed Results</title>
  </head>
  <script> 
    var AddTxt = true;
    var TBpublish = true;
  </script>
  <frameset cols="206,*" marginwidth="0" marginheight="0"
FRAMEBORDER="no" BORDERCOLOR="#DEDEEF">
     <frame src="./App_xyz_P_tbresults/App_xyz_P.lfm.htm" name="lFrame" NORESIZE>
     <frame src="htm_idx/frontpage.htm" name="rFrame" FRAMEBORDER="no">
  </frameset> 
</html>`;

const dom = new JSDOM(html);
const suffix = ':A:1';

// Select by attribute presence so every element carrying a src or href is
// handled, wherever it sits in the tree, and elements without the attribute
// are left untouched.
for (const attribute of ["src", "href"]) {
  for (const element of dom.window.document.querySelectorAll(`[${attribute}]`)) {
    // Read the raw attribute text: the reflected `.src` / `.href` properties can
    // return a resolved absolute URL when the document has a base URL, which
    // would rewrite the original relative path.
    element.setAttribute(attribute, `${element.getAttribute(attribute)}${suffix}`);
  }
}

console.log(dom.serialize());
TGrif
  • 5,725
  • 9
  • 31
  • 52
  • Hi, how can I modify the src or href attribute for all the elements in the DOM? –  Feb 17 '20 at 17:26
  • I suppose you can do it the same way, parsing the whole document body, with maybe some verification to identify the nodes you want to modify. You can probably use jQuery like in [this answer](https://stackoverflow.com/a/55133813/5156280) to make it even easier for you. – TGrif Feb 17 '20 at 19:03