-1

I'm trying to isolate for $198 using regular expressions. I have a function on Google Sheets that searches URLs and the HTML using regular expressions and so far I've been successful isolating different information from the page, but I can't isolate for this price. I'm a beginner programmer so I'm mainly guessing and checking with regular expressions, so I'd really appreciate all the help I can get.

I've tried the below, yet it says it can't find the expression:

<strong>\$(.*)</strong> 

This is the line of code I'm trying to scrape as well as a screenshot:

<strong> $198 </strong>= $0 

screenshot

Nimantha
  • 6,405
  • 6
  • 28
  • 69
matufling
  • 7
  • 1
  • How about this: `\$\d+` – Cooper Feb 12 '20 at 23:14
  • You should avoid posting images of text. This makes it hard to search, harder for screen-reader users, and harder to verify results (for instance, it would make it clear about spaces on either side of the price) – Ryan M Feb 12 '20 at 23:27

3 Answers3

3

Try \$([0-9]*)\b or specifically <strong>\$([0-9]*)<\/strong>

The dollar amount you want is captured in Group 1. Depending on the language, you might want to search for how you could access Group 1 of the matched expression.

In your expression, you'd want to match digits rather than everything. Using [0-9], which matches a single digit in that range, is more specific. Also, you must escape the slash in </strong> -> <\/strong>

If you need help evaluating RegEx anytime, use https://regex101.com/. It is a beautiful visual tool to test and learn regex.

Dinesh Kannan
  • 81
  • 1
  • 3
1

Regex Tester

Here's some code for a regex tester. You're welcome to it.

html:

<!DOCTYPE html>
<html>
  <head>
    <base target="_top">
    <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
  </head>
  <script>
  // Once the DOM is ready, fetch the previously saved text, pattern and flags
  // from the server and load them into the form controls.
  $(function(){
    // Success callback: `saved` is the object returned by getLastTextPatternFlags().
    function restoreForm(saved){
      $('#text').val(saved.text);
      $('#pattern').val(saved.pattern);
      $('#results').css('background','white');
      // A flag checkbox is ticked only when its saved value is "yes" (any letter case).
      $('#set_g').prop('checked',saved.g.toLowerCase()=='yes');
      $('#set_i').prop('checked',saved.i.toLowerCase()=='yes');
      $('#set_m').prop('checked',saved.m.toLowerCase()=='yes');
    }

    var runner=google.script.run.withSuccessHandler(restoreForm);
    runner.getLastTextPatternFlags();
  });

  /**
   * Runs the regular expression from the #pattern box (using the flags
   * returned by getFlags()) against the contents of the #text box and writes
   * the outcome into the #results box.
   *
   * - Each match is listed on its own line as "result[n]= value".
   * - When nothing matches, "No Results" is shown.
   * - When the pattern cannot be compiled, the error is logged to the console
   *   and "Check Error in Console Log" is shown.
   *
   * The #results box is yellow while the search runs and white when it ends.
   */
  function findData(){
    var resultsBox=$('#results');
    resultsBox.css('background','yellow');
    resultsBox.val('');
    var text=$('#text').val();
    var pattern=$('#pattern').val();
    var flags=getFlags();
    var regex;
    try{
      regex=new RegExp(pattern,flags);
    }
    catch(e){
      console.error(e);
      resultsBox.css('background','white');
      resultsBox.val('Check Error in Console Log');
      // Stop here: matching against an undefined regex would match the empty
      // string and overwrite the error message that was just displayed.
      return;
    }
    // Declared locally so no implicit global is created.
    var result=text.match(regex);

    var rsltLog='';
    if(result){
      var lines=[];
      for(var i=0;i<result.length;i++){
        lines.push('result[' + i + ']= ' + result[i]);
      }
      rsltLog=lines.join('\n');
    }
    // match() returns null when nothing matched, so guard the length lookup.
    var matchCount=result?result.length:0;
    console.log('module: %s pattern: %s regex: %s flags: %s result: %s length: %s','findData()',pattern,regex,flags,rsltLog,matchCount);
    resultsBox.val(result?rsltLog:"No Results");
    resultsBox.css('background','white');
  }

    /**
     * Builds the RegExp flag string from the g / i / m checkboxes.
     *
     * @returns {string} The letters of the checked flags, always in the order
     *     g, i, m (e.g. "gm"); an empty string when none is checked.
     */
    function getFlags(){
      var boxes=[
        {flag:'g',selector:'#set_g'},
        {flag:'i',selector:'#set_i'},
        {flag:'m',selector:'#set_m'}
      ];
      return boxes
        .filter(function(box){return $(box.selector).is(':checked');})
        .map(function(box){return box.flag;})
        .join('');
    }

    /**
     * Sends the contents of the #text box to the server-side saveText().
     * The box is yellow while the call is pending and goes back to white
     * once the server reports success.
     */
    function saveText(){
      var box=$('#text');
      box.css('background','yellow');
      var currentText=box.val();
      var clearHighlight=function(){
        $('#text').css('background','white');
      };
      google.script.run.withSuccessHandler(clearHighlight).saveText(currentText);
    }

      /**
       * Sends the contents of the #pattern box to the server-side
       * savePattern(). The box is yellow while the call is pending and goes
       * back to white once the server reports success.
       */
      function savePattern(){
        var box=$('#pattern');
        box.css('background','yellow');
        var currentPattern=box.val();
        var clearHighlight=function(){
          $('#pattern').css('background','white');
        };
        google.script.run.withSuccessHandler(clearHighlight).savePattern(currentPattern);
      }

      /**
       * Sends the state of the g / i / m checkboxes to the server-side
       * saveFlags() as an object whose values are 'yes' (checked) or 'no'.
       * The #results box is yellow while the call is pending and goes back to
       * white once the server reports success.
       */
      function saveFlags(){
        $('#results').css('background','yellow');
        var flagObj={
          g:$('#set_g').is(':checked')?'yes':'no',
          i:$('#set_i').is(':checked')?'yes':'no',
          m:$('#set_m').is(':checked')?'yes':'no'
        };
        var clearHighlight=function(){
          $('#results').css('background','white');
        };
        google.script.run.withSuccessHandler(clearHighlight).saveFlags(flagObj);
      }
      console.log('My Code');
    </script>
    <style>
    .btns{margin:2px 2px 2px 0;}
    #container{width:100%;}
    </style>
  <body>
    <div id='container'>
    TEXT&nbsp;&nbsp;<input class="btns" type="button" value="Save Text" onClick="saveText();" />
    <br /><textarea id="text" placeholder="Enter the text to be searched" rows="4" cols="60"></textarea>
    <br />PATTERN&nbsp;&nbsp;<input class="btns" type="button" value="Save Pattern" onClick="savePattern();" />
    <br /><textarea id="pattern" placeholder="Enter the regex search expression" rows="4" cols="60"></textarea>
    <br />RESULTS
    <br /><textarea id="results" rows="4" cols="60"></textarea>
    <br /><input type="button" value="Search" onClick="findData();" />&nbsp;&nbsp;<input class="hostcontrol" type="button" value="Close" onClick="google.script.host.close();" />
    &nbsp;&nbsp;g&nbsp;&nbsp;<input id="set_g" type="checkbox" />
    &nbsp;&nbsp;i&nbsp;&nbsp;<input id="set_i" type="checkbox" />
    &nbsp;&nbsp;m&nbsp;&nbsp;<input id="set_m" type="checkbox" />
    &nbsp;&nbsp;<input type="button" value="Save Flags" onClick="saveFlags();" />
    &nbsp;&nbsp;<p>Don't leave extra carriage returns in search pattern textbox.</p>
    </div>
  </body>
</html>

GS:

/**
 * Opens the 'RegexTester' HTML file as an 800x500 modeless dialog titled
 * 'Regex Tester' in the spreadsheet UI.
 */
function showRegexDialog(){
  var page=HtmlService.createHtmlOutputFromFile('RegexTester');
  var sizedPage=page.setWidth(800).setHeight(500);
  var spreadsheetUi=SpreadsheetApp.getUi();
  spreadsheetUi.showModelessDialog(sizedPage,'Regex Tester');
}

/**
 * Reads every row of the 'Input' sheet's data range and returns it as a
 * key/value object: column A supplies the property name, column B the value.
 * The resulting object is also written to the Apps Script log.
 *
 * @returns {Object} Map of first-column values to second-column values.
 */
function getLastTextPatternFlags(){
  var rows=SpreadsheetApp.getActive()
    .getSheetByName('Input')
    .getDataRange()
    .getValues();
  var saved={};
  rows.forEach(function(row){
    saved[row[0]]=row[1];
  });
  Logger.log(saved);
  return saved;
}

/**
 * Stores `txt` in column B of every 'Input' sheet row whose column A equals
 * 'text', then writes the whole data range back to the sheet.
 *
 * @param {string} txt Text to store.
 * @returns {boolean} Always true.
 */
function saveText(txt){
  var range=SpreadsheetApp.getActive().getSheetByName('Input').getDataRange();
  var rows=range.getValues();
  rows.forEach(function(row){
    if(row[0]=='text'){
      row[1]=txt;
    }
  });
  range.setValues(rows);
  return true;
}

/**
 * Stores `txt` in column B of every 'Input' sheet row whose column A equals
 * 'pattern', then writes the whole data range back to the sheet.
 *
 * @param {string} txt Regex pattern text to store.
 * @returns {boolean} Always true.
 */
function savePattern(txt){
  var range=SpreadsheetApp.getActive().getSheetByName('Input').getDataRange();
  var rows=range.getValues();
  rows.forEach(function(row){
    if(row[0]=='pattern'){
      row[1]=txt;
    }
  });
  range.setValues(rows);
  return true;
}

/**
 * Updates the flag rows of the 'Input' sheet. For every row whose column A,
 * lower-cased, is 'g', 'i' or 'm', column B is replaced with the matching
 * property of `flagObj`; the whole data range is then written back.
 *
 * @param {Object} flagObj Object with `g`, `i` and `m` properties.
 * @returns {boolean} Always true.
 */
function saveFlags(flagObj){
  var flagNames=['g','i','m'];
  var range=SpreadsheetApp.getActive().getSheetByName('Input').getDataRange();
  var rows=range.getValues();
  rows.forEach(function(row){
    var key=String(row[0]).toLowerCase();
    if(flagNames.indexOf(key)!==-1){
      row[1]=flagObj[key];
    }
  });
  range.setValues(rows);
  return true;
}

/**
 * Web-app entry point: serves the 'RegexTester' HTML file with an extra style
 * rule that hides elements of class `hostcontrol`, and with the X-Frame-Options
 * mode set to ALLOWALL.
 *
 * @returns {*} The value returned by setXFrameOptionsMode() on the HTML output.
 */
function doGet(){
  var page=HtmlService.createHtmlOutputFromFile('RegexTester');
  page.append('<style>.hostcontrol{display:none;}</style>');
  var frameMode=HtmlService.XFrameOptionsMode.ALLOWALL;
  return page.setXFrameOptionsMode(frameMode);
}

The WebApp:

enter image description here

The spreadsheet:

enter image description here

Regular Expression Syntax

Cooper
  • 59,616
  • 6
  • 23
  • 54
0

It looks to me like there are spaces on either side of the price, so you should search for something like

<strong>\s*\$(.+?)\s*</strong> 

I added a lazy quantifier (the ?) to make it a bit more robust against grabbing more than you intended.

You could also limit it to digits:

<strong>\s*\$(\d+?)\s*</strong> 
Ryan M
  • 18,333
  • 31
  • 67
  • 74