1

I am trying to count the number of matches to a set of regular expressions in survey responses on Qualtrics (e.g. whenever people use "I think...", "In my opinion,", etc., the count increases by one).

Here is the JavaScript I wrote to do this:

const DONT_PHRASES_ARR = ["dont"," don't"," do not"," can not"," cant"," can't"];
const DONT_PHRASES = DONT_PHRASES_ARR.join("|");
const PRONOUNS_ARR = ["he","she","it","they"];
const PRONOUNS = PRONOUNS_ARR.join("|");
const PRESIDENT_NAMES_ARR = ["candidate","clinton","donald","gop","hillary","hilary","trump","trum"];
const PRESIDENT_NAMES = PRESIDENT_NAMES_ARR.join("|");
const SKIP_WORDS_ARR = ["also"," really"," very much"];
const SKIP_WORDS = SKIP_WORDS_ARR.join("|");


const AMBIGUOUS_WORDS_ARR = ["seemed","prefer"];
const AMBIGUOUS_WORDS = AMBIGUOUS_WORDS_ARR.join("|");
const I_OPINION_WORDS_ARR = ["agree","believe","consider","disagree","hope","feel","felt","find","oppose","think","thought","support"];
const I_OPINION_WORDS = I_OPINION_WORDS_ARR.join("|");
const OPINION_PHRASES_ARR = ["in my opinion","it seems to me","from my perspective","in my view","from my view","from my standpoint","for me"];
const OPINION_PHRASES = OPINION_PHRASES_ARR.join("|");
const OPINION_PHRASE_REGEXES = [
  {label: "op1", regex: new RegExp(`(?:i(?:${DONT_PHRASES}|${SKIP_WORDS})? (?:\b${I_OPINION_WORDS}\b) )`, 'i')},
  {label: "op2", regex: new RegExp(`(?:i'm [a-z]+ to (?:\b${I_OPINION_WORDS}\b) )`, 'i')},
  {label: "op3", regex: new RegExp(`(?:${OPINION_PHRASES},? )`, 'i')}
];


const STRONG_FACT_WORDS_ARR = ["are","can't","demonstrate","demonstrates","did","had","is","needs","should","will","would"];
const STRONG_FACT_WORDS = STRONG_FACT_WORDS_ARR.join("|");
const WEAKER_FACT_WORDS_ARR = ["were","was","has"];
const WEAKER_FACT_WORDS = WEAKER_FACT_WORDS_ARR.join("|");
const FACT_WORDS_ARR = WEAKER_FACT_WORDS_ARR.concat(STRONG_FACT_WORDS_ARR);
const FACT_WORDS = FACT_WORDS_ARR.join("|");
const FACT_PHRASES = new RegExp();
const FACT_PHRASE_REGEXES = [
  {label: "fp1", regex: new RegExp(`(?:[tT]he [A-Z][a-z]+ (?:\b${FACT_WORDS}\b) )`, 'gmi')},
  {label: "fp2", regex: new RegExp(`(?:(?:^|.+\. )[A-Z][a-z]+ (?:\b${FACT_WORDS}\b) )`, 'gmi')},
  {label: "fp3", regex: new RegExp(`(?:[tT]he [^\.]*[A-Z][a-z]+\'s? [a-z]+ (?:\b${FACT_WORDS}\b) )`, 'gmi')},
  {label: "fp4", regex: new RegExp(`(?:(?:[^\.]*${PRONOUNS}) (?:\b${STRONG_FACT_WORDS}\b) )`, 'gi')},
  {label: "fp5", regex: new RegExp(`(?:(?:^|\. )${PRONOUNS} (?:\b${FACT_WORDS}\b) )`, 'gi')},
  {label: "fp6", regex: new RegExp(`(?:(?:^|[^.]* )(?:\b${PRESIDENT_NAMES}\b) (?:\b${FACT_WORDS}\b) )`, 'gi')},
  {label: "fp7", regex: new RegExp(`(?:(?:^|[^.]* )(?:${PRONOUNS}|${PRESIDENT_NAMES}) [a-z]+(?:ed|[^ia]s) )`, 'gi')},
  {label: "fp8", regex: new RegExp(`(?:(?:^|[^.]* )(?:${PRONOUNS}|${PRESIDENT_NAMES}) [a-z]+ [a-z]+(?:ed|[^ia]s) )`, 'gi')},
  {label: "fp9", regex: new RegExp(`(?:(?:^|\. )(?:She\'s|He\'s) )`, 'gi')}
];

const history = [];


function analyze() {
  const response = document.getElementById('input').value.trim();
     
    const response_sentences = response.match(/[^\.!\?]+[ \.!\?]+/g);

    var i = 0
        while (i < response_sentences.length){
          let response_words = response_sentences[i].toString().split(' ')
               .map((w) => w.toLowerCase().replace(/[\W]/g, ''));
            i++;
          }
    
  
let fact_phrases = [];
let opinion_phrases = [];
var j = 0;
while (j < response_sentences.length) {
  // Check for opinion phrases
  let opinion_match = false;
  for (let {label, regex} of OPINION_PHRASE_REGEXES) {
    if (response_sentences[j].match(regex)) {
      opinion_match = true;
      op_match_phr = response_sentences[j].match(regex);
      opinion_phrases.push({match: op_match_phr[0], label: label, sent: j});
      break;
    }
  }
  if (!opinion_match) {
    // This sentence does not contain an opinion phrase
    let fact_match = false;
    for (let {label, regex} of FACT_PHRASE_REGEXES) {
      if (response_sentences[j].match(regex)) {
        fact_match = true;
        fact_match_phr = response_sentences[j].match(regex);
        fact_phrases.push({match: fact_match_phr[0], label: label, sent: j});
        break;
      }
    }
  }
  j++;
}


    const opinion_phrases_uniq = opinion_phrases.filter((obj, index) =>
      opinion_phrases.findIndex((item) => item.sent === obj.sent) === index
    );
   
    let opinion_phrases_uniq_t2 = opinion_phrases_uniq.length;
  
    let fact_phrases_t2 = fact_phrases.length;
    
    const fact_phrases_uniq = fact_phrases.filter((obj, index) =>
      fact_phrases.findIndex((item) => item.sent === obj.sent) === index
    );
    console.log('FACT PHRASES UNIQUE:', fact_phrases_uniq)
  


    let fact_phrases_uniq_t2 = fact_phrases_uniq.length;
    
    let net_score = opinion_phrases_uniq_t2 - fact_phrases_uniq_t2;


    if (response !== 'NA') {
      console.log('-------------------------');
      // console.log('RESPONSE: ', response);
      console.log('OPINION PHRASES:', opinion_phrases_uniq_t2);
      console.log('FACT PHRASES:', fact_phrases_uniq_t2);
      console.log('NET SCORE:', net_score);
    }
  
  
 const result = {
      input: response,
      net_score: net_score,
      opinion_phrases_t2: opinion_phrases_uniq_t2,
      fact_phrases_t2: fact_phrases_uniq_t2,
      opinion_phrases: opinion_phrases,
      fact_phrases: fact_phrases
    };
  
    // Display result
    const output = document.getElementById('output');
    output.textContent = `Net score: ${net_score}\nOpinion phrases: ${opinion_phrases_uniq_t2}\nFact phrases: ${fact_phrases_uniq_t2}`;
  }

I've tried to implement the script in Qualtrics like this:

{
    /*Place your JavaScript here to run when the page loads*/

});

Qualtrics.SurveyEngine.addOnReady(function analyze() {
  
  const DONT_PHRASES_ARR = ["dont"," don't"," do not"," can not"," cant"," can't"];
  const DONT_PHRASES = DONT_PHRASES_ARR.join("|");
  const PRONOUNS_ARR = ["he","she","it","they"];
  const PRONOUNS = PRONOUNS_ARR.join("|");
  const PRESIDENT_NAMES_ARR = ["candidate","clinton","donald","gop","hillary","hilary","trump","trum"];
  const PRESIDENT_NAMES = PRESIDENT_NAMES_ARR.join("|");
  const SKIP_WORDS_ARR = ["also"," really"," very much"];
  const SKIP_WORDS = SKIP_WORDS_ARR.join("|");


  const AMBIGUOUS_WORDS_ARR = ["seemed","prefer"];
  const AMBIGUOUS_WORDS = AMBIGUOUS_WORDS_ARR.join("|");
  const I_OPINION_WORDS_ARR = ["agree","believe","consider","disagree","hope","feel","felt","find","oppose","think","thought","support"];
  const I_OPINION_WORDS = I_OPINION_WORDS_ARR.join("|");
  const OPINION_PHRASES_ARR = ["in my opinion","it seems to me","from my perspective","in my view","from my view","from my standpoint","for me"];
  const OPINION_PHRASES = OPINION_PHRASES_ARR.join("|");
  const OPINION_PHRASE_REGEXES = [
    {label: "op1", regex: new RegExp(`(?:i(?:${DONT_PHRASES}|${SKIP_WORDS})? (?:\b${I_OPINION_WORDS}\b) )`, 'i')},
    {label: "op2", regex: new RegExp(`(?:i'm [a-z]+ to (?:\b${I_OPINION_WORDS}\b) )`, 'i')},
    {label: "op3", regex: new RegExp(`(?:${OPINION_PHRASES},? )`, 'i')}
  ];




  const STRONG_FACT_WORDS_ARR = ["are","can't","demonstrate","demonstrates","did","had","is","needs","should","will","would"];
  const STRONG_FACT_WORDS = STRONG_FACT_WORDS_ARR.join("|");
  const WEAKER_FACT_WORDS_ARR = ["were","was","has"];
  const WEAKER_FACT_WORDS = WEAKER_FACT_WORDS_ARR.join("|");
  // const FACT_WORDS = new RegExp(`${STRONG_FACT_WORDS}|${WEAKER_FACT_WORDS}`);
  const FACT_WORDS_ARR = WEAKER_FACT_WORDS_ARR.concat(STRONG_FACT_WORDS_ARR);
  const FACT_WORDS = FACT_WORDS_ARR.join("|");
  const FACT_PHRASES = new RegExp();
  const FACT_PHRASE_REGEXES = [
    {label: "fp1", regex: new RegExp(`(?:[tT]he [A-Z][a-z]+ (?:\b${FACT_WORDS}\b) )`, 'gmi')},
    {label: "fp2", regex: new RegExp(`(?:(?:^|.+\. )[A-Z][a-z]+ (?:\b${FACT_WORDS}\b) )`, 'gmi')},
    {label: "fp3", regex: new RegExp(`(?:[tT]he [^\.]*[A-Z][a-z]+\'s? [a-z]+ (?:\b${FACT_WORDS}\b) )`, 'gmi')},
    {label: "fp4", regex: new RegExp(`(?:(?:[^\.]*${PRONOUNS}) (?:\b${STRONG_FACT_WORDS}\b) )`, 'gi')},
    {label: "fp5", regex: new RegExp(`(?:(?:^|\. )${PRONOUNS} (?:\b${FACT_WORDS}\b) )`, 'gi')},
    {label: "fp6", regex: new RegExp(`(?:(?:^|[^.]* )(?:\b${PRESIDENT_NAMES}\b) (?:\b${FACT_WORDS}\b) )`, 'gi')},
    {label: "fp7", regex: new RegExp(`(?:(?:^|[^.]* )(?:${PRONOUNS}|${PRESIDENT_NAMES}) [a-z]+(?:ed|[^ia]s) )`, 'gi')},
    {label: "fp8", regex: new RegExp(`(?:(?:^|[^.]* )(?:${PRONOUNS}|${PRESIDENT_NAMES}) [a-z]+ [a-z]+(?:ed|[^ia]s) )`, 'gi')},
    {label: "fp9", regex: new RegExp(`(?:(?:^|\. )(?:She\'s|He\'s) )`, 'gi')}
  ];

  const history = [];
  var response = "${q://QID19/QuestionText}"
  var response = response.value.trim();
  const response_sentences = response.match(/[^\.!\?]+[ \.!\?]+/g);

    var i = 0
        while (i < response_sentences.length){
          let response_words = response_sentences[i].toString().split(' ')
               .map((w) => w.toLowerCase().replace(/[\W]/g, ''));
            i++;
          }
  
  let fact_phrases = [];
  let opinion_phrases = [];
  var j = 0;
  while (j < response_sentences.length) {
    // Check for opinion phrases
    let opinion_match = false;
    for (let {label, regex} of OPINION_PHRASE_REGEXES) {
      if (response_sentences[j].match(regex)) {
        opinion_match = true;
        op_match_phr = response_sentences[j].match(regex);
        opinion_phrases.push({match: op_match_phr[0], label: label, sent: j});
        break;
      }
    }
    if (!opinion_match) {
      // This sentence does not contain an opinion phrase
      let fact_match = false;
      for (let {label, regex} of FACT_PHRASE_REGEXES) {
        if (response_sentences[j].match(regex)) {
          fact_match = true;
          fact_match_phr = response_sentences[j].match(regex);
          fact_phrases.push({match: fact_match_phr[0], label: label, sent: j});
          break;
        }
      }
    }
    j++;
  }


  // Do something with fact_sentences and opinion_sentences
    console.log('OP PHRASES :', opinion_phrases)

    let opinion_phrases_t2 = opinion_phrases.length;
    // console.log('OP PHR NUM: ', opinion_phrases_t2)

    const opinion_phrases_uniq = opinion_phrases.filter((obj, index) =>
      opinion_phrases.findIndex((item) => item.sent === obj.sent) === index
    );
     console.log('OP PHRASES UNIQUE:', opinion_phrases_uniq)

    let opinion_phrases_uniq_t2 = opinion_phrases_uniq.length;
    // console.log('OP PHR UNIQUE NUM : ', opinion_phrases_uniq_t2)

    console.log('FACT PHRASES :', fact_phrases)

    let fact_phrases_t2 = fact_phrases.length;
    // console.log('FACT PHR NUM: ', fact_phrases_t2)


    const fact_phrases_uniq = fact_phrases.filter((obj, index) =>
      fact_phrases.findIndex((item) => item.sent === obj.sent) === index
    );
    console.log('FACT PHRASES UNIQUE:', fact_phrases_uniq)
    // Object.keys(fact_phrases[1]).forEach((prop)=> console.log(prop));


    let fact_phrases_uniq_t2 = fact_phrases_uniq.length;
    // console.log('FACT PHR NUM (UNIQUE): ', fact_phrases_uniq_t2)

    let net_score = opinion_phrases_uniq_t2 - fact_phrases_uniq_t2;


    if (response !== 'NA') {
      console.log('-------------------------');
      // console.log('RESPONSE: ', response);
      console.log('OPINION PHRASES:', opinion_phrases_uniq_t2);
      console.log('FACT PHRASES:', fact_phrases_uniq_t2);
      console.log('NET SCORE:', net_score);
    }
  
  
 const result = {
      input: response,
      net_score: net_score,
      opinion_phrases_t2: opinion_phrases_uniq_t2,
      fact_phrases_t2: fact_phrases_uniq_t2,
      opinion_phrases: opinion_phrases,
      fact_phrases: fact_phrases
    };
console.log(result)
  
 Qualtrics.SurveyEngine.setEmbeddedData('feedback', result);

});

Qualtrics.SurveyEngine.addOnUnload(function()
{
    /*Place your JavaScript here to run when the page is unloaded*/

});

When I preview the survey and look at the console, the regular expressions are missing the string variables (e.g. SKIP_WORDS):

const OPINION_PHRASE_REGEXES = [
    {label: "op1", regex: new RegExp(`(?:i(?:|)? (?:\b\b) )`, 'i')},
    {label: "op2", regex: new RegExp(`(?:i'm [a-z]+ to (?:\b\b) )`, 'i')},
    {label: "op3", regex: new RegExp(`(?:,? )`, 'i')}
  ];

This is what I would expect to see there:

const OPINION_PHRASE_REGEXES = [
    {label: "op1", regex: new RegExp(`(?:i(?:${DONT_PHRASES}|${SKIP_WORDS})? (?:\b${I_OPINION_WORDS}\b) )`, 'i')},
    {label: "op2", regex: new RegExp(`(?:i'm [a-z]+ to (?:\b${I_OPINION_WORDS}\b) )`, 'i')},
    {label: "op3", regex: new RegExp(`(?:${OPINION_PHRASES},? )`, 'i')}
  ];

Is there Qualtrics syntax I've crossed wires with? I'm pretty new to Stack and JavaScript -- would love any help!

Ian Davis
  • 11
  • 2
  • `(?:\b${FACT_WORDS}\b)` looks a bit weird. Currently it looks like this: `/(?:\bwere|was|has|...|would\b)/` (note the two `\b`s' places). `(?:[^\.]*${PRONOUNS})` doesn't seem good to me either. You probably want `\b(?:${FACT_WORDS})\b` and `[^\.]*(?:${PRONOUNS})`, correct? – InSync Apr 14 '23 at 22:19

1 Answers1

0

You should put \b outside the group that contains all the alternatives you're matching. Otherwise the leading \b is only associated with the first word, and the trailing \b is associated with the last word. All the other words will be matched without requiring any word boundaries around them.

Also, you need to escape the backslashes to make them literal; otherwise, \b is the escape sequence for the backspace character.

(?:\b${FACT_WORDS}\b)

should be

\\b(?:${FACT_WORDS})\\b
Barmar
  • 741,623
  • 53
  • 500
  • 612