1

Problem statement: Two input string with styling like strong,li,ol,line-through etc in HTML. In the screen, there should be two box : Original Text & Edited Text. If any text of original Text is missing in Edited Text then it should highlight that word in red colour. If he has added anything in Edited Text which is not in Original Text, then it should highlight that text in yellow colour in Edited Text

enter image description here

Expected Output: In first box it should highlight the deleted text in red colour & In second box it should highlight the edited text in yellow colour

enter image description here

Solution Tried: : 1> Tried with DIff Match Patch but not working properly as my input string contains html string. I can't strip the tags as it will lose its styling.

2> Tried with HtmlDiff but this is giving both deleted word & modified word in a single string. Very difficult to modify this library.

Any lead or help will be much appreciated. Thank you

Sagar Kodte
  • 3,723
  • 4
  • 23
  • 52
Pranav Kumar
  • 79
  • 3
  • 17

2 Answers2

4
<!DOCTYPE html>
<html>
<head>
<meta name="viewport" content="initial-scale=1.0, user-scalable=no"/>
<meta http-equiv="content-type" content="text/html; charset=UTF-8"/>
<script src="http://ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js"></script>
<style>
html, body {
  font: normal 14px/1.4 sans-serif;
  background: #f1f1f1;
}

p {
  margin: 0 0 1rem;
}

ins {
  background: lightgreen;
  text-decoration: none;
}

del {
  background: pink;
}

table {
  border-collapse: collapse;
  width: 100%;
}
table td{
  border: 1px solid #ccc;
}

.card {
  background: #fff;
  border: 1px solid #ccc;
  border-radius: 3px;
  margin: 1rem;
  padding: 1rem;
}

.card .card {
  margin: 0;
  flex: 1 0 0;
}

.row {
  display: flex;
  justify-content: space-between;
  margin-right: -8px;
}

.col {
  display: flex;
  flex: 1 0 0;
  flex-direction: column;
  margin-right: 8px;
}
</style>
</head>
<body>
<div class="card">
  <div class="row">
    <div class="col">
      <h4>Past version</h4>
      <div class="card" id="outputOriginal"></div>
    </div>
    <div class="col">
      <h4>Changes</h4>
      <div class="card" id="output"></div>
    </div>
    <div class="col">
      <h4>New version</h4>
      <div class="card" id="outputNew"></div>
    </div>
  </div>
</div>

</body>
</html>
<script>

  var Match, calculate_operations, consecutive_where, create_index, diff, find_match, find_matching_blocks, html_to_tokens, is_end_of_tag, is_start_of_tag, is_tag, is_whitespace, isnt_tag, op_map, recursively_find_matching_blocks, render_operations, wrap;
  is_end_of_tag = function(char) {
    return char === '>';
  };
  is_start_of_tag = function(char) {
    return char === '<';
  };
  is_whitespace = function(char) {
    return /^\s+$/.test(char);
  };
  is_tag = function(token) {
    return /^\s*<[^>]+>\s*$/.test(token);
  };
  isnt_tag = function(token) {
    return !is_tag(token);
  };
  Match = class Match {
    constructor(start_in_before1, start_in_after1, length1) {
      this.start_in_before = start_in_before1;
      this.start_in_after = start_in_after1;
      this.length = length1;
      this.end_in_before = (this.start_in_before + this.length) - 1;
      this.end_in_after = (this.start_in_after + this.length) - 1;
    }
  };
  html_to_tokens = function(html) {
    var char, current_word, i, len, mode, words;
    mode = 'char';
    current_word = '';
    words = [];
    for (i = 0, len = html.length; i < len; i++) {
      char = html[i];
      switch (mode) {
        case 'tag':
          if (is_end_of_tag(char)) {
            current_word += '>';
            words.push(current_word);
            current_word = '';
            if (is_whitespace(char)) {
              mode = 'whitespace';
            } else {
              mode = 'char';
            }
          } else {
            current_word += char;
          }
          break;
        case 'char':
          if (is_start_of_tag(char)) {
            if (current_word) {
              words.push(current_word);
            }
            current_word = '<';
            mode = 'tag';
          } else if (/\s/.test(char)) {
            if (current_word) {
              words.push(current_word);
            }
            current_word = char;
            mode = 'whitespace';
          } else if (/[\w\#@]+/i.test(char)) {
            current_word += char;
          } else {
            if (current_word) {
              words.push(current_word);
            }
            current_word = char;
          }
          break;
        case 'whitespace':
          if (is_start_of_tag(char)) {
            if (current_word) {
              words.push(current_word);
            }
            current_word = '<';
            mode = 'tag';
          } else if (is_whitespace(char)) {
            current_word += char;
          } else {
            if (current_word) {
              words.push(current_word);
            }
            current_word = char;
            mode = 'char';
          }
          break;
        default:
          throw new Error(`Unknown mode ${mode}`);
      }
    }
    if (current_word) {
      words.push(current_word);
    }
    return words;
  };
  find_match = function(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after) {
    var best_match_in_after, best_match_in_before, best_match_length, i, index_in_after, index_in_before, j, len, locations_in_after, looking_for, match, match_length_at, new_match_length, new_match_length_at, ref, ref1;
    best_match_in_before = start_in_before;
    best_match_in_after = start_in_after;
    best_match_length = 0;
    match_length_at = {};
    for (index_in_before = i = ref = start_in_before, ref1 = end_in_before;
      (ref <= ref1 ? i < ref1 : i > ref1); index_in_before = ref <= ref1 ? ++i : --i) {
      new_match_length_at = {};
      looking_for = before_tokens[index_in_before];
      locations_in_after = index_of_before_locations_in_after_tokens[looking_for];
      for (j = 0, len = locations_in_after.length; j < len; j++) {
        index_in_after = locations_in_after[j];
        if (index_in_after < start_in_after) {
          continue;
        }
        if (index_in_after >= end_in_after) {
          break;
        }
        if (match_length_at[index_in_after - 1] == null) {
          match_length_at[index_in_after - 1] = 0;
        }
        new_match_length = match_length_at[index_in_after - 1] + 1;
        new_match_length_at[index_in_after] = new_match_length;
        if (new_match_length > best_match_length) {
          best_match_in_before = index_in_before - new_match_length + 1;
          best_match_in_after = index_in_after - new_match_length + 1;
          best_match_length = new_match_length;
        }
      }
      match_length_at = new_match_length_at;
    }
    if (best_match_length !== 0) {
      match = new Match(best_match_in_before, best_match_in_after, best_match_length);
    }
    return match;
  };
  recursively_find_matching_blocks = function(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after, matching_blocks) {
    var match;
    match = find_match(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after);
    if (match != null) {
      if (start_in_before < match.start_in_before && start_in_after < match.start_in_after) {
        recursively_find_matching_blocks(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, match.start_in_before, start_in_after, match.start_in_after, matching_blocks);
      }
      matching_blocks.push(match);
      if (match.end_in_before <= end_in_before && match.end_in_after <= end_in_after) {
        recursively_find_matching_blocks(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, match.end_in_before + 1, end_in_before, match.end_in_after + 1, end_in_after, matching_blocks);
      }
    }
    return matching_blocks;
  };
  create_index = function(p) {
    var i, idx, index, len, ref, token;
    if (p.find_these == null) {
      throw new Error('params must have find_these key');
    }
    if (p.in_these == null) {
      throw new Error('params must have in_these key');
    }
    index = {};
    ref = p.find_these;
    for (i = 0, len = ref.length; i < len; i++) {
      token = ref[i];
      index[token] = [];
      idx = p.in_these.indexOf(token);
      while (idx !== -1) {
        index[token].push(idx);
        idx = p.in_these.indexOf(token, idx + 1);
      }
    }
    return index;
  };
  find_matching_blocks = function(before_tokens, after_tokens) {
    var index_of_before_locations_in_after_tokens, matching_blocks;
    matching_blocks = [];
    index_of_before_locations_in_after_tokens = create_index({
      find_these: before_tokens,
      in_these: after_tokens
    });
    return recursively_find_matching_blocks(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, 0, before_tokens.length, 0, after_tokens.length, matching_blocks);
  };
  calculate_operations = function(before_tokens, after_tokens) {
    var action_map, action_up_to_match_positions, i, index, is_single_whitespace, j, last_op, len, len1, match, match_starts_at_current_position_in_after, match_starts_at_current_position_in_before, matches, op, operations, position_in_after, position_in_before, post_processed;
    if (before_tokens == null) {
      throw new Error('before_tokens?');
    }
    if (after_tokens == null) {
      throw new Error('after_tokens?');
    }
    position_in_before = position_in_after = 0;
    operations = [];
    action_map = {
      'false,false': 'replace',
      'true,false': 'insert',
      'false,true': 'delete',
      'true,true': 'none'
    };
    matches = find_matching_blocks(before_tokens, after_tokens);
    matches.push(new Match(before_tokens.length, after_tokens.length, 0));
    for (index = i = 0, len = matches.length; i < len; index = ++i) {
      match = matches[index];
      match_starts_at_current_position_in_before = position_in_before === match.start_in_before;
      match_starts_at_current_position_in_after = position_in_after === match.start_in_after;
      action_up_to_match_positions = action_map[[match_starts_at_current_position_in_before, match_starts_at_current_position_in_after].toString()];
      if (action_up_to_match_positions !== 'none') {
        operations.push({
          action: action_up_to_match_positions,
          start_in_before: position_in_before,
          end_in_before: (action_up_to_match_positions !== 'insert' ? match.start_in_before - 1 : void 0),
          start_in_after: position_in_after,
          end_in_after: (action_up_to_match_positions !== 'delete' ? match.start_in_after - 1 : void 0)
        });
      }
      if (match.length !== 0) {
        operations.push({
          action: 'equal',
          start_in_before: match.start_in_before,
          end_in_before: match.end_in_before,
          start_in_after: match.start_in_after,
          end_in_after: match.end_in_after
        });
      }
      position_in_before = match.end_in_before + 1;
      position_in_after = match.end_in_after + 1;
    }
    post_processed = [];
    last_op = {
      action: 'none'
    };
    is_single_whitespace = function(op) {
      if (op.action !== 'equal') {
        return false;
      }
      if (op.end_in_before - op.start_in_before !== 0) {
        return false;
      }
      return /^\s$/.test(before_tokens.slice(op.start_in_before, +op.end_in_before + 1 || 9e9));
    };
    for (j = 0, len1 = operations.length; j < len1; j++) {
      op = operations[j];
      if (((is_single_whitespace(op)) && last_op.action === 'replace') || (op.action === 'replace' && last_op.action === 'replace')) {
        last_op.end_in_before = op.end_in_before;
        last_op.end_in_after = op.end_in_after;
      } else {
        post_processed.push(op);
        last_op = op;
      }
    }
    return post_processed;
  };
  consecutive_where = function(start, content, predicate) {
    var answer, i, index, last_matching_index, len, token;
    content = content.slice(start, +content.length + 1 || 9e9);
    last_matching_index = void 0;
    for (index = i = 0, len = content.length; i < len; index = ++i) {
      token = content[index];
      answer = predicate(token);
      if (answer === true) {
        last_matching_index = index;
      }
      if (answer === false) {
        break;
      }
    }
    if (last_matching_index != null) {
      return content.slice(0, +last_matching_index + 1 || 9e9);
    }
    return [];
  };
  wrap = function(tag, content) {
    var length, non_tags, position, rendering, tags;
    rendering = '';
    position = 0;
    length = content.length;
    while (true) {
      if (position >= length) {
        break;
      }
      non_tags = consecutive_where(position, content, isnt_tag);
      position += non_tags.length;
      if (non_tags.length !== 0) {
        rendering += `<${tag}>${non_tags.join('')}</${tag}>`;
      }
      if (position >= length) {
        break;
      }
      tags = consecutive_where(position, content, is_tag);
      position += tags.length;
      rendering += tags.join('');
    }
    return rendering;
  };
  op_map = {
    equal: function(op, before_tokens, after_tokens) {
      return before_tokens.slice(op.start_in_before, +op.end_in_before + 1 || 9e9).join('');
    },
    insert: function(op, before_tokens, after_tokens) {
      var val;
      val = after_tokens.slice(op.start_in_after, +op.end_in_after + 1 || 9e9);
      return wrap('ins', val);
    },
    delete: function(op, before_tokens, after_tokens) {
      var val;
      val = before_tokens.slice(op.start_in_before, +op.end_in_before + 1 || 9e9);
      return wrap('del', val);
    }
  };
  op_map.replace = function(op, before_tokens, after_tokens) {
    return (op_map.delete(op, before_tokens, after_tokens)) + (op_map.insert(op, before_tokens, after_tokens));
  };
  render_operations = function(before_tokens, after_tokens, operations) {
    var i, len, op, rendering;
    rendering = '';
    for (i = 0, len = operations.length; i < len; i++) {
      op = operations[i];
      rendering += op_map[op.action](op, before_tokens, after_tokens);
    }
    return rendering;
  };
  diff = function(before, after) {
    var ops;
    if (before === after) {
      return before;
    }
    before = html_to_tokens(before);
    after = html_to_tokens(after);
    ops = calculate_operations(before, after);
    return render_operations(before, after, ops);
  };
  diff.html_to_tokens = html_to_tokens;
  diff.find_matching_blocks = find_matching_blocks;
  find_matching_blocks.find_match = find_match;
  find_matching_blocks.create_index = create_index;
  diff.calculate_operations = calculate_operations;
  diff.render_operations = render_operations;
  if (typeof define === 'function') {
    define([], function() {
      return diff;
    });
  } else if (typeof module !== "undefined" && module !== null) {
    module.exports = diff;
  } else {
    this.htmldiff = diff;
  }




  let originalHTML = `
<p>Hello Mr. Wayne, decide what to do:</p>
<span> Entertainment Session </span>
<label> Country Second </label>
<table>
<tbody>
<tr>
<td>s</td>
<td>a</td>
<td>b1</td>
</tr>
<tr>
<td>s</td>
<td>y</td>
<td>x</td>
</tr>
<tr>
<td>s</td>
<td>1</td>
<td>2</td>
</tr>
</tbody>
</table>

<ul><li>Kill The Joker</li><li>Save Thalia Al Gul</li><li>Save Gotham</li></ul><h4>I am a heading 4</h4>`;

let newHTML = `<p>Hello Mr. Wayne, decide what to do:</p>

<span> Entertainment Session general</span>
<label> Country First </label>
<p>Call Alfred, take Thalia Al Gul to the cinema, save Gotham</p>
<table>
<tbody>
<tr>
<td>s</td>
<td>a</td>
<td>b1</td>
</tr>
<tr>
<td>s</td>
<td>y</td>
<td>x</td>
</tr>
<tr>
<td><p>ssiludfhgljkshdflkgjhlskdfjg bkm,nb,mnb,mnb,mnb ,mnb,mnb,mnb ,mnb,mnb ,mnb ,mnb ,mnb ,mnb,mnb ,mnb </p></td>
<td>1</td>
<td>2</td>
</tr>
</tbody>
</table>
<span>Use the mouse to choose an option.</span>`;

// Diff HTML strings
let output = htmldiff(originalHTML, newHTML);

// Show HTML diff output as HTML (crazy right?)!
document.getElementById("output").innerHTML = output;
document.getElementById("outputOriginal").innerHTML = originalHTML;
document.getElementById("outputNew").innerHTML = newHTML;
</script>
-1

It may be easier for you to manually compare the strings of HTML code by splitting the text into string arrays (split by char ' ').

For example, for part 1 of your problem, you could compare words (string arrays) and add in a span with class highlightRed to make the background red.

Example: https://codepen.io/vck3000/pen/xYeLKq/?editors=1111

//get the strings
var div1Text = document.querySelector("#div1").innerHTML;
var div2Text = document.querySelector("#div2").innerHTML;

//split the strings
var div1TextSplitted = div1Text.split(" ");
var div2TextSplitted = div2Text.split(" ");

//variables to cycle through the split string
var wordNumber1 = 0;
var wordNumber2 = 0;

//new string to re-assign to the divs
var newDiv1Text = "";
var newDiv2Text = "";

//Part 1: Red highlight
while(true){
  //if we find a word that ISNT the same
  if(div1TextSplitted[wordNumber1] !== div2TextSplitted[wordNumber2]){
    //add in a span
    newDiv1Text += "<span class='highlightRed'>" + div1TextSplitted[wordNumber1] + "</span> ";
    //increment the variables
    wordNumber2++;
    wordNumber1++;
  }
  //otherwise we have the same text
  else{
    //append the same text with no changes
    newDiv1Text += div1TextSplitted[wordNumber1] + " ";
    //increment variables
    wordNumber1++;
    wordNumber2++;
  }

  //if we reach the end of either string, break
  if(div1TextSplitted[wordNumber1 + 1] === undefined || div2TextSplitted[wordNumber2 + 1] === undefined){
    break;
  }
}

//re-assign the new text to the div
document.querySelector("#div1").innerHTML = newDiv1Text;

Part 2 (highlight yellow) would be a similar task, but slightly harder. See if you can figure it out.

V. Kuo
  • 78
  • 2
  • 6