Trying to manipulate HTML with regular expressions is not a good idea: sooner or later you'll bump into some boundary condition where it fails. Maybe a `<` or `>` occurs inside an attribute value, or even inside a text node, while the search term may also occur in unexpected places, such as HTML comments, attribute values, or script tags. The list of boundary cases is long.
Furthermore, your search term may contain characters that have a special meaning in regular expression syntax, so you should at least escape those.
Here is a solution that interprets the string as HTML, using the DOM capabilities, and only replaces text in text nodes:
/**
 * Backslash-escapes every character of `str` that has a special meaning in
 * regular-expression syntax, so the result can be embedded in a pattern and
 * matched literally.
 *
 * @param {string} str - Text to be matched literally.
 * @returns {string} `str` with each special character prefixed by a backslash.
 */
function escapeRegExp(str) {
  const specialChars = /[\[\]\/{}()*+?.\\^$|-]/g;
  // "$&" in the replacement stands for the matched character itself.
  const escaped = str.replace(specialChars, "\\$&");
  return escaped;
}
/**
 * Wraps each whole-word, case-insensitive occurrence of `word` in
 * `<span class="someClass">`, looking only at the top-level text of an HTML
 * string.
 *
 * The string is parsed through the DOM (innerHTML on a detached <span>), and
 * only text nodes that are direct children of that root are searched. Text
 * inside nested elements, attribute values and comments is not modified.
 *
 * NOTE(review): `sentence` is handed to innerHTML as-is; callers should pass
 * trusted or sanitized markup only.
 *
 * @param {string} sentence - HTML fragment to search.
 * @param {string} word - Literal search term; regex metacharacters are escaped.
 * @returns {string} The resulting markup as serialized by innerHTML.
 */
function wrapText(sentence, word) {
  const TEXT_NODE = 3;
  const root = document.createElement('span');
  root.innerHTML = sentence;

  // An empty term would yield /\b()\b/, which splits at every word boundary
  // and litters the output with empty wrapper spans. Nothing to wrap.
  if (word === '') {
    return root.innerHTML;
  }

  const re = new RegExp(`\\b(${escapeRegExp(word)})\\b`, 'gi');

  // childNodes is a live list: take a snapshot before inserting/removing
  // nodes so the loop only visits the nodes that were there originally.
  for (const node of Array.from(root.childNodes)) {
    if (node.nodeType !== TEXT_NODE) continue;

    // Because the pattern has one capture group, split() alternates:
    // even indexes hold plain text, odd indexes hold the matched word.
    const parts = node.nodeValue.split(re);
    if (parts.length === 1) continue; // no match: keep the node untouched

    parts.forEach((part, i) => {
      if (part === '') return; // avoid inserting empty text nodes
      let add;
      if (i % 2) {
        add = document.createElement('span');
        add.textContent = part;
        add.className = 'someClass';
      } else {
        add = document.createTextNode(part);
      }
      root.insertBefore(add, node);
    });
    root.removeChild(node);
  }

  return root.innerHTML;
}
// Demo: run wrapText on a sample string that contains the term both as
// top-level text and inside an existing <span>, then print the markup.
const html = 'This is some word. <span class="word">word</span> should stay';
const result = wrapText(html, 'word');

console.log(result);
Recursing into elements
In comments you mentioned that you would now also like the replacements to happen within some tags, like `p`.
I'll assume that you want this to happen for all elements, except those that have a certain class, e.g. the class that you use for the wrapping `span` elements, but you can of course customise the condition to your needs (like only recursing into `p`, or ...).
The code needs only a few modifications:
/**
 * Backslash-escapes every character of `str` that has a special meaning in
 * regular-expression syntax, so the result can be embedded in a pattern and
 * matched literally.
 *
 * @param {string} str - Text to be matched literally.
 * @returns {string} `str` with each special character prefixed by a backslash.
 */
function escapeRegExp(str) {
  const specialChars = /[\[\]\/{}()*+?.\\^$|-]/g;
  // "$&" in the replacement stands for the matched character itself.
  const escaped = str.replace(specialChars, "\\$&");
  return escaped;
}
/**
 * Wraps each whole-word, case-insensitive occurrence of `word` in
 * `<span class="someClass">`, searching the text nodes of an HTML string at
 * any nesting depth.
 *
 * The string is parsed through the DOM (innerHTML on a detached <span>).
 * Elements are descended into unless they already carry the wrapper class
 * (so existing wrappers are not wrapped again) or are <script>, <style> or
 * <textarea>, whose content is not markup and would be corrupted by inserted
 * span tags. Attribute values and comments are never modified.
 *
 * NOTE(review): `sentence` is handed to innerHTML as-is; callers should pass
 * trusted or sanitized markup only.
 *
 * @param {string} sentence - HTML fragment to search.
 * @param {string} word - Literal search term; regex metacharacters are escaped.
 * @returns {string} The resulting markup as serialized by innerHTML.
 */
function wrapText(sentence, word) {
  const TEXT_NODE = 3;
  const WRAP_CLASS = 'someClass';
  const RAW_TEXT_TAGS = new Set(['SCRIPT', 'STYLE', 'TEXTAREA']);

  const doc = document.createElement('span');
  doc.innerHTML = sentence;

  // An empty term would yield /\b()\b/, which splits at every word boundary
  // and litters the output with empty wrapper spans. Nothing to wrap.
  if (word === '') {
    return doc.innerHTML;
  }

  const re = new RegExp(`\\b(${escapeRegExp(word)})\\b`, 'gi');

  // Replaces one text node by plain-text / wrapper-span pieces.
  function wrapMatches(parent, textNode) {
    // Because the pattern has one capture group, split() alternates:
    // even indexes hold plain text, odd indexes hold the matched word.
    const parts = textNode.nodeValue.split(re);
    if (parts.length === 1) return; // no match: keep the node untouched

    parts.forEach((part, i) => {
      if (part === '') return; // avoid inserting empty text nodes
      let add;
      if (i % 2) {
        add = document.createElement('span');
        add.textContent = part;
        add.className = WRAP_CLASS;
      } else {
        add = document.createTextNode(part);
      }
      parent.insertBefore(add, textNode);
    });
    parent.removeChild(textNode);
  }

  (function recurse(elem) {
    // childNodes is a live list: take a snapshot before inserting/removing
    // nodes so the loop only visits the nodes that were there originally.
    for (const node of Array.from(elem.childNodes)) {
      if (node.nodeType === TEXT_NODE) {
        wrapMatches(elem, node);
        continue;
      }
      // Customise this condition as needed:
      if (
        node.classList &&
        !node.classList.contains(WRAP_CLASS) &&
        !RAW_TEXT_TAGS.has(node.nodeName.toUpperCase())
      ) {
        recurse(node);
      }
    }
  })(doc);

  return doc.innerHTML;
}
// Demo: the sample contains the term nested inside <p><b>…</b></p> and also
// inside a span that already has the wrapper class; print the markup produced.
const html = '<p><b>Some word</b></p>. <span class="someClass">word</span> should stay';
const result = wrapText(html, 'word');

console.log(result);