I am trying to auto-detect addresses on a page and add the class "address" where found.
var rangyPatternApplier = function(element, pattern, style) {
var innerText = element.innerText;
var matches = innerText.match(pattern);
if (matches) {
for (var i = 0; i < matches.length; i++) {
console.log("Match: " + matches[i]);
var start = innerText.indexOf(matches[i]);
var end = start + matches[i].length;
let range = document.createRange();
var start = innerText.indexOf(matches[i]);
console.log('inner text: ' + innerText);
console.log('start: ' + start);
console.log('starts with: ' + innerText.substring(start));
var end = start + matches[i].length;
var startNode = element.childNodes[0];
var endNode = startNode;
while (startNode.nodeValue.length < start) {
start -= startNode.nodeValue.length;
end -= startNode.nodeValue.length;
startNode = startNode.nextSibling;
endNode = startNode;
if (startNode == null) {
error.reportError("Just wrong in Sections.rangyPatternApplier");
return;
}
}
while (endNode.nodeValue.length < end) {
end -= endNode.nodeValue.length;
if (endNode.nextSibling) endNode = endNode.nextSibling;
while (!endNode.nodeValue) {
endNode = endNode.childNodes[0];
}
if (endNode == null) {
error.reportError("Just wrong in Sections.rangyPatternApplier");
}
}
range.setStart(startNode, start);
console.log("starts with: " + startNode.nodeValue.substring(start));
range.setEnd(endNode, end);
var applier = rangy.createClassApplier(style, {
elementTagName: "span",
elementProperties: {
},
});
window.getSelection().addRange(range);
applier.toggleSelection();
}
}
}
Called via:
$("P").each(function () {
rangyPatternApplier(this, new RegExp("\\d+\\s[A-z]+\\s[A-z0-9]+\\s(Street|St|Avenue|Av|Ave|Road|Rd)", "mgi"), "Address");
});
On text in a paragraph:
If the income renders the household ineligible for CA/CILOCA, the case will be systemically referred to the Administration for Children s Services Transitional Child Care Unit at 109 East 16th Street 3rd floor for evaluation of Transitional Child Care (TCC) benefits. The TCC Worker determines eligibility for up to 12 months of TCC benefits.
The regex is working, the address class is being applied. I am applying the range to the window selection because there appears to be a bug in rangy when applied just on the Range (I'm getting an error message). But somehow, when I create the range, the span appears 5 characters before the start of the address and ends 9 characters early. The early ending part could be due to the tag around the "th" in 16th street. But why is the range 5 characters earlier than what I'm finding in innerText?