Let's say I have two strings, is there any way to check if they are at least 90% similar?
var string1 = "theBoardmeetstoday,tomorrow51";
var string2 = "Board meets today, tomorrow";
Thanks,
Tegan
Let's say I have two strings, is there any way to check if they are at least 90% similar?
var string1 = "theBoardmeetstoday,tomorrow51";
var string2 = "Board meets today, tomorrow";
Thanks,
Tegan
The wikipedia entry for Levenshtein distance includes a sample implementation.
jsdifflib is a JavaScript port of Python's excellent difflib library.
It has a function ratio()
which "return[s] a measure of the sequences’ similarity as a float in the range [0, 1]."
Also consider Dice's Coefficient which is considered "mostly better" than the Levenshtein distance by the creator of the string-similarity github repo and its corresponding npm module.
Usage from its docs:
// Load the third-party `string-similarity` npm module (must be installed separately).
var stringSimilarity = require('string-similarity');
// Compare a single pair of strings.
// NOTE(review): presumably returns a similarity score — confirm the range against the package docs.
var similarity = stringSimilarity.compareTwoStrings('healed', 'sealed');
// Compare one string against a list of candidates.
// NOTE(review): the shape of the returned value is not shown here — check the package docs.
var matches = stringSimilarity.findBestMatch('healed', ['edward', 'sealed', 'theatre']);
String.levenshtein (a MooTools plugin)
check it out: http://mootools.net/forge/p/string_levenshtein
GitHub: https://github.com/thinkphp/String.levenshtein
This method calculates the Levenshtein distance between two strings. In information theory and computer science, the Levenshtein distance (also called edit distance) is a metric for measuring the amount of difference between two sequences. The Levenshtein distance between two strings is the minimum number of operations needed to transform one string into the other, where the possible operations are insertion, deletion, or substitution of a single character.
The Levenshtein distance algorithm has been used in:
Piggybacking on other people's answers, I wrote a simple JS function, stringsAreSimilar,
to do this:
// https://github.com/thinkphp/String.levenshtein/blob/master/Source/String.levenshtein.js
/**
 * Computes the Levenshtein (edit) distance between two strings: the minimum
 * number of single-character insertions, deletions, or substitutions needed
 * to turn `stringA` into `stringB`. The comparison is case-sensitive.
 *
 * @param {string} stringA - First string.
 * @param {string} stringB - Second string.
 * @returns {number} The edit distance (0 when the strings are identical).
 */
function getStringDifference(stringA, stringB) {
  var n = stringA.length;
  var m = stringB.length;
  var cost = [];
  var i;
  var j;

  // Turning an empty string into the other string takes one edit per
  // character, so the distance is the other string's length (previously
  // these cases returned undefined).
  if (n === 0) {
    return m;
  }
  if (m === 0) {
    return n;
  }

  // cost[i][j] holds the distance between the first i characters of stringA
  // and the first j characters of stringB. Row 0 and column 0 are the
  // distances to/from the empty prefix.
  for (i = 0; i <= n; i++) {
    cost[i] = [i];
  }
  for (j = 0; j <= m; j++) {
    cost[0][j] = j;
  }

  for (i = 1; i <= n; i++) {
    var x = stringA.charAt(i - 1);
    for (j = 1; j <= m; j++) {
      if (x === stringB.charAt(j - 1)) {
        // Matching characters cost nothing extra.
        cost[i][j] = cost[i - 1][j - 1];
      } else {
        // One edit on top of the cheapest of: substitution, insertion, deletion.
        cost[i][j] = 1 + Math.min(cost[i - 1][j - 1], cost[i][j - 1], cost[i - 1][j]);
      }
    }
  }

  return cost[n][m];
}
/**
 * Decides whether two strings are "similar" by comparing their edit distance
 * (as computed by getStringDifference) against a threshold.
 *
 * Note that the threshold is an absolute number of edits, not a percentage
 * of the string length.
 *
 * @param {string} stringA - First string.
 * @param {string} stringB - Second string.
 * @param {number} [maxDifference] - Exclusive upper bound on the edit
 *     distance for the strings to count as similar. Defaults to 10.
 * @returns {boolean} true when the distance is below the threshold.
 */
function stringsAreSimilar(stringA, stringB, maxDifference) {
  // Keep the historical threshold of 10 when the caller does not supply one.
  var threshold = maxDifference === undefined ? 10 : maxDifference;
  var difference = getStringDifference(stringA, stringB);
  debugConsoleLog("stringA" + stringA);
  debugConsoleLog("stringB" + stringB);
  debugConsoleLog("difference" + difference);
  return difference < threshold;
}
// Example: compare the two strings from the question and report the result.
var string1 = "theBoardmeetstoday,tomorrow51";
var string2 = "Board meets today, tomorrow";
// `similar` must be computed before it is tested; it was previously never
// assigned, which made the check below throw a ReferenceError.
var similar = stringsAreSimilar(string1, string2);
if (similar) {
  console.log("they are similar");
} else {
  console.log("they are not similar");
}
So I was trying to do this last year. I read that Levenshtein distance
is the solution for the purpose the OP asked about here.
I saw some code in a thread that had not been given many upvotes, maybe because of its time complexity (it has a for loop inside a for loop). I tried it and it seems to work for me. The function returns a score where 0 is a perfect match, while a higher score means the strings deviate further from a close match. I'll share it, and maybe someone can extend it or explain it better:
/**
 * Case-insensitive Levenshtein distance between two strings, computed with a
 * single rolling row instead of a full matrix.
 *
 * @param {string} s1 - First string.
 * @param {string} s2 - Second string.
 * @returns {number} Number of single-character edits separating the
 *     lower-cased strings; 0 means they match.
 */
function editDistance(s1, s2) {
  var source = s1.toLowerCase();
  var target = s2.toLowerCase();
  var width = target.length;
  var row = [];
  var col;

  // First row: distance from the empty prefix of `source` to each prefix of `target`.
  for (col = 0; col <= width; col++) {
    row[col] = col;
  }

  for (var r = 1; r <= source.length; r++) {
    // `pending` is the value for the current row one column to the left;
    // it is written into `row` only after the old value there has been read.
    var pending = r;
    for (col = 1; col <= width; col++) {
      // Start from the diagonal neighbour (previous row, previous column).
      var candidate = row[col - 1];
      if (source.charAt(r - 1) !== target.charAt(col - 1)) {
        candidate = Math.min(candidate, pending, row[col]) + 1;
      }
      row[col - 1] = pending;
      pending = candidate;
    }
    row[width] = pending;
  }

  return row[width];
}
Good luck and share your thoughts on this - would be awesome! Cheers!