I'm working on a small theoretical machine-learning algorithm using Node.js. My goal is to compare many array patterns to one source pattern and then return how similar they are, expressed as a percentage. For example, pattern1 may be 80% similar to the source pattern.
What is the best method for determining the percent similarity of one array to another?
What I've done so far:
// Source sequence that every other sequence is compared against.
// NOTE: the identifier is spelled "soure" (sic); it is kept because later
// code refers to it by this exact name.
const soureSequence = [0.53, 0.55, 0.50, 0.40, 0.50, 0.52, 0.58, 0.60];
// Candidate sequences to compare with the source sequence.
const sequence1 = [0.53, 0.54, 0.49, 0.40, 0.50, 0.52, 0.58, 0.60];
const sequence2 = [0.53, 0.55, 0.50, 0.42, 0.50, 0.53, 0.57, 0.62];
Since I've chosen a percent-based outcome, I figured I should base my source pattern on the percentage change from each value to the next value in the array.
/**
 * Percentage change from `firstVal` to `secondVal`, relative to |firstVal|.
 *
 * @param {number|string} firstVal - Baseline value (used via normal numeric coercion).
 * @param {number|string} secondVal - New value; parsed with `parseFloat`.
 * @returns {number} The signed percentage change, or a tiny positive
 *   placeholder (1e-8) when the result would be 0, NaN or +/-Infinity.
 */
const percentChange = (firstVal, secondVal) => {
  const pChange = ((parseFloat(secondVal) - firstVal) / Math.abs(firstVal)) * 100;
  // A zero baseline produces +/-Infinity (or NaN for 0/0) and a missing or
  // non-numeric operand produces NaN. A plain truthiness check lets Infinity
  // through, so test finiteness explicitly. The placeholder is non-zero so the
  // result can itself be used as a baseline without dividing by zero.
  if (!Number.isFinite(pChange) || pChange === 0) {
    return 0.00000001;
  }
  return pChange;
};
Here I will generate my source pattern from my source sequence:
/**
 * Converts a sequence of values into a pattern of percentage changes between
 * each pair of consecutive values.
 *
 * @param {Array<number>} sequence - Values to convert.
 * @returns {Array<number>} One entry per adjacent pair, i.e.
 *   `sequence.length - 1` entries; an empty array when the sequence has fewer
 *   than two values.
 */
const storePattern = function (sequence) {
  const pattern = [];
  // Stop one short of the end: the last value has no successor. Iterating up
  // to the final index compared it against `undefined` and appended a bogus
  // placeholder entry to every pattern.
  for (let i = 0; i + 1 < sequence.length; i++) {
    pattern.push(percentChange(sequence[i], sequence[i + 1]));
  }
  return pattern;
};
// Percent-change pattern of the source sequence; the reference for all comparisons.
const sourcePattern = storePattern(soureSequence);
Now I will create more patterns to be compared:
// Percent-change patterns of the candidate sequences.
const testPattern1 = storePattern(sequence1);
const testPattern2 = storePattern(sequence2);
Below is my comparison function:
/**
 * Rates how closely `target` follows `source` as an average per-index score.
 *
 * For every index present in both patterns the score is
 * `100 - |percentChange(target[i], source[i])|`. Scores are not clamped, so a
 * deviation above 100% contributes a negative score. Indices present in only
 * one of the patterns contribute 0, so a length mismatch lowers the rating
 * and can never raise it.
 *
 * @param {Array<number>} source - Reference pattern.
 * @param {Array<number>} target - Pattern being rated against the reference.
 * @returns {number} Average score (100 means identical); 0 when both patterns
 *   are empty, since there is nothing to compare.
 */
const processPattern = function (source, target) {
  const overlap = Math.min(source.length, target.length);
  const longest = Math.max(source.length, target.length);
  // Guard against 0 / 0 when neither pattern has any entries.
  if (longest === 0) {
    return 0;
  }
  let total = 0;
  for (let i = 0; i < overlap; i++) {
    // Deviation of the source value from the target value at the same index.
    const change = Math.abs(percentChange(target[i], source[i]));
    total += 100 - change;
  }
  // Divide by the longer length so unmatched positions count as 0% similar.
  return total / longest;
};
Now I can try to estimate the similarity:
// Similarity rating of the first test pattern against the source pattern.
const similarityOfTest1 = processPattern(sourcePattern, testPattern1);
My problem is that this only works on sequences within the same range of values. For example, the percent change from 0.50 to 0.52 (4%) is not the same as the percent change from 0.20 to 0.22 (10%), even though the absolute difference is the same, i.e. 0.02.
I thought about basing the pattern on the difference in values instead, but at this point I'm lost.
All answers will be considered. Thanks for the help!