Could you please review whether the following approach (pseudo-code) is a sound way to calculate the cosine similarity between two vectors:
var vectorA = [2,5,7,8];
var referenceVector= [1,1,1,1];
//Apply weights to vectors (apply positive or negative weights to elements)
var weightageVector = [1,0.5,2,1.5];
var weighted vectA = GetWeightedVector(vectorA);
//normalize each element to a value between 0 and 1
//@see http://stn.spotfire.com/spotfire_client_help/norm/norm_scale_between_0_and_1.htm
as calculated here: http://jsfiddle.net/snehilw/86jqo1sm/4/
var normalizedVectorA = GetNormalizedVector(vectorA); //using the formula above
var cosineSimilarityScore = GetCosineSimilarityScore(referenceVector, normalizedVectorA );
Can someone please advise whether this is the correct approach? It is not giving me correct results.
As requested, here is the code snippet:
// Reference vector that workload vectors are scored against.
// These are default values: do not change them.
var referenceVector = [1, 1, 1, 1];
// The only vector length the helper functions accept; derived from the reference vector.
var supportedVectorLength = referenceVector.length;
// Per-element multipliers; getWeightedVector multiplies element j of its input by weight j.
var defaultVectorWeights = [1, 0.5, 2, 1.5];
/**
 * Min-max scales every element of `vector` against the matching column of `multiDimArray`.
 *
 * For column j the result is (vector[j] - min) / (max - min), where [min, max] comes from
 * getMinMaxForMultidimensionalArrayColumn(multiDimArray, j). When a column has no spread
 * (max equals min) the element is mapped to 0.5 instead of dividing by zero.
 *
 * @param {Array} multiDimArray Rows whose columns define the min/max used for scaling.
 * @param {Array} vector Vector to scale; must have exactly `supportedVectorLength` elements.
 * @returns {Array} The scaled vector, or an empty array when `vector` has an unsupported length.
 */
function getNormalizedVector(multiDimArray, vector){
    var normalizedVector = [];
    if (vector.length === supportedVectorLength) {
        for (var j = 0; j < supportedVectorLength; j++) {
            // Look the column range up once; each lookup rescans the whole column,
            // so asking separately for min and for max doubled the work.
            var minMax = getMinMaxForMultidimensionalArrayColumn(multiDimArray, j);
            var min = minMax[0];
            var max = minMax[1];
            var normalizedValue = (max === min) ? 0.5 : (vector[j] - min) / (max - min);
            normalizedVector.push(normalizedValue);
        }
    }
    return normalizedVector;
}
/**
 * Computes the cosine similarity of two vectors: dot(A, B) / (|A| * |B|).
 *
 * @param {Array} vectorA First vector; must have exactly `supportedVectorLength` elements.
 * @param {Array} vectorB Second vector; must have exactly `supportedVectorLength` elements.
 * @returns {number|undefined} The similarity score; 0 when either vector has zero magnitude;
 *     undefined (after logging a message) when either vector has an unsupported length.
 */
function getCosineSimilarityScore(vectorA, vectorB) {
    var similarityScore;
    if ((vectorA.length === supportedVectorLength) && (vectorB.length === supportedVectorLength)) {
        var lenVectA = vectorA.length;
        var product = 0;
        var normVectorA = 0;
        var normVectorB = 0;
        for (var i = 0; i < lenVectA; i++) {
            product += vectorA[i] * vectorB[i];
            normVectorA += vectorA[i] * vectorA[i];
            normVectorB += vectorB[i] * vectorB[i];
        }
        var magnitude = Math.sqrt(normVectorA) * Math.sqrt(normVectorB);
        // A zero vector has no direction, so the ratio would be 0 / 0 = NaN.
        // Report "no similarity" instead of letting NaN leak into the scores.
        similarityScore = (magnitude === 0) ? 0 : product / magnitude;
    }
    else {
        //TODO: Handle exception/ Fire an event to notify the server about this exception
        console.log("Cosine similarity workload vectors are of unequal lengths");
    }
    return similarityScore;
}
/**
 * Multiplies each element of `vector` by the weight at the same index in `defaultVectorWeights`.
 *
 * @param {Array} vector Vector to weight; must have exactly `supportedVectorLength` elements.
 * @returns {Array} The weighted vector, or an empty array (after logging a message)
 *     when `vector` has an unsupported length.
 */
function getWeightedVector(vector) {
    if (vector.length != supportedVectorLength) {
        //TODO: Handle exception/ Fire an event to notify the server about this exception
        console.log("Cosine similarity workload vector is of unsupported length");
        return [];
    }
    var weighted = [];
    var index = 0;
    while (index < supportedVectorLength) {
        weighted[index] = defaultVectorWeights[index] * vector[index];
        index += 1;
    }
    return weighted;
}
/**
 * Finds the smallest and largest value in one column of a two-dimensional array.
 *
 * The column is scanned in a single pass with plain JavaScript. This avoids needing jQuery
 * just to iterate, and avoids spreading the whole column as call arguments, which can
 * exceed engine limits on large inputs.
 *
 * @param {Array} multiDimArray Rows to scan; each row is indexed with `column`.
 *     A null/undefined value is treated as having no rows.
 * @param {number} column Zero-based column index; must be below `supportedVectorLength`.
 * @returns {Array} `[min, max]` for the column (`[Infinity, -Infinity]` when there are no
 *     rows), or an empty array (after logging a message) when `column` is out of range.
 */
function getMinMaxForMultidimensionalArrayColumn(multiDimArray, column){
    var minMax = []; //[min,max]
    if (column < supportedVectorLength) {
        var rows = multiDimArray || [];
        var min = Infinity;
        var max = -Infinity;
        for (var i = 0; i < rows.length; i++) {
            var value = rows[i][column];
            // Math.min/Math.max keep the coercion and NaN propagation of the
            // variadic Math.min(a, b, c, ...) form.
            min = Math.min(min, value);
            max = Math.max(max, value);
        }
        minMax.push(min, max);
    }
    else {
        //TODO: Handle exception/ Fire an event to notify the server about this exception
        console.log("Cosine similarity workload vectors are of unequal lengths");
    }
    return minMax;
}
/**
 * Scores one query vector against the reference vector.
 *
 * Steps: weight the query vector, weight the full data set, min-max normalise the weighted
 * query against the weighted data set, then take the cosine similarity between
 * `referenceVector` and the normalised query. Intermediate values are logged to the console.
 *
 * @param {Array} multiDimArray The full data set passed to getWeightedMultiDimArr.
 * @param {Array} queryVector The vector to score.
 * @returns {*} Whatever getCosineSimilarityScore returns for the normalised query.
 */
function getAssociateWorkloadScore(multiDimArray,queryVector){
    //Apply feature scaling
    var queryWeighted = getWeightedVector(queryVector);
    var matrixWeighted = getWeightedMultiDimArr(multiDimArray);
    var queryNormalized = getNormalizedVector(matrixWeighted, queryWeighted);
    var score = getCosineSimilarityScore(referenceVector, queryNormalized);

    // NOTE(review): this array is never populated, so the third log line always prints "[]".
    var matrixNormalized = [];

    console.log('weightedQueryVector='+queryWeighted);
    console.log('weightedMultiDimArr='+JSON.stringify(matrixWeighted));
    console.log('normalizedMultiDimArr='+JSON.stringify(matrixNormalized));
    console.log('normalizedQueryVector='+queryNormalized);
    console.log('workloadScore='+JSON.stringify(score));
    return score;
}
/**
 * Scores every row of `multiDimArray` by passing it, together with the whole array,
 * to getAssociateWorkloadScore.
 *
 * @param {Array} multiDimArray Rows to score; each row is used in turn as the query vector.
 * @returns {Array} One score per row, in row order.
 */
function getTeamWorkloadScore(multiDimArray){
    var scores = [];
    var row = 0;
    while (row < multiDimArray.length) {
        scores[row] = getAssociateWorkloadScore(multiDimArray, multiDimArray[row]);
        row += 1;
    }
    return scores;
}