6

I've got a 'table' of two columns represented as an array. The first column are numbers from 1 to 20 and they are labels, the second column are the corresponding values (seconds):

my_array = [ [ 3,4,5,3,4,5,2 ],[ 12,14,16,11,12,10,20 ] ];

I need the mean (average) for each label:

my_mean_array = [ [ 2,3,4,5 ],[ 20/1, (12+11)/2, (14+12)/2, (16+10)/2 ] ];
// edit: The mean should be a float - the notion above is just for clarification.
// Also the number 'labels' should remain as numbers/integers.

My try:

var a = my_array[0];
var b = my_array[1];
m = [];
n = [];
for( var i = 0; a.length; i++){
    m[ a[i] ] += b[i]; // accumulate the values in the corresponding place
    n[ a[i] ] += 1; // count the occurences
}
var o = [];
var p = [];
o = m / n;
p.push(n);
p.push(o);
Chrugel
  • 883
  • 2
  • 11
  • 19

3 Answers3

3

How about this (native JS, will not break on older browsers):

function arrayMean(ary) {
  var index = {}, i, label, value, result = [[],[]];

  for (i = 0; i < ary[0].length; i++) {
    label = ary[0][i];
    value = ary[1][i];
    if (!(label in index)) {
      index[label] = {sum: 0, occur: 0};
    }
    index[label].sum += value;
    index[label].occur++;
  }
  for (i in index) {
    if (index.hasOwnProperty(i)) {
      result[0].push(parseInt(i, 10));
      result[1].push(index[i].occur > 0 ? index[i].sum / index[i].occur : 0);
    }
  }
  return result;
}

FWIW, if you want fancy I've created a few other ways to do it. They depend on external libraries and are very probably an order of magnitude slower than a native solution. But they are nicer to look at.

It could look like this, with underscore.js:

function arrayMeanUnderscore(ary) {
  return _.chain(ary[0])
    .zip(ary[1])
    .groupBy(function (item) { return item[0]; })
    .reduce(function(memo, items) {
      var values = _.pluck(items, 1),
          toSum = function (a, b) { return a + b; };

      memo[0].push(items[0][0]);
      memo[1].push(_(values).reduce(toSum) / values.length);
      return memo;
    }, [[], []])
    .value();
}

// --------------------------------------------

arrayMeanUnderscore([[3,4,5,3,4,5,2], [12,14,16,11,12,10,20]]);
// -> [[2,3,4,5], [20,11.5,13,13]]

or like this, with the truly great linq.js (I've used v2.2):

function arrayMeanLinq(ary) {
  return Enumerable.From(ary[0])
    .Zip(ary[1], "[$, $$]")
    .GroupBy("$[0]")
    .Aggregate([[],[]], function (result, item) {
      result[0].push(item.Key());
      result[1].push(item.Average("$[1]"));
      return result;
    });
}

// --------------------------------------------

arrayMeanLinq([[3,4,5,3,4,5,2], [12,14,16,11,12,10,20]]);
// -> [[3,4,5,2], [11.5,13,13,20]]

As suspected, the "fancy" implementations are an order of magnitude slower than a native implementation: jsperf comparison.

Tomalak
  • 332,285
  • 67
  • 532
  • 628
  • some improvements: - I think that the if test: index.hasOwnProperty(i) is userless. - I think parseInt is useless as my_array has numbers. - To test if occur is 0, use the sentence ? instead. ocurr==0?0:sum/occur. – Adrian Maire Feb 13 '13 at 08:46
  • Just to clarify: Thank you all! I simply chosen this answer, b/c it gives me the correct/expected outcome 'out of the box'. With the (very elegant) solution from robertklep & Adrian Maire I end up with strings in my 'label'-subarray, which is not a problem, but Tomalak delivered a fitting solution. – Chrugel Feb 13 '13 at 08:57
  • 2
    @AdrianMaire `hasOwnProperty` is never useless when using `for ... in` to traverse an object. `parseInt` is needed as Tomalak uses an object (`index`) to store map of occurences. Without `parseInt` `i` would be a string. – Yoshi Feb 13 '13 at 08:57
  • @Yoshi Thanks for pointing this out, I was just going to write the same thing. -- Adrian, you never know whether the object prototype has been extended, that's what this check is for. I've changed the check for `0` to a ternary, though, that's a bit clearer. – Tomalak Feb 13 '13 at 09:29
  • @Chrugel Nothing is ever straight-forward. It all depends on perspective. If you add a JS library solutions will become nicer, but you add a learning curve for the library (and weight to your page). Anyway I've created alternative functions, have a look. – Tomalak Feb 13 '13 at 14:57
  • @Tomalak - don't worry, I am perfectly happy with the first solution! It was more the realisation how insufficient my js knowledge for even this very basic task is (but I am really somewhat disappointed the array-division isn't possible in js - in R it is!). To me the most important is the lesson with the [[],[]] - that helps a lot. – Chrugel Feb 13 '13 at 17:15
  • @Chrugel It's true, JS lacks a *common set* of advanced functions that operate on the basic data types. But you can do a lot by adding the right libraries, just like what jQuery does for DOM operations. I'm sure there's one for matrix math that you could have used. This thread mentiones a few: http://stackoverflow.com/questions/6306325/javascript-numerical-library-vectors-matrices-determinates-inversion – Tomalak Feb 13 '13 at 17:30
0
var temp = {}; 
my_array[0].map(function(label, i) {
  if (! temp[label])
  {
    temp[label] = [];
  }
  temp[label].push(my_array[1][i]);
});
var result = [ [], [] ];
for (var label in temp) {
  result[0].push(label);
  result[1].push(
    temp[label].reduce(function(p, v) { return p + v }) / temp[label].length
  );
}
robertklep
  • 198,204
  • 35
  • 394
  • 381
0

This function do not sort the resulted array like in your result example. If you need sorting, just say me and i will add it.

function getMeanArray(my_array)
{
    m = {}; //id={count,value}
    for( var i = 0; i<my_array[0].length; i++){
        if (m[my_array[0][i]]===undefined)
        { 
            m[my_array[0][i]]={count:0, value:0};
        }
        m[ my_array[0][i] ].value += my_array[1][i]; // accumulate the values in the corresponding place
        m[ my_array[0][i] ].count++; // count the occurences
    }
    var my_mean_array=[[],[]];
    for (var id in m)
    {
        my_mean_array[0].push(id);
        my_mean_array[1].push(m[id].count!=0?m[id].value/m[id].count:0);
    }
    return my_mean_array;
}
Adrian Maire
  • 14,354
  • 9
  • 45
  • 85