According to Andrew Ng's lecture on logistic regression on Coursera, the cost function below can be minimized using the corresponding update expression:
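(The formula images didn't carry over, so here they are written out in LaTeX; this is the standard form from the lecture, with the 1/m factor of the gradient folded into the learning rate in my code:)

J(\theta) = -\frac{1}{m} \sum_{i=1}^{m} \left[ y^{(i)} \log h_\theta(x^{(i)}) + \left(1 - y^{(i)}\right) \log\left(1 - h_\theta(x^{(i)})\right) \right]

\theta_j := \theta_j - \frac{\alpha}{m} \sum_{i=1}^{m} \left( h_\theta(x^{(i)}) - y^{(i)} \right) x_j^{(i)}

where h_\theta(x) = \sigma(\theta^T x) = \frac{1}{1 + e^{-\theta^T x}}.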
Running that update step several hundred times on ~150 samples, I get the pattern below, even though the cost decreases after each iteration, as expected:
The circles are the samples I'm training on: the input features are the (x, y) coordinates of each point, and the color is the target label. The red or yellow background is the class the model predicts for each (x, y) input (red = 0, yellow = 1).
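Presumably the background is rendered by thresholding the model's output at 0.5 for each grid point, something like this (classify is a hypothetical name; the sig and sum helpers are sketched further down):

const classify = (x, y, params) =>
  sig(sum([1, x, y], params)) >= 0.5 ? 1 : 0; // 1 = yellow, 0 = red

Since the sigmoid crosses 0.5 exactly where theta^T x = 0, I'd expect the red/yellow boundary to come out as a straight line.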
Questions
- Is the update routine above not the correct partial derivative of the corresponding cost function J?
- What could this output pattern be an indication of?
Training method
// A single pass/epoch (lr and params are set up once, before the first pass)
const lr = 0.003;
let params = [0.5, 0.5, 0.5];

// Predicted probability for every sample: h(x) = sig(theta^T x)
const scores = samples.map(sample => sig(sum(sample, params)));

// Per-sample error term: h(x) - y
const errors = scores.map((score, i) => score - labels[i][0]);

// Gradient step for each parameter:
// params[col] -= lr * sum_over_rows(error[row] * samples[row][col])
params = params.map((param, col) => {
  return param - lr * errors.reduce((acc, error, row) => {
    return acc + error * samples[row][col];
  }, 0);
});
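The sig and sum helpers used above are defined in the JSBin; roughly, they are a sigmoid and a dot product (a minimal sketch, assuming params[0] pairs with the leading 1 bias column in each sample):

// Sigmoid: squashes any real number into (0, 1)
const sig = z => 1 / (1 + Math.exp(-z));

// Dot product of a sample row with the parameter vector (theta^T x);
// the leading 1 in each row is the bias/intercept term
const sum = (sample, params) =>
  sample.reduce((acc, x, j) => acc + x * params[j], 0);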
Sample training data
const samples = [
  [1, 142, 78],
  [1, 108, 182],
  [1, 396, 47],
  [1, 66, 102],
  [1, 165, 116],
  [1, 8, 106],
  [1, 245, 119],
  [1, 302, 17],
  [1, 96, 38],
  [1, 201, 132],
];
const labels = [
  [0],
  [1],
  [0],
  [0],
  [1],
  [1],
  [1],
  [0],
  [1],
];
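(Note: this excerpt shows ten samples but only nine labels; presumably a row was dropped while trimming the data for the post. The sketch below assumes samples and labels have equal length.) The cost I'm tracking per epoch is computed along these lines (a sketch; eps just guards against log(0) when a prediction saturates):

// Cross-entropy cost J(theta), averaged over the m samples
const cost = (samples, labels, params) => {
  const eps = 1e-12; // avoid log(0)
  const m = samples.length;
  const total = samples.reduce((acc, sample, i) => {
    const h = sig(sum(sample, params)); // predicted probability
    const y = labels[i][0];             // target label, 0 or 1
    return acc + y * Math.log(h + eps) + (1 - y) * Math.log(1 - h + eps);
  }, 0);
  return -total / m;
};

Logging cost(samples, labels, params) after each epoch is what shows the steady decrease mentioned above.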
Edit: here's a JSBin of this: https://jsbin.com/jinole/edit?html,js,output