I'm simulating the model where there are N marbles, out of which K marbles are good. We pick n marbles out of N marbles and are asked for the probability that exactly k out of the n picked ones are good.
I did this two ways: In both I generated an array containing K 'true' values and N-K 'false' values. But in the first method I shuffled this array and picked the n first values and counted how many of these are 'true'. In the second method I picked an index at random and removed that element from the array, looping this n times (and of course counting the 'true' elements I got).
The resulting distribution should be HyperGeometric(N, K, n). The first method gave me wrong results whereas the second gave the correct result. Why isn't it OK to pick the n first elements of the shuffled array or what else am I doing wrong? Here's my Javascript code:
function pickGoodsTest(N, K, n) {
var origArr = generateArr(N, i=> i<K);
shuffle(origArr);
var goods = 0;
for (let i=0; i<n; i++) if(origArr[i]) goods++;
return goods;
}
function pickGoodsTest2(N, K, n) {
var origArr = generateArr(N, i=> i<K);
var goods = 0;
for (let i=0; i<n; i++) {
let rndInd = randInt(0, origArr.length-1);
let wasGood = origArr.splice(rndInd, 1)[0];
if (wasGood) goods++;
}
return goods;
}
//helper functions:
function generateArr(len, indFunc) {
var ret = [];
for (let i=0; i<len; i++) {
ret.push(indFunc(i));
}
return ret;
}
function randInt(a, b){return a+Math.floor( Math.random()*(b-a+1) );}
function shuffle(arr) {
let arrLen = arr.length;
for (let i=0; i<arrLen; i++) {
let temp = arr[i];
let rndInd = randInt(0, arrLen-1);
arr[i] = arr[rndInd];
arr[rndInd] = temp;
}
}
These are plots of the the outcomes with values N=10, K=6, n=5 (simulated 500000 times):
The yellow dot is the value of the hypergeometric pmf.