Solved: Previously my dataset had around 1000 images. I increased it to 50 000 and now the neural network learns and works.
I have created a convolutional neural network for recognizing three emotions from facial expression(positive, neutral, negative emotion). Somehow, my error function does not get any better(error image). Training and validation error are constant for 100 epochs. What could be the reason?
Why the error is constant?
Here's my code:
function training(varargin)
setup ;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
rngNum = 1; % rng number for random weight initialization, e.g., 1,2,3
num_fcHiddenNeuron =1024; % # neurons in the fully-connected hidden layer
prob_fcDropout = 0.5; % dropout probability in the fully-connected hidden layer,
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% input data for training deep CNNs
imdb1 = load(['trainingdata']) ;
imdb2 = load(['testdata']) ;
imdb.images.data = cat(4, imdb1.images.data, imdb2.images.data);
imdb.images.labels = cat(2, imdb1.images.labels, imdb2.images.labels);
imdb.images.set = cat(2, imdb1.images.set, imdb2.images.set);
imdb.meta = imdb1.meta;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
trainOpts.batchSize = 200 ;
trainOpts.numEpochs = 100 ;
trainOpts.gpus = [] ;
trainOpts.continue = true ;
trainOpts.learningRate = [0.004*ones(1,25), 0.002*ones(1,25), 0.001*ones(1,25), 0.0005*ones(1,25)];
trainOpts = vl_argparse(trainOpts, varargin);
%% Training Deep CNNs
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% CNN configuration
net.layers = {} ;
% %
% % %% Conv1 - MaxPool1
rng(rngNum) %control random number generation
net.layers{end+1} = struct('type', 'conv', ...
'weights', {{0.01*randn(3,3,1,32, 'single'), 0.1*ones(1, 32, 'single')}}, ...
'stride', 1, ...
'pad', 1, ...
'filtersLearningRate', 1, ...
'biasesLearningRate', 1, ...
'filtersWeightDecay', 1/5, ...
'biasesWeightDecay', 0) ;
net.layers{end+1} = struct('type', 'relu') ;
net.layers{end+1} = struct('type', 'pool', ...
'method', 'max', ...
'pool', [2 2], ...
'stride', 2, ...
'pad', 0) ;
% %%% Conv2 - MaxPool2
rng(rngNum)
net.layers{end+1} = struct('type', 'conv', ...
'weights', {{0.01*randn(3,3,32,32, 'single'), 0.1*ones(1, 32, 'single')}}, ...
'stride', 1, ...
'pad', 0, ...
'filtersLearningRate', 1, ...
'biasesLearningRate', 1, ...
'filtersWeightDecay', 1/5, ...
'biasesWeightDecay', 0) ;
net.layers{end+1} = struct('type', 'relu') ;
net.layers{end+1} = struct('type', 'pool', ...
'method', 'max', ...
'pool', [2 2], ...
'stride', 2, ...
'pad', [1, 0, 1, 0]) ;
% %%% Conv3 - MaxPool3
rng(rngNum)
net.layers{end+1} = struct('type', 'conv', ...
'weights', {{0.01*randn(3,3,32,64, 'single'), 0.1*ones(1, 64, 'single')}}, ...
'stride', 1, ...
'pad', 1, ...
'filtersLearningRate', 1, ...
'biasesLearningRate', 1, ...
'filtersWeightDecay', 1/5, ...
'biasesWeightDecay', 0) ;
net.layers{end+1} = struct('type', 'relu') ;
net.layers{end+1} = struct('type', 'pool', ...
'method', 'max', ...
'pool', [2 2], ...
'stride', 2, ...
'pad', 0) ;
% %%% Fc Hidden
rng(rngNum)
net.layers{end+1} = struct('type', 'conv', ...
'weights', {{0.001*randn(5,5,64,num_fcHiddenNeuron, 'single'), 0.01*ones(1, num_fcHiddenNeuron, 'single')}}, ...
'stride', 1, ...
'pad', 0, ...
'filtersLearningRate', 1, ...
'biasesLearningRate', 1, ...
'filtersWeightDecay', 1/5, ...
'biasesWeightDecay', 0) ;
net.layers{end+1} = struct('type', 'relu') ;
net.layers{end+1} = struct('type', 'dropout', ...
'rate', prob_fcDropout) ;
%
% %%% Fc Output
rng(rngNum)
net.layers{end+1} = struct('type', 'conv', ...
'weights', {{zeros(1,1,num_fcHiddenNeuron, 3, 'single'), zeros(1, 3, 'single')}}, ...
'stride', 1, ...
'pad', 0, ...
'filtersLearningRate', 1, ...
'biasesLearningRate', 1, ...
'filtersWeightDecay', 4, ...
'biasesWeightDecay', 0) ;
net.layers{end+1} = struct('type', 'softmaxloss') ;
% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% starting to train deep CNN
[net,info] = cnn_train(net, imdb, getBatch(opts), trainOpts, 'val', find(imdb.images.set == 2)) ;
net.layers(end) = [] ;
function fn = getBatch(opts)
% -------------------------------------------------------------------------
fn = @(x,y) getSimpleNNBatch(x,y) ;
end
% -------------------------------------------------------------------------
function [images, labels] = getSimpleNNBatch(imdb, batch)
% -------------------------------------------------------------------------
images = imdb.images.data(:,:,:,batch) ;
labels = imdb.images.labels(1,batch) ;
end