After some struggling, I decided to try a most simple task, training a network to classify weither a number is non-negtive. And I failed...
I generated the data with following code. And I'm not sure if it is right. I read the data back from the file, and it looked right, though...
#pragma comment(lib, "hdf5")
#pragma comment(lib, "hdf5_cpp")
#include <cstdint>
#include <array>
#include <random>
#include <vector>
using namespace std;
#include <H5Cpp.h>
using namespace H5;
mt19937 rng;
float randf(float i_min, float i_max)
{
return rng() * ((i_max - i_min) / 0x100000000) + i_min;
}
#define NAME "pos_neg"
#define TRAIN_SET_SIZE 0x100000
#define TEST_SET_SIZE 0x10000
void make(const string &i_cat, uint32_t i_count)
{
H5File file(NAME "." + i_cat + ".h5", H5F_ACC_TRUNC);
hsize_t dataDim[2] = { i_count, 1 };
hsize_t labelDim = i_count;
FloatType dataType(PredType::NATIVE_FLOAT);
DataSpace dataSpace(2, dataDim);
DataSet dataSet = file.createDataSet("data", dataType, dataSpace);
IntType labelType(PredType::NATIVE_INT);
DataSpace labelSpace(1, &labelDim);
DataSet labelSet = file.createDataSet("label", labelType, labelSpace);
vector<float> data(i_count);
vector<int> labels(i_count);
for (uint32_t i = 0; i < i_count / 2; ++i)
{
labels[i * 2] = 0;
data[i * 2] = randf(0.f, 1.f);
labels[i * 2 + 1] = 1;
data[i * 2 + 1] = randf(-1.f, 0.f);
}
dataSet.write(&data[0], PredType::NATIVE_FLOAT);
labelSet.write(&labels[0], PredType::NATIVE_INT);
}
int main()
{
make("train", TRAIN_SET_SIZE);
make("test", TEST_SET_SIZE);
}
And the network looks like this
name: "PosNegNet"
layer {
name: "data"
type: "HDF5Data"
top: "data"
top: "label"
include {
phase: TRAIN
}
hdf5_data_param {
source: "pos_neg_train.txt"
batch_size: 64
}
}
layer {
name: "data"
type: "HDF5Data"
top: "data"
top: "label"
include {
phase: TEST
}
hdf5_data_param {
source: "pos_neg_test.txt"
batch_size: 65536
}
}
layer {
name: "fc1"
type: "InnerProduct"
bottom: "data"
top: "fc1"
param {
lr_mult: 1
decay_mult: 1
}
param {
lr_mult: 2
decay_mult: 0
}
inner_product_param {
num_output: 1
weight_filler {
type: "xavier"
}
bias_filler {
type: "constant"
value: 0
}
}
}
layer {
name: "loss"
type: "SoftmaxWithLoss"
bottom: "fc1"
bottom: "label"
top: "loss"
}
layer {
name: "accuracy"
type: "Accuracy"
bottom: "fc1"
bottom: "label"
top: "accuracy"
include {
phase: TEST
}
}
And and one set of parameters I tried
net: "pos_neg.prototxt"
test_iter: 1
test_interval: 500
base_lr: 0.001
momentum: 0.9
momentum2: 0.999
lr_policy: "fixed"
display: 100
max_iter: 10000
snapshot: 5000
snapshot_prefix: "pos_neg"
type: "Adam"
solver_mode: GPU
And I ran caffe.exe on Windows. And I always got loss = 0, accuracy = 0.5.
I know I must have done something wrong, but I don't know from where to look, well, other than digging up source code...
And I found that caffe is fairly slow. I got only around 16 iterations per second for a float[64] data with 1024 item per batch on a 1080Ti. Was it normal or I did something wrong again?