I've implemented the XOR problem with CNTK (Python).
Currently it solves the problem only occasionally. How could I make the network solve it more reliably?
I guess the problem gets solved whenever the starting random weights are near optimal. I have tried binary_cross_entropy
as the loss function, but it didn't improve. I tried tanh
as the non-linearity, but that didn't work either. I have also tried many different combinations of the parameters learning_rate
, minibatch_size
and num_minibatches_to_train
. Please help.
Thanks
# -*- coding: utf-8 -*-
import numpy as np
from cntk import *
import random
import pandas as pd
input_dim = 2   # two binary features per XOR example
output_dim = 1  # single output: the XOR truth value
def generate_random_data_sample(sample_size, feature_dim, num_classes):
    """Build a deterministic XOR dataset of ``sample_size`` rows.

    Cycles through the four XOR input/label pairs in a fixed order.
    NOTE(review): despite the name, nothing here is random, and
    ``feature_dim`` / ``num_classes`` are accepted only for API
    compatibility — they are never used.

    Returns:
        (X, Y): float32 arrays of shape (sample_size, 2) and
        (sample_size, 1) respectively.
    """
    xor_cases = [
        ([1, 1], [0]),
        ([0, 0], [0]),
        ([1, 0], [1]),
        ([0, 1], [1]),
    ]
    X = []
    Y = []
    for i in range(sample_size):
        x_row, y_row = xor_cases[i % 4]
        X.append(x_row)
        Y.append(y_row)
    return np.array(X, dtype=np.float32), np.array(Y, dtype=np.float32)
def linear_layer(input_var, output_dim, scale=10):
    """Affine transform ``b + input_var @ W``.

    W is initialized from a uniform distribution controlled by
    ``scale``; b is initialized to zero (CNTK parameter default).
    """
    in_dim = input_var.shape[0]
    W = parameter(shape=(in_dim, output_dim), init=uniform(scale=scale))
    b = parameter(shape=(output_dim))
    return b + times(input_var, W)
def dense_layer(input_var, output_dim, nonlinearity, scale=10):
    """Fully-connected layer: linear transform followed by an
    elementwise nonlinearity."""
    return nonlinearity(linear_layer(input_var, output_dim, scale=scale))
# Network: 2 inputs -> 2 sigmoid hidden units -> 1 sigmoid output.
feature = input(input_dim, np.float32)

# FIX: the original used scale=10 for the uniform weight initializer.
# Weights that large push the sigmoid units deep into saturation from
# the very first minibatch, so the gradients vanish and training only
# succeeds when the random draw happens to start near a solution —
# which is exactly the "solves it only occasionally" symptom.  A small
# initial scale keeps the units in their responsive regime and makes
# convergence reliable.  (CNTK's glorot_uniform() initializer is an
# equally good choice here.)
h1 = dense_layer(feature, 2, sigmoid, scale=1)
z = dense_layer(h1, output_dim, sigmoid, scale=1)

label = input(1, np.float32)

# Squared error is used both as the training loss and as the reported
# evaluation metric for this single-output formulation.
loss = squared_error(z, label)
eval_error = squared_error(z, label)

# Plain SGD with a per-minibatch learning-rate schedule.
learning_rate = 0.5
lr_schedule = learning_rate_schedule(learning_rate, UnitType.minibatch)
learner = sgd(z.parameters, lr_schedule)
trainer = Trainer(z, (loss, eval_error), [learner])
def print_training_progress(trainer, mb, frequency, verbose=1):
    """Report training progress every ``frequency`` minibatches.

    Returns ``(mb, loss, error)``; loss and error are the averages of
    the previous minibatch when ``mb`` is a multiple of ``frequency``,
    and the string "NA" otherwise.  Printing happens only on sampled
    minibatches and only when ``verbose`` is truthy.
    """
    if mb % frequency != 0:
        return mb, "NA", "NA"
    loss_avg = trainer.previous_minibatch_loss_average
    metric_avg = trainer.previous_minibatch_evaluation_average
    if verbose:
        print ("Minibatch: {0}, Loss: {1:.4f}, Error: {2:.2f}".format(mb, loss_avg, metric_avg))
    return mb, loss_avg, metric_avg
# Training configuration.
minibatch_size = 800
num_minibatches_to_train = 2000
training_progress_output_freq = 50

# Training loop: each iteration draws a fresh batch of XOR examples
# and feeds it to the trainer; progress is printed every
# `training_progress_output_freq` minibatches.
for mb_idx in range(num_minibatches_to_train):
    features, labels = generate_random_data_sample(minibatch_size, input_dim, output_dim)
    trainer.train_minibatch({feature: features, label: labels})
    batchsize, loss, error = print_training_progress(trainer, mb_idx, training_progress_output_freq, verbose=1)

# Evaluate the trained model on the last batch and display one row per
# distinct input pattern: the input bits, the true label, and the
# network's prediction.
out = z
result = out.eval({feature: features})
a = pd.DataFrame(data=dict(
    query=[str(int(x[0])) + str(int(x[1])) for x in features],
    test=[int(l[0]) for l in labels],
    pred=[l[0] for l in result]))
print(pd.DataFrame.drop_duplicates(a[["query", "test", "pred"]]).sort_values(by="test"))