
I am trying to implement logistic regression with gradient descent on the notMNIST dataset. This is my code so far; it parses the data and plots accuracy against epochs. I split the 3500 training points into 7 mini-batches of 500 each and run 5000 iterations in total, so roughly 5000/7 ≈ 714 epochs. My goal is to compute the accuracy on the validation points after each epoch and plot it against the epoch number, and to do the same with the average loss per epoch.
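
For concreteness, here is the epoch bookkeeping I am aiming for, written as a standalone sketch with the numbers above:

NUM_BATCHES = 7        # 3500 training points / batch size 500
NUM_ITERATIONS = 5000  # total mini-batch updates
for i in range(NUM_ITERATIONS):
    batch_index = i % NUM_BATCHES  # which mini-batch this update uses
    epoch = i // NUM_BATCHES       # which pass over the training data this is
    if batch_index == 0:
        pass  # a new epoch starts: record validation accuracy and average loss here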

This is the loss function I am implementing: the mean sigmoid cross-entropy over the batch plus an L2 penalty on the weights.
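
Written out, it should be the standard L2-regularized cross-entropy (reconstructed here from the code below):

\mathcal{L}(W, b) = \frac{1}{N}\sum_{n=1}^{N}\left[-y_n \log \hat{y}_n - (1 - y_n)\log(1 - \hat{y}_n)\right] + \frac{\lambda}{2}\lVert W \rVert^2, \qquad \hat{y}_n = \sigma(W^\top x_n + b)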

However, for some reason, when I calculate the accuracy I always get 100%, which doesn't make sense: I am learning the weights on the training set and then applying them to the validation set, so the model cannot be correct 100% of the time. Also, when I plot the losses I get a straight line, which doesn't make sense either.

Does anyone have ideas about what I am doing wrong? Any help would be appreciated!

#implement logistic regression 
#logistic regression prediction function is y = sigmoid(W^Tx + b)
#train the logistic regression model using SGD and mini batch size B = 500 on the two-class notMNIST dataset
#how to train the dataset:

import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

##############Constants##################################

BATCH_SIZE = 500
NUM_BATCHES = 7
NUM_ITERATIONS = 5000
LEARNING_RATE = [0.005]  # 0.001, 0.0001
PIXEL_SIZE = 784  # 28x28
NUM_TRAINING_POINTS = 3500
NUM_VALID_POINTS = 100

###############Extracting data############################

with np.load("notMNIST.npz") as data:
    Data, Target = data["images"], data["labels"]
    posClass = 2
    negClass = 9
    dataIndx = (Target==posClass) + (Target==negClass)
    Data = Data[dataIndx]/255.
    Target = Target[dataIndx].reshape(-1, 1)
    Target[Target==posClass] = 1
    Target[Target==negClass] = 0
    np.random.seed(521)
    randIndx = np.arange(len(Data))
    np.random.shuffle(randIndx)
    Data, Target = Data[randIndx], Target[randIndx]
    trainData, trainTarget = Data[:3500], Target[:3500]
    validData, validTarget = Data[3500:3600], Target[3500:3600]
    testData, testTarget = Data[3600:], Target[3600:]

################Manipulating Data##########################

trainX = np.reshape(trainData, (NUM_TRAINING_POINTS, PIXEL_SIZE))
validX = np.reshape(validData, (NUM_VALID_POINTS, PIXEL_SIZE))
batchesX = np.array(np.split(trainX, NUM_BATCHES))
batchesY = np.array(np.split(trainTarget, NUM_BATCHES))


################Defining variables########################

loss_Values = [[0 for x in range(NUM_BATCHES)] for y in range(715)]  # 5000 iterations / 7 batches ≈ 715 epochs
lr = dict()
epoch_list = []
mean_list = []
accuracy_list = []

x = tf.placeholder(tf.float32, [PIXEL_SIZE, None], name="input_points")  # 784 dimensions (28x28 pixels)
W = tf.Variable(tf.truncated_normal(shape=[PIXEL_SIZE, 1], stddev=0.5), name='weights')
b = tf.Variable(0.0, name='bias')
y = tf.placeholder(tf.float32, [None, 1], name="target_labels")  # target labels
lambda_ = 0.01

##############Calculations###############################

#weight_squared_sum = tf.matmul(tf.transpose(W), W)  # square of the weight vector (unused)

with tf.Session() as sess:  # evaluate the initial weights once, before training
    tf.global_variables_initializer().run()
    weight = W.eval()

weight_squared_sum = np.linalg.norm(weight)  # L2 norm of the initial weights
loss_W = lambda_ / 2 * weight_squared_sum  # regularization term (a plain Python constant from here on)

y_hat = tf.add(tf.matmul(tf.transpose(W), x), b)  # logits: W^T x + b (the sigmoid is applied inside the loss below)
y_hat = tf.transpose(y_hat)
cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_hat, labels=y)  # takes the true labels y and the logits y_hat
total_loss = tf.add(tf.reduce_mean(cross_entropy,0),loss_W)

#############Training######################################
with tf.Session() as sess:
    epoch = 0
    tf.global_variables_initializer().run()

    for learning_rate in LEARNING_RATE:
        train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(total_loss) #change the learning rate each time

        for i in range(NUM_BATCHES*NUM_ITERATIONS):    
            sess.run(train_step, feed_dict={x:np.transpose(batchesX[i%NUM_BATCHES]), y: batchesY[i%NUM_BATCHES]})
            print("i: ",i)
            print("LOSS:")
            print(sess.run(total_loss, feed_dict={x:np.transpose(batchesX[i%NUM_BATCHES]), y: batchesY[i%NUM_BATCHES]}))
            if i % NUM_BATCHES == 0:  # every time i wraps around to batch 0, a new epoch has started
                loss_Values[epoch][i%NUM_BATCHES] = sess.run(cross_entropy, feed_dict={x: np.transpose(batchesX[i%NUM_BATCHES]), y: batchesY[i%NUM_BATCHES]})
                correct_prediction = tf.equal(y, y_hat)
                accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
                accuracy_val = sess.run(accuracy, feed_dict={x: np.transpose(validX) , y: validTarget})
                print("Accuracy: ", accuracy_val)
                accuracy_list.append(accuracy_val) 
                epoch = epoch + 1

        lr[learning_rate] = loss_Values
    print("Final value")


    #for plotting purposes             
    N = len(loss_Values)
    for epoch in range(N):  # find the average loss over all input points in one epoch
        epoch_list.append(epoch)
        row = np.array(loss_Values[epoch])
        mean = np.add.reduce(row) / NUM_TRAINING_POINTS
        mean_list.append(mean)

    epoch_list = np.array(epoch_list)
    mean_list = np.array(epoch_list)
    accuracy_list = np.array(epoch_list)

    plt.figure()
    plt.plot(epoch_list, accuracy_list, '-', label = 'Average loss')
    plt.show()
  • Did you mean to set accuracy_list and mean_list as `np.array(epoch_list)`? – kww Mar 13 '18 at 02:38
  • oh no! I did not. Thank you for pointing that out. – aa1 Mar 13 '18 at 02:45
  • but do you know why it would still be continuously outputting zero as the accuracy? – aa1 Mar 13 '18 at 04:47
  • I'm guessing that since you're trying to compare 2 float arrays, it's very unlikely for them to be equal. You may want to get the predicted labels by running y_hat through softmax, and getting the predicted label using argmax. – kww Mar 13 '18 at 17:36
  • thank you so much! that solved the problem :) – aa1 Mar 13 '18 at 23:46
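
A minimal sketch of the fix the comments converge on, adapted to the binary sigmoid setup in the question (thresholding the probability at 0.5 is the two-class analogue of the softmax-plus-argmax suggestion; names follow the question's code):

y_prob = tf.sigmoid(y_hat)  # turn logits into probabilities in (0, 1)
y_pred = tf.cast(tf.greater(y_prob, 0.5), tf.float32)  # hard 0/1 predicted labels
correct_prediction = tf.equal(y_pred, y)  # compare predicted labels with true labels
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))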

0 Answers