
Hi, I'm a beginner in Python and machine learning, and I'm trying to learn what goes on under the hood of logistic regression by implementing it from scratch in Python. I've been tasked with plotting and ranking the weights/coefficients of the logistic regression below, so that I can remove the features with the least impact. I've added a basic plot, but it doesn't help me rank the coefficients/thetas. I was initially going to try seaborn's sns.coefplot(), but that has been deprecated. Any pointers in the right direction would be appreciated; I've put a rough sketch of the kind of ranking I'm aiming for after the script.

This uses the Wisconsin breast cancer dataset (https://www.kaggle.com/uciml/breast-cancer-wisconsin-data).

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

df = pd.read_csv("cancerdata.csv")

# Standardise the features (mean 0, std 1), then rescale them to [0, 1].
X = df.values[:, 2:-1].astype('float64')
X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
X = MinMaxScaler().fit_transform(X)

# Add the bias column *after* scaling -- MinMaxScaler would turn a
# constant column of ones into zeros.
X = np.hstack([np.ones((X.shape[0], 1)), X])

Y = df["diagnosis"].map({'M': 1, 'B': 0})
Y = np.array(Y)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25)

def Sigmoid(z):
    return 1/(1 + np.exp(-z))

def Hypothesis(theta, x):   
    return Sigmoid(x @ theta) 

def Cost_Function(X,Y,theta,m):
    hi = Hypothesis(theta, X)
    _y = Y.reshape(-1, 1)
    J = 1/float(m) * np.sum(-_y * np.log(hi) - (1-_y) * np.log(1-hi))
    return J

def Cost_Function_Derivative(X,Y,theta,m,alpha):
    hi = Hypothesis(theta,X)
    _y = Y.reshape(-1, 1)
    J = alpha/float(m) * X.T @ (hi - _y)
    return J

def Gradient_Descent(X,Y,theta,m,alpha):
    new_theta = theta - Cost_Function_Derivative(X,Y,theta,m,alpha)
    return new_theta

def Accuracy(theta):
    prediction = (Hypothesis(theta, X_test) > 0.5)
    _y = Y_test.reshape(-1, 1)
    correct = prediction == _y
    my_accuracy = (np.sum(correct) / len(X_test)) * 100
    print ('LR Accuracy %: ', my_accuracy)

def Logistic_Regression(X,Y,alpha,theta,num_iters):
    m = len(Y)
    for x in range(num_iters):
        theta = Gradient_Descent(X,Y,theta,m,alpha)
        if x % 100 == 0:
            pass  # print ('theta: ', theta)
    Accuracy(theta)
    # This is the basic plot mentioned above -- it only looks at the
    # first three thetas, so it doesn't help with ranking.
    x = np.linspace(-6, 6, 50)
    y = -(theta[0] + theta[1]*x)/theta[2]
    plt.plot(x, y)
    plt.plot(theta)
    plt.show()
    return theta

ep = .012

initial_theta = np.random.rand(X_train.shape[1],1) * 2 * ep - ep
alpha = 0.5
iterations = 2000
theta = Logistic_Regression(X_train, Y_train, alpha, initial_theta, iterations)
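
Here is a rough sketch of the kind of ranking I think I'm after, in case it clarifies the question: with Logistic_Regression returning theta as above, I sort the learned weights by absolute value, on the assumption that features whose weights are closest to zero have the least impact (I'm not sure this is the right criterion, which is partly what I'm asking). It also assumes theta[1:] lines up with the feature columns df.columns[2:-1].

# Rough ranking attempt: sort the learned weights by absolute value.
# Assumes theta[0] is the bias term and theta[1:] matches the
# feature columns taken from the dataframe.
feature_names = np.array(df.columns[2:-1])
weights = theta[1:].ravel()            # drop the bias term
order = np.argsort(np.abs(weights))    # least impactful first

plt.barh(feature_names[order], weights[order])
plt.xlabel('coefficient value')
plt.title('Coefficients ranked by absolute value')
plt.tight_layout()
plt.show()

Is ranking by absolute value the right way to decide which features to drop here, given the features were all scaled to [0, 1] first?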