I am trying to run the following linear regression from scratch code. When I create my object for my linear regression class and call my method, I am getting a type error.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
df = pd.read_csv('/Users/MyName/Downloads/archive/prices.csv')
X = df['volume'].values
y = df['close'].values
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
class Lin_Reg():
def __init__(self, lr=0.01, n_iters=10000):
self.lr = lr
self.n_iters = n_iters
self.weights = None
self.bias = None
def fit(self, X, y):
n_samples, n_features = X.shape
self.weights = np.zeros(n_features)
self.bias = 0
for _ in range(self.n_iters):
y_pred = np.dot(X, self.weights) + self.bias
dw = (1/n_samples) * np.dot(X, (y_pred - y))
db = (1/n_samples) * np.sum(y_pred-y)
self.weight = self.weight -self.lr * dw
self.bias = self.bias -self.lr * db
def predict(self, X):
y_pred = np.dot(X, self.weights) + self.bias
return y_pred
reg = Lin_Reg()
reg.fit(X_train, y_train)
predictions = reg.predict(X_test)
The error message is
ValueError: not enough values to unpack (expected 2, got 1)
and the line generating this error is n_samples, n_features = X.shape
The dataset I'm working with can be found here: https://www.kaggle.com/datasets/dgawlik/nyse. I am using the prices.csv file.