I am trying to optimize hyperparameters for ridge regression. But also add polynomial features. So, pipeline looks okay but getting error when try to gridsearchcv. Here:
# Importing the Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_squared_error
from collections import Counter
from IPython.core.display import display, HTML
sns.set_style('darkgrid')
# Data Preprocessing
from sklearn.datasets import load_boston
boston_dataset = load_boston()
dataset = pd.DataFrame(boston_dataset.data, columns = boston_dataset.feature_names)
dataset['MEDV'] = boston_dataset.target
# X and y Variables
X = dataset.iloc[:, 0:13].values
y = dataset.iloc[:, 13].values.reshape(-1,1)
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 25)
# Building the Model ------------------------------------------------------------------------
# Fitting regressior to the Training set
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
steps = [
('scalar', StandardScaler()),
('poly', PolynomialFeatures(degree=2)),
('model', Ridge())
]
ridge_pipe = Pipeline(steps)
ridge_pipe.fit(X_train, y_train)
# Predicting the Test set results
y_pred = ridge_pipe.predict(X_test)
# Applying k-Fold Cross Validation
from sklearn.model_selection import cross_val_score
accuracies = cross_val_score(estimator = ridge_pipe, X = X_train, y = y_train, cv = 10)
accuracies.mean()
#accuracies.std()
# Applying Grid Search to find the best model and the best parameters
from sklearn.model_selection import GridSearchCV
parameters = [ {'alpha': np.arange(0, 0.2, 0.01) } ]
grid_search = GridSearchCV(estimator = ridge_pipe,
param_grid = parameters,
scoring = 'accuracy',
cv = 10,
n_jobs = -1)
grid_search = grid_search.fit(X_train, y_train) # <-- GETTING ERROR IN HERE
Error:
ValueError: Invalid parameter ridge for estimator
What to do or, is there a better way to use ridge regression with pipeline? I would be pleased if put some sources about gridsearch because I am a newbie on this. The error: