# Candidate regressors; tree-based / boosted models are seeded for reproducibility.
lr = LinearRegression()
lasso = Lasso()
dt = DecisionTreeRegressor(random_state=375)
rf = RandomForestRegressor(random_state=375)
xgboost = xgb.XGBRegressor(random_state=375)

# Display-name -> unfitted estimator, iterated by best_model() below.
# NOTE(review): the keys say "Classifier" but every model here is a regressor.
classifiers = {
    'Linear_Regression': lr,
    'Lasso_Regression' : lasso,
    'Decision_Tree': dt,
    'Random_Forest': rf,
    'XGBoost_Classifier': xgboost,
}
I measured the validation error for each model (labelled RMSE, although the code actually computes the mean absolute error) with the following code:
def best_model(classifiers, X_train, y_train, X_test, y_test):
    """Fit every estimator on the training split and score it on the test split.

    Parameters
    ----------
    classifiers : dict
        Mapping of display name -> unfitted sklearn-style estimator
        (anything with ``fit`` / ``predict``).
    X_train, y_train :
        Training features and target.
    X_test, y_test :
        Held-out evaluation features and target.

    Returns
    -------
    pd.DataFrame
        Columns ``classifier_name`` and ``RMSE``, one row per estimator.

    NOTE(review): despite the 'RMSE' label, the score computed here is
    mean_absolute_error (MAE). If RMSE is really intended, use
    ``mean_squared_error(y_test, ypred, squared=False)`` instead.
    """
    models = {}
    for classifier_name, classifier in classifiers.items():
        model_trained = classifier.fit(X_train, y_train)
        ypred = model_trained.predict(X_test)
        validation_score = round(mean_absolute_error(y_test, ypred), 5)
        print(classifier, validation_score)  # for testing
        # BUG FIX: original stored `model_test`, an undefined name that
        # raises NameError at runtime; the fitted estimator is `model_trained`.
        models[classifier_name] = {'model': model_trained, 'RMSE': validation_score}

    # Reshape {name: {..., 'RMSE': score}} into a tidy two-column frame.
    validation_results = defaultdict(list)
    for k, v in models.items():
        validation_results['classifier_name'].append(k)
        validation_results['RMSE'].append(v['RMSE'])
    validation_results = pd.DataFrame(validation_results)
    return validation_results
# BUG FIX: the original call passed the TRAINING split twice
# (`X_train, y_train, X_train, y_train`), so every model was scored on the
# very data it was fit on — which is why the decision tree reported a
# near-zero 0.00992 here but 0.41485 when evaluated properly. Evaluate on
# the held-out split instead:
validation_results = best_model(classifiers, X_train, y_train, X_test, y_test)
validation_results
But this gave me different results from running the same procedure on an individual model. For instance:
# Manual single-model run for comparison with best_model() above.
classifier = DecisionTreeRegressor(random_state=375)
# NOTE(review): `train` / `evaluate` are helpers defined elsewhere —
# presumably fit-then-score on the given splits; confirm they use the
# same metric as best_model() before comparing numbers.
model_test = train(classifier, X_train, y_train)
ypred, validation_score = evaluate(model_test, X_test, y_test)
print(validation_score)
this gives me 0.41485, whereas the best_model() function above reports the Decision Tree's error as 0.00992.
Is there anything wrong with my code? (The likely culprit: best_model() is being called with the training data as the evaluation set.)