0
lr = LinearRegression()
lasso = Lasso()
dt = DecisionTreeRegressor(random_state=375)
rf = RandomForestRegressor(random_state=375)
xgboost = xgb.XGBRegressor(random_state = 375)

classifiers = {
    'Linear_Regression': lr,
    'Lasso_Regression' : lasso,
    'Decision_Tree': dt,
    'Random_Forest': rf,
    'XGBoost_Classifier': xgboost,
    }

I measured RMSE for each model with the following code:

def best_model(classifiers, X_train, y_train, X_test, y_test):
    models = {}
    for classifier_name, classifier in classifiers.items():       
        model_trained = classifier.fit(X_train, y_train)
        ypred = model_trained.predict(X_test)       
        validation_score = round(mean_absolute_error(y_test, ypred),5)
        
        print(classifier, validation_score) # for testing
        models[classifier_name] = {'model': model_test, 'RMSE': validation_score}
        
    validation_results = defaultdict(list)
    for k,v in models.items():
        validation_results['classifier_name'].append(k)
        validation_results['RMSE'].append(v['RMSE'])
    validation_results = pd.DataFrame(validation_results)
    return validation_results

validation_results = best_model(classifiers, X_train, y_train, X_train, y_train)
validation_results

But this gave me different results when I went through the same procedure for the individual model.

For instance,

classifier = DecisionTreeRegressor(random_state=375)
model_test = train(classifier, X_train, y_train)
ypred, validation_score = evaluate(model_test, X_test, y_test)
print(validation_score)

this gives me 0.41485, but the upper one measures the RMSE of the Decision Tree as 0.00992.

Is there anything wrong with it?

Jleeca
  • 29
  • 3

0 Answers0