2
import pandas as pd
import numpy as np
import sklearn
from sklearn import linear_model
import matplotlib.pyplot as pyplot
import pickle
from matplotlib import style

data = pd.read_csv("student-mat.csv"  , sep=";")
data = data[["age", "traveltime" , "studytime" ,"failures" , "G1" , "G2" , "G3"]]

predict = "age"

x = np.array(data.drop([predict], 1))
y = np.array(data[predict])

x_train , x_test , y_train , y_test =  sklearn.model_selection.train_test_split(x , y, test_size= 0.1)

linear=linear_model.LinearRegression()
linear.fit(x,y)

acc = linear.score(x_test , y_test)
print(acc)

print('co: \n', linear.coef_)
print('intercept: \n', linear.intercept_)

predictions = linear.predict(x_train)

for x in range(len(predictions)):
    print(predictions[x], x_test[x], y_test[x])

style.use("ggplot")
pyplot.scatter(data["age"] , data["failures"])
pyplot.xlabel("traveltime")
pyplot.ylabel("studytime")
pyplot.show()

guys, i've searched a lot on the internet about this error but i didn't found anything the error is on this line: print(predictions[x], x_test[x], y_test[x]), i can't understand were the error come from

1 Answers1

0

The issue here is predictions is having 355 entries as you are making it on training set. Predict on the test set and then your code will work properly.

predictions = linear.predict(x_test)

for x in range(len(predictions)):
    print(predictions[x], x_test[x], y_test[x])
LOrD_ARaGOrN
  • 3,884
  • 3
  • 27
  • 49