I am trying to fit a scikit-learn linear regression model with many data points from a pandas DataFrame. This is the program:
features =["floors", "waterfront", "lat", "bedrooms", "sqft_basement", "view", "bathrooms", "sqft_living15", "sqft_above", "grade", "sqft_living"]
lm3 = LinearRegression()
lm3.fit(features,["prices"])
lm3.score(features,B)
The error is:
ValueError: Expected 2D array, got 1D array instead:
array=['floors' 'waterfront' 'lat' 'bedrooms' 'sqft_basement' 'view' 'bathrooms'
'sqft_living15' 'sqft_above' 'grade' 'sqft_living'].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.
I have the same process running fine elsewhere, but it doesn't work here. I have single-variable linear regression working elsewhere in the same JupyterLab session. Any help would be appreciated. I would attach the CSV I am using, but it's about 20,000 rows, so I won't include the full thing. I will include the full error traceback, though:
ValueError Traceback (most recent call last)
Cell In[27], line 3
1 features =["floors", "waterfront","lat" ,"bedrooms" ,"sqft_basement" ,"view" ,"bathrooms","sqft_living15","sqft_above","grade","sqft_living"]
2 lm3 = LinearRegression()
----> 3 lm3.fit(features,["prices"])
4 lm3.score(features,B)
File D:\python\Lib\site-packages\sklearn\linear_model\_base.py:648, in LinearRegression.fit(self, X, y, sample_weight)
644 n_jobs_ = self.n_jobs
646 accept_sparse = False if self.positive else ["csr", "csc", "coo"]
--> 648 X, y = self._validate_data(
649 X, y, accept_sparse=accept_sparse, y_numeric=True, multi_output=True
650 )
652 sample_weight = _check_sample_weight(
653 sample_weight, X, dtype=X.dtype, only_non_negative=True
654 )
656 X, y, X_offset, y_offset, X_scale = _preprocess_data(
657 X,
658 y,
(...)
661 sample_weight=sample_weight,
662 )
File D:\python\Lib\site-packages\sklearn\base.py:584, in BaseEstimator._validate_data(self, X, y, reset, validate_separately, **check_params)
582 y = check_array(y, input_name="y", **check_y_params)
583 else:
--> 584 X, y = check_X_y(X, y, **check_params)
585 out = X, y
587 if not no_val_X and check_params.get("ensure_2d", True):
File D:\python\Lib\site-packages\sklearn\utils\validation.py:1106, in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)
1101 estimator_name = _check_estimator_name(estimator)
1102 raise ValueError(
1103 f"{estimator_name} requires y to be passed, but the target y is None"
1104 )
-> 1106 X = check_array(
1107 X,
1108 accept_sparse=accept_sparse,
1109 accept_large_sparse=accept_large_sparse,
1110 dtype=dtype,
1111 order=order,
1112 copy=copy,
1113 force_all_finite=force_all_finite,
1114 ensure_2d=ensure_2d,
1115 allow_nd=allow_nd,
1116 ensure_min_samples=ensure_min_samples,
1117 ensure_min_features=ensure_min_features,
1118 estimator=estimator,
1119 input_name="X",
1120 )
1122 y = _check_y(y, multi_output=multi_output, y_numeric=y_numeric, estimator=estimator)
1124 check_consistent_length(X, y)
File D:\python\Lib\site-packages\sklearn\utils\validation.py:902, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)
900 # If input is 1D raise error
901 if array.ndim == 1:
--> 902 raise ValueError(
903 "Expected 2D array, got 1D array instead:\narray={}.\n"
904 "Reshape your data either using array.reshape(-1, 1) if "
905 "your data has a single feature or array.reshape(1, -1) "
906 "if it contains a single sample.".format(array)
907 )
909 if dtype_numeric and array.dtype.kind in "USV":
910 raise ValueError(
911 "dtype='numeric' is not compatible with arrays of bytes/strings."
912 "Convert your data to numeric values explicitly instead."
913 )
ValueError: Expected 2D array, got 1D array instead:
array=['floors' 'waterfront' 'lat' 'bedrooms' 'sqft_basement' 'view' 'bathrooms'
'sqft_living15' 'sqft_above' 'grade' 'sqft_living'].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.