this code raise error:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OrdinalEncoder
# Define categorical columns and mapping dictionary
categorical_cols = ['color', 'shape', 'size']
mapping = {'red': 0, 'green': 1, 'blue': 2, 'circle': 0, 'square': 1, 'triangle': 2, 'small': 0, 'medium': 1, 'large': 2}
cols = ['color','size']
# Define ColumnTransformer to preprocess categorical columns
preprocessor = ColumnTransformer(
transformers=[
('orlEncdr_with_map', Pipeline(steps=[('orlEnc_with_map', OrdinalEncoder(categories=[list(mapping.keys())], dtype=int))]), cols),
])
# Load sample data
data = pd.DataFrame({'color': ['red', 'green', 'blue', 'red'], 'shape': ['circle', 'square', 'triangle', 'triangle'], 'size': ['small', 'medium', 'large', 'medium']})
# Apply preprocessor to data
preprocessed_data = preprocessor.fit_transform(data)
# View preprocessed data
print(preprocessed_data)
Error:
ValueError Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_38148\1089712396.py in <module>
18
19 # Apply preprocessor to data
---> 20 preprocessed_data = preprocessor.fit_transform(data)
21
22 # View preprocessed data
~\Anaconda3\lib\site-packages\sklearn\compose\_column_transformer.py in fit_transform(self, X, y)
673 self._validate_remainder(X)
674
--> 675 result = self._fit_transform(X, y, _fit_transform_one)
676
677 if not result:
~\Anaconda3\lib\site-packages\sklearn\compose\_column_transformer.py in _fit_transform(self, X, y, func, fitted, column_as_strings)
604 )
605 try:
--> 606 return Parallel(n_jobs=self.n_jobs)(
607 delayed(func)(
608 transformer=clone(trans) if not fitted else trans,
~\Anaconda3\lib\site-packages\joblib\parallel.py in __call__(self, iterable)
1046 # remaining jobs.
1047 self._iterating = False
-> 1048 if self.dispatch_one_batch(iterator):
1049 self._iterating = self._original_iterator is not None
1050
~\Anaconda3\lib\site-packages\joblib\parallel.py in dispatch_one_batch(self, iterator)
862 return False
863 else:
--> 864 self._dispatch(tasks)
865 return True
866
~\Anaconda3\lib\site-packages\joblib\parallel.py in _dispatch(self, batch)
780 with self._lock:
781 job_idx = len(self._jobs)
--> 782 job = self._backend.apply_async(batch, callback=cb)
783 # A job can complete so quickly than its callback is
784 # called before we get here, causing self._jobs to
~\Anaconda3\lib\site-packages\joblib\_parallel_backends.py in apply_async(self, func, callback)
206 def apply_async(self, func, callback=None):
207 """Schedule a func to be run"""
--> 208 result = ImmediateResult(func)
209 if callback:
210 callback(result)
~\Anaconda3\lib\site-packages\joblib\_parallel_backends.py in __init__(self, batch)
570 # Don't delay the application, to avoid keeping the input
571 # arguments in memory
--> 572 self.results = batch()
573
574 def get(self):
~\Anaconda3\lib\site-packages\joblib\parallel.py in __call__(self)
261 # change the default number of processes to -1
262 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 263 return [func(*args, **kwargs)
264 for func, args, kwargs in self.items]
265
~\Anaconda3\lib\site-packages\joblib\parallel.py in <listcomp>(.0)
261 # change the default number of processes to -1
262 with parallel_backend(self._backend, n_jobs=self._n_jobs):
--> 263 return [func(*args, **kwargs)
264 for func, args, kwargs in self.items]
265
~\Anaconda3\lib\site-packages\sklearn\utils\fixes.py in __call__(self, *args, **kwargs)
214 def __call__(self, *args, **kwargs):
215 with config_context(**self.config):
--> 216 return self.function(*args, **kwargs)
217
218
~\Anaconda3\lib\site-packages\sklearn\pipeline.py in _fit_transform_one(transformer, X, y, weight, message_clsname, message, **fit_params)
891 with _print_elapsed_time(message_clsname, message):
892 if hasattr(transformer, "fit_transform"):
--> 893 res = transformer.fit_transform(X, y, **fit_params)
894 else:
895 res = transformer.fit(X, y, **fit_params).transform(X)
~\Anaconda3\lib\site-packages\sklearn\pipeline.py in fit_transform(self, X, y, **fit_params)
432 fit_params_last_step = fit_params_steps[self.steps[-1][0]]
433 if hasattr(last_step, "fit_transform"):
--> 434 return last_step.fit_transform(Xt, y, **fit_params_last_step)
435 else:
436 return last_step.fit(Xt, y, **fit_params_last_step).transform(Xt)
~\Anaconda3\lib\site-packages\sklearn\base.py in fit_transform(self, X, y, **fit_params)
850 if y is None:
851 # fit method of arity 1 (unsupervised transformation)
--> 852 return self.fit(X, **fit_params).transform(X)
853 else:
854 # fit method of arity 2 (supervised transformation)
~\Anaconda3\lib\site-packages\sklearn\preprocessing\_encoders.py in fit(self, X, y)
884
885 # `_fit` will only raise an error when `self.handle_unknown="error"`
--> 886 self._fit(X, handle_unknown=self.handle_unknown, force_all_finite="allow-nan")
887
888 if self.handle_unknown == "use_encoded_value":
~\Anaconda3\lib\site-packages\sklearn\preprocessing\_encoders.py in _fit(self, X, handle_unknown, force_all_finite)
82 if self.categories != "auto":
83 if len(self.categories) != n_features:
---> 84 raise ValueError(
85 "Shape mismatch: if categories is an array,"
86 " it has to be of shape (n_features,)."
ValueError: Shape mismatch: if categories is an array, it has to be of shape (n_features,).
if you change it in this way it works:
cols = ['size']
How can I change it to works for several columns?