Thiết kế website giá rẻ

Question

I am trying to perform some preprocessing on my data for a sales prediction. i used make_column_selector to select specific columns in order to apply different encoders to different column. i was trying to create a make selector column object to access column in the feature variable X. it works well for the numerical columns but the category column are the ones giving issues. averytime i use the selector object to select column in the data for the category, i get “TypeError: unhashable type: ‘list'”

# @let start with data preprocessing

# set the seed
seed = 200

# set the feature and target variable
X = bigmart_copy.drop('Item_Outlet_Sales', axis = 1)
y = bigmart_copy.Item_Outlet_Sales

# split data into train and test set
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, shuffle = True, random_state=seed)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=seed)

# create a list of numerical column
num_selector = make_column_selector(dtype_exclude='object')

# create two set of of category list, one of ordinal type and 
# the other of nominal type
cat_selector_ord = ['Item_Fat_Content', 'Outlet_Size', 'Item_MRP_Category', 'Outlet_location_type']
cat_selector_nom = [x for x in bigmart_copy.columns if x not in cat_selector_ord]

cat_selector_nom_nom = make_column_selector(pattern=cat_selector_nom)
cat_selector_ord_ord = make_column_selector(pattern=cat_selector_ord)

# sekect this column from the data
num_cols = num_selector(X)
cat_cols_ord = cat_selector_ord_ord(X)
cat_cols_nom = cat_selector_nom_nom(X)

# initiate the preprocessor for each selctor
num_preprocessor = RobustScaler()
cat_selector_nom_preprocessor = OneHotEncoder()
cat_selector_ord_preprocessor = OrdinalEncoder()

# set the preprocessor
preprocesor = ColumnTransformer([
    ('RobustScaler', num_preprocessor, num_cols),
    ('OneHotEncoder', cat_selector_nom_preprocessor, cat_cols_nom),
    ('OrdinalEncoder', cat_selector_ord_preprocessor, cat_cols_ord)
])

# create a machine model pipeline
pipelines = {
    'Linear Regression':make_pipeline(preprocesor, LinearRegression),
    'Random Forest Regressor': make_pipeline(preprocesor, RandomForestRegressor),
    'Gradient Boost Regression':make_pipeline(preprocesor, GradientBoostingRegressor),
    'Extra Tree Regressor':make_pipeline(preprocesor, ExtraTreesRegressor)
}



TypeError                                 Traceback (most recent call last)
Cell In[29], line 27
     25 # sekect this column from the data
     26 num_cols = num_selector(X)
---> 27 cat_cols_ord = cat_selector_ord_ord(X)
     28 cat_cols_nom = cat_selector_nom_nom(X)
     30 # initiate the preprocessor for each selctor

File ~anaconda3libsite-packagessklearncompose_column_transformer.py:1121, in make_column_selector.__call__(self, df)
   1119 cols = df_row.columns
   1120 if self.pattern is not None:
-> 1121     cols = cols[cols.str.contains(self.pattern, regex=True)]
   1122 return cols.tolist()

File ~anaconda3libsite-packagespandascorestringsaccessor.py:129, in forbid_nonstring_types.<locals>._forbid_nonstring_types.<locals>.wrapper(self, *args, **kwargs)
    124     msg = (
    125         f"Cannot use .str.{func_name} with values of "
    126         f"inferred dtype '{self._inferred_dtype}'."
    127     )
    128     raise TypeError(msg)
--> 129 return func(self, *args, **kwargs)

File ~anaconda3libsite-packagespandascorestringsaccessor.py:1252, in StringMethods.contains(self, pat, case, flags, na, regex)
   1127 @forbid_nonstring_types(["bytes"])
   1128 def contains(self, pat, case=True, flags=0, na=None, regex=True):
   1129     r"""
   1130     Test if pattern or regex is contained within a string of a Series or Index.
   1131 
   (...)
   1250     dtype: bool
   1251     """
-> 1252     if regex and re.compile(pat).groups:
   1253         warnings.warn(
   1254             "This pattern is interpreted as a regular expression, and has "
   1255             "match groups. To actually get the groups, use str.extract.",
   1256             UserWarning,
   1257             stacklevel=find_stack_level(),
   1258         )
   1260     result = self._data.array._str_contains(pat, case, flags, na, regex)

File ~anaconda3libre.py:251, in compile(pattern, flags)
    249 def compile(pattern, flags=0):
    250     "Compile a regular expression pattern, returning a Pattern object."
--> 251     return _compile(pattern, flags)

File ~anaconda3libre.py:293, in _compile(pattern, flags)
    291     flags = flags.value
    292 try:
--> 293     return _cache[type(pattern), pattern, flags]
    294 except KeyError:
    295     pass

TypeError: unhashable type: 'list'

I was expecting no error to be present and expect that the code work as expected

Thiết kế website giá rẻ

Danh mục

TypeError: unhashable type: ‘list’ with make_column_selector