Skip to content

Commit f3a6753

Browse files
authored
REF: de-duplicate Categorical _validate_foo_value (#41919)
1 parent 829d16e commit f3a6753

File tree

16 files changed

+65
-53
lines changed

16 files changed

+65
-53
lines changed

doc/source/user_guide/categorical.rst

+4-4
Original file line numberDiff line numberDiff line change
@@ -777,8 +777,8 @@ value is included in the ``categories``:
777777
df
778778
try:
779779
df.iloc[2:4, :] = [["c", 3], ["c", 3]]
780-
except ValueError as e:
781-
print("ValueError:", str(e))
780+
except TypeError as e:
781+
print("TypeError:", str(e))
782782
783783
Setting values by assigning categorical data will also check that the ``categories`` match:
784784

@@ -788,8 +788,8 @@ Setting values by assigning categorical data will also check that the ``categori
788788
df
789789
try:
790790
df.loc["j":"k", "cats"] = pd.Categorical(["b", "b"], categories=["a", "b", "c"])
791-
except ValueError as e:
792-
print("ValueError:", str(e))
791+
except TypeError as e:
792+
print("TypeError:", str(e))
793793
794794
Assigning a ``Categorical`` to parts of a column of other types will use the values:
795795

doc/source/whatsnew/v1.4.0.rst

+5-1
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,11 @@ Bug fixes
174174

175175
Categorical
176176
^^^^^^^^^^^
177-
-
177+
- Bug in setting dtype-incompatible values into a :class:`Categorical` (or ``Series`` or ``DataFrame`` backed by ``Categorical``) raising ``ValueError`` instead of ``TypeError`` (:issue:`41919`)
178+
- Bug in :meth:`Categorical.searchsorted` when passing a dtype-incompatible value raising ``KeyError`` instead of ``TypeError`` (:issue:`41919`)
179+
- Bug in :meth:`Series.where` with ``CategoricalDtype`` when passing a dtype-incompatible value raising ``ValueError`` instead of ``TypeError`` (:issue:`41919`)
180+
- Bug in :meth:`Categorical.fillna` when passing a dtype-incompatible value raising ``ValueError`` instead of ``TypeError`` (:issue:`41919`)
181+
- Bug in :meth:`Categorical.fillna` with a tuple-like category raising ``ValueError`` instead of ``TypeError`` when filling with a non-category tuple (:issue:`41919`)
178182
-
179183

180184
Datetimelike

pandas/core/arrays/categorical.py

+15-20
Original file line numberDiff line numberDiff line change
@@ -1394,17 +1394,14 @@ def map(self, mapper):
13941394
# -------------------------------------------------------------
13951395
# Validators; ideally these can be de-duplicated
13961396

1397-
def _validate_searchsorted_value(self, value):
1398-
# searchsorted is very performance sensitive. By converting codes
1399-
# to same dtype as self.codes, we get much faster performance.
1400-
if is_scalar(value):
1401-
codes = self._unbox_scalar(value)
1397+
def _validate_setitem_value(self, value):
1398+
if not is_hashable(value):
1399+
# unhashable values are listlikes; hashable ones (scalars, tuples) are validated as scalars
1400+
return self._validate_listlike(value)
14021401
else:
1403-
locs = [self.categories.get_loc(x) for x in value]
1404-
# error: Incompatible types in assignment (expression has type
1405-
# "ndarray", variable has type "int")
1406-
codes = np.array(locs, dtype=self.codes.dtype) # type: ignore[assignment]
1407-
return codes
1402+
return self._validate_scalar(value)
1403+
1404+
_validate_searchsorted_value = _validate_setitem_value
14081405

14091406
def _validate_scalar(self, fill_value):
14101407
"""
@@ -1430,8 +1427,8 @@ def _validate_scalar(self, fill_value):
14301427
fill_value = self._unbox_scalar(fill_value)
14311428
else:
14321429
raise TypeError(
1433-
f"'fill_value={fill_value}' is not present "
1434-
"in this Categorical's categories"
1430+
"Cannot setitem on a Categorical with a new "
1431+
f"category ({fill_value}), set the categories first"
14351432
)
14361433
return fill_value
14371434

@@ -2016,37 +2013,35 @@ def __getitem__(self, key):
20162013
deprecate_ndim_indexing(result)
20172014
return result
20182015

2019-
def _validate_setitem_value(self, value):
2016+
def _validate_listlike(self, value):
2017+
# NB: here we assume scalar-like tuples have already been excluded
20202018
value = extract_array(value, extract_numpy=True)
20212019

20222020
# require identical categories set
20232021
if isinstance(value, Categorical):
20242022
if not is_dtype_equal(self.dtype, value.dtype):
2025-
raise ValueError(
2023+
raise TypeError(
20262024
"Cannot set a Categorical with another, "
20272025
"without identical categories"
20282026
)
20292027
# is_dtype_equal implies categories_match_up_to_permutation
20302028
value = self._encode_with_my_categories(value)
20312029
return value._codes
20322030

2033-
# wrap scalars and hashable-listlikes in list
2034-
rvalue = value if not is_hashable(value) else [value]
2035-
20362031
from pandas import Index
20372032

20382033
# tupleize_cols=False for e.g. test_fillna_iterable_category GH#41914
2039-
to_add = Index(rvalue, tupleize_cols=False).difference(self.categories)
2034+
to_add = Index(value, tupleize_cols=False).difference(self.categories)
20402035

20412036
# no assignments of values not in categories, but it's always ok to set
20422037
# something to np.nan
20432038
if len(to_add) and not isna(to_add).all():
2044-
raise ValueError(
2039+
raise TypeError(
20452040
"Cannot setitem on a Categorical with a new "
20462041
"category, set the categories first"
20472042
)
20482043

2049-
codes = self.categories.get_indexer(rvalue)
2044+
codes = self.categories.get_indexer(value)
20502045
return codes.astype(self._ndarray.dtype, copy=False)
20512046

20522047
def _reverse_indexer(self) -> dict[Hashable, npt.NDArray[np.intp]]:

pandas/core/internals/blocks.py

+4
Original file line numberDiff line numberDiff line change
@@ -1628,6 +1628,10 @@ def where(self, other, cond, errors="raise") -> list[Block]:
16281628
# NotImplementedError for class not implementing `__setitem__`
16291629
# TypeError for SparseArray, which implements just to raise
16301630
# a TypeError
1631+
if isinstance(result, Categorical):
1632+
# TODO: don't special-case
1633+
raise
1634+
16311635
result = type(self.values)._from_sequence(
16321636
np.where(cond, self.values, other), dtype=dtype
16331637
)

pandas/tests/arrays/categorical/test_analytics.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -186,15 +186,19 @@ def test_searchsorted(self, ordered):
186186
tm.assert_numpy_array_equal(res_ser, exp)
187187

188188
# Searching for a single value that is not from the Categorical
189-
with pytest.raises(KeyError, match="cucumber"):
189+
with pytest.raises(TypeError, match="cucumber"):
190190
cat.searchsorted("cucumber")
191-
with pytest.raises(KeyError, match="cucumber"):
191+
with pytest.raises(TypeError, match="cucumber"):
192192
ser.searchsorted("cucumber")
193193

194194
# Searching for multiple values, one of which is not from the Categorical
195-
with pytest.raises(KeyError, match="cucumber"):
195+
msg = (
196+
"Cannot setitem on a Categorical with a new category, "
197+
"set the categories first"
198+
)
199+
with pytest.raises(TypeError, match=msg):
196200
cat.searchsorted(["bread", "cucumber"])
197-
with pytest.raises(KeyError, match="cucumber"):
201+
with pytest.raises(TypeError, match=msg):
198202
ser.searchsorted(["bread", "cucumber"])
199203

200204
def test_unique(self, ordered):

pandas/tests/arrays/categorical/test_indexing.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def test_setitem_different_unordered_raises(self, other):
7373
target = Categorical(["a", "b"], categories=["a", "b"])
7474
mask = np.array([True, False])
7575
msg = "Cannot set a Categorical with another, without identical categories"
76-
with pytest.raises(ValueError, match=msg):
76+
with pytest.raises(TypeError, match=msg):
7777
target[mask] = other[mask]
7878

7979
@pytest.mark.parametrize(
@@ -89,7 +89,7 @@ def test_setitem_same_ordered_raises(self, other):
8989
target = Categorical(["a", "b"], categories=["a", "b"], ordered=True)
9090
mask = np.array([True, False])
9191
msg = "Cannot set a Categorical with another, without identical categories"
92-
with pytest.raises(ValueError, match=msg):
92+
with pytest.raises(TypeError, match=msg):
9393
target[mask] = other[mask]
9494

9595
def test_setitem_tuple(self):
@@ -260,7 +260,7 @@ def test_where_other_categorical(self):
260260
def test_where_new_category_raises(self):
261261
ser = Series(Categorical(["a", "b", "c"]))
262262
msg = "Cannot setitem on a Categorical with a new category"
263-
with pytest.raises(ValueError, match=msg):
263+
with pytest.raises(TypeError, match=msg):
264264
ser.where([True, False, True], "d")
265265

266266
def test_where_ordered_differs_rasies(self):
@@ -270,7 +270,7 @@ def test_where_ordered_differs_rasies(self):
270270
other = Categorical(
271271
["b", "c", "a"], categories=["a", "c", "b", "d"], ordered=True
272272
)
273-
with pytest.raises(ValueError, match="without identical categories"):
273+
with pytest.raises(TypeError, match="without identical categories"):
274274
ser.where([True, False, True], other)
275275

276276

pandas/tests/arrays/categorical/test_missing.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,12 @@ def test_fillna_raises(self, fillna_kwargs, msg):
8484
# https://github.com/pandas-dev/pandas/issues/13628
8585
cat = Categorical([1, 2, 3, None, None])
8686

87-
with pytest.raises(ValueError, match=msg):
87+
if len(fillna_kwargs) == 1 and "value" in fillna_kwargs:
88+
err = TypeError
89+
else:
90+
err = ValueError
91+
92+
with pytest.raises(err, match=msg):
8893
cat.fillna(**fillna_kwargs)
8994

9095
@pytest.mark.parametrize("named", [True, False])
@@ -104,7 +109,7 @@ def test_fillna_iterable_category(self, named):
104109
# not NotImplementedError GH#41914
105110
cat = Categorical(np.array([Point(1, 0), Point(0, 1), None], dtype=object))
106111
msg = "Cannot setitem on a Categorical with a new category"
107-
with pytest.raises(ValueError, match=msg):
112+
with pytest.raises(TypeError, match=msg):
108113
cat.fillna(Point(0, 0))
109114

110115
def test_fillna_array(self):

pandas/tests/arrays/categorical/test_take.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def test_take_fill_value(self):
8181
def test_take_fill_value_new_raises(self):
8282
# https://github.com/pandas-dev/pandas/issues/23296
8383
cat = Categorical(["a", "b", "c"])
84-
xpr = r"'fill_value=d' is not present in this Categorical's categories"
84+
xpr = r"Cannot setitem on a Categorical with a new category \(d\)"
8585
with pytest.raises(TypeError, match=xpr):
8686
cat.take([0, 1, -1], fill_value="d", allow_fill=True)
8787

pandas/tests/frame/indexing/test_indexing.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -1283,7 +1283,7 @@ def test_object_casting_indexing_wraps_datetimelike(using_array_manager):
12831283
assert isinstance(val, pd.Timedelta)
12841284

12851285

1286-
msg1 = "Cannot setitem on a Categorical with a new category, set the categories first"
1286+
msg1 = r"Cannot setitem on a Categorical with a new category( \(.*\))?, set the"
12871287
msg2 = "Cannot set a Categorical with another, without identical categories"
12881288

12891289

@@ -1348,7 +1348,7 @@ def test_loc_iloc_setitem_list_of_lists(self, orig, exp_multi_row, indexer):
13481348
tm.assert_frame_equal(df, exp_multi_row)
13491349

13501350
df = orig.copy()
1351-
with pytest.raises(ValueError, match=msg1):
1351+
with pytest.raises(TypeError, match=msg1):
13521352
indexer(df)[key, :] = [["c", 2], ["c", 2]]
13531353

13541354
@pytest.mark.parametrize("indexer", [tm.loc, tm.iloc, tm.at, tm.iat])
@@ -1367,7 +1367,7 @@ def test_loc_iloc_at_iat_setitem_single_value_in_categories(
13671367
tm.assert_frame_equal(df, exp_single_cats_value)
13681368

13691369
# "c" is not among the categories for df["cat"]
1370-
with pytest.raises(ValueError, match=msg1):
1370+
with pytest.raises(TypeError, match=msg1):
13711371
indexer(df)[key] = "c"
13721372

13731373
@pytest.mark.parametrize("indexer", [tm.loc, tm.iloc])
@@ -1401,7 +1401,7 @@ def test_loc_iloc_setitem_full_row_non_categorical_rhs(
14011401
tm.assert_frame_equal(df, exp_single_row)
14021402

14031403
# "c" is not among the categories for df["cat"]
1404-
with pytest.raises(ValueError, match=msg1):
1404+
with pytest.raises(TypeError, match=msg1):
14051405
indexer(df)[key, :] = ["c", 2]
14061406

14071407
@pytest.mark.parametrize("indexer", [tm.loc, tm.iloc])
@@ -1423,14 +1423,14 @@ def test_loc_iloc_setitem_partial_col_categorical_rhs(
14231423

14241424
# categories do not match df["cat"]'s, but "b" is among them
14251425
semi_compat = Categorical(list("bb"), categories=list("abc"))
1426-
with pytest.raises(ValueError, match=msg2):
1426+
with pytest.raises(TypeError, match=msg2):
14271427
# different categories but holdable values
14281428
# -> not sure if this should fail or pass
14291429
indexer(df)[key] = semi_compat
14301430

14311431
# categories do not match df["cat"]'s, and "c" is not among them
14321432
incompat = Categorical(list("cc"), categories=list("abc"))
1433-
with pytest.raises(ValueError, match=msg2):
1433+
with pytest.raises(TypeError, match=msg2):
14341434
# different values
14351435
indexer(df)[key] = incompat
14361436

@@ -1450,5 +1450,5 @@ def test_loc_iloc_setitem_non_categorical_rhs(
14501450
tm.assert_frame_equal(df, exp_parts_cats_col)
14511451

14521452
# "c" not part of the categories
1453-
with pytest.raises(ValueError, match=msg1):
1453+
with pytest.raises(TypeError, match=msg1):
14541454
indexer(df)[key] = ["c", "c"]

pandas/tests/frame/methods/test_fillna.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@ def test_na_actions_categorical(self):
173173
tm.assert_frame_equal(res, df_exp_fill)
174174

175175
msg = "Cannot setitem on a Categorical with a new category"
176-
with pytest.raises(ValueError, match=msg):
176+
with pytest.raises(TypeError, match=msg):
177177
df.fillna(value={"cats": 4, "vals": "c"})
178178

179179
res = df.fillna(method="pad")

pandas/tests/frame/test_stack_unstack.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,7 @@ def test_unstack_fill_frame_categorical(self):
255255
tm.assert_frame_equal(result, expected)
256256

257257
# Fill with non-category results in a TypeError
258-
msg = r"'fill_value=d' is not present in"
258+
msg = r"Cannot setitem on a Categorical with a new category \(d\)"
259259
with pytest.raises(TypeError, match=msg):
260260
data.unstack(fill_value="d")
261261

pandas/tests/indexes/categorical/test_fillna.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,9 @@ def test_fillna_categorical(self):
1515

1616
cat = idx._data
1717

18-
# fill by value not in categories raises ValueError on EA, casts on CI
18+
# fill by value not in categories raises TypeError on EA, casts on CI
1919
msg = "Cannot setitem on a Categorical with a new category"
20-
with pytest.raises(ValueError, match=msg):
20+
with pytest.raises(TypeError, match=msg):
2121
cat.fillna(2.0)
2222

2323
result = idx.fillna(2.0)
@@ -48,5 +48,5 @@ def test_fillna_validates_with_no_nas(self):
4848
tm.assert_index_equal(res, ci)
4949

5050
# Same check directly on the Categorical
51-
with pytest.raises(ValueError, match=msg):
51+
with pytest.raises(TypeError, match=msg):
5252
cat.fillna(False)

pandas/tests/indexes/categorical/test_indexing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -322,7 +322,7 @@ def test_where_non_categories(self):
322322
tm.assert_index_equal(result, expected)
323323

324324
msg = "Cannot setitem on a Categorical with a new category"
325-
with pytest.raises(ValueError, match=msg):
325+
with pytest.raises(TypeError, match=msg):
326326
# Test the Categorical method directly
327327
ci._data.where(mask, 2)
328328

pandas/tests/indexes/categorical/test_reindex.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ def test_reindex_empty_index(self):
6262
def test_reindex_missing_category(self):
6363
# GH: 18185
6464
ser = Series([1, 2, 3, 1], dtype="category")
65-
msg = "'fill_value=-1' is not present in this Categorical's categories"
65+
msg = r"Cannot setitem on a Categorical with a new category \(-1\)"
6666
with pytest.raises(TypeError, match=msg):
6767
ser.reindex([1, 2, 3, 4, 5], fill_value=-1)
6868

pandas/tests/series/methods/test_fillna.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -677,14 +677,14 @@ def test_fillna_categorical_raises(self):
677677
cat = ser._values
678678

679679
msg = "Cannot setitem on a Categorical with a new category"
680-
with pytest.raises(ValueError, match=msg):
680+
with pytest.raises(TypeError, match=msg):
681681
ser.fillna("d")
682682

683683
msg2 = "Length of 'value' does not match."
684684
with pytest.raises(ValueError, match=msg2):
685685
cat.fillna(Series("d"))
686686

687-
with pytest.raises(ValueError, match=msg):
687+
with pytest.raises(TypeError, match=msg):
688688
ser.fillna({1: "d", 3: "a"})
689689

690690
msg = '"value" parameter must be a scalar or dict, but you passed a "list"'

pandas/tests/series/methods/test_shift.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ def test_shift_categorical_fill_value(self):
169169
tm.assert_equal(res, expected)
170170

171171
# check for incorrect fill_value
172-
msg = "'fill_value=f' is not present in this Categorical's categories"
172+
msg = r"Cannot setitem on a Categorical with a new category \(f\)"
173173
with pytest.raises(TypeError, match=msg):
174174
ts.shift(1, fill_value="f")
175175

0 commit comments

Comments
 (0)