Skip to content

Commit 68f53cd

Browse files
authored
API: consistently raise TypeError for invalid-typed fill_value (#37733)
1 parent c4f6588 commit 68f53cd

File tree

16 files changed

+51
-68
lines changed

16 files changed

+51
-68
lines changed

doc/source/whatsnew/v1.2.0.rst

+4
Original file line numberDiff line numberDiff line change
@@ -322,6 +322,10 @@ Other API changes
322322
^^^^^^^^^^^^^^^^^
323323

324324
- Sorting in descending order is now stable for :meth:`Series.sort_values` and :meth:`Index.sort_values` for DateTime-like :class:`Index` subclasses. This will affect sort order when sorting :class:`DataFrame` on multiple columns, sorting with a key function that produces duplicates, or requesting the sorting index when using :meth:`Index.sort_values`. When using :meth:`Series.value_counts`, count of missing values is no longer the last in the list of duplicate counts, and its position corresponds to the position in the original :class:`Series`. When using :meth:`Index.sort_values` for DateTime-like :class:`Index` subclasses, NaTs ignored the ``na_position`` argument and were sorted to the beginning. Now they respect ``na_position``, the default being ``last``, same as other :class:`Index` subclasses. (:issue:`35992`)
325+
- Passing an invalid ``fill_value`` to :meth:`Categorical.take`, :meth:`DatetimeArray.take`, :meth:`TimedeltaArray.take`, :meth:`PeriodArray.take` now raises ``TypeError`` instead of ``ValueError`` (:issue:`37733`)
326+
- Passing an invalid ``fill_value`` to :meth:`Series.shift` with a ``CategoricalDtype`` now raises ``TypeError`` instead of ``ValueError`` (:issue:`37733`)
327+
- Passing an invalid value to :meth:`IntervalIndex.insert` or :meth:`CategoricalIndex.insert` now raises a ``TypeError`` instead of a ``ValueError`` (:issue:`37733`)
328+
- Attempting to reindex a :class:`Series` with a :class:`CategoricalIndex` with an invalid ``fill_value`` now raises ``TypeError`` instead of ``ValueError`` (:issue:`37733`)
325329

326330
.. ---------------------------------------------------------------------------
327331

pandas/core/arrays/_mixins.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ def take(
7474
def _validate_fill_value(self, fill_value):
7575
"""
7676
If a fill_value is passed to `take` convert it to a representation
77-
suitable for self._ndarray, raising ValueError if this is not possible.
77+
suitable for self._ndarray, raising TypeError if this is not possible.
7878
7979
Parameters
8080
----------
@@ -86,7 +86,7 @@ def _validate_fill_value(self, fill_value):
8686
8787
Raises
8888
------
89-
ValueError
89+
TypeError
9090
"""
9191
raise AbstractMethodError(self)
9292

pandas/core/arrays/categorical.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -1190,7 +1190,7 @@ def _validate_searchsorted_value(self, value):
11901190
def _validate_fill_value(self, fill_value):
11911191
"""
11921192
Convert a user-facing fill_value to a representation to use with our
1193-
underlying ndarray, raising ValueError if this is not possible.
1193+
underlying ndarray, raising TypeError if this is not possible.
11941194
11951195
Parameters
11961196
----------
@@ -1202,15 +1202,15 @@ def _validate_fill_value(self, fill_value):
12021202
12031203
Raises
12041204
------
1205-
ValueError
1205+
TypeError
12061206
"""
12071207

12081208
if is_valid_nat_for_dtype(fill_value, self.categories.dtype):
12091209
fill_value = -1
12101210
elif fill_value in self.categories:
12111211
fill_value = self._unbox_scalar(fill_value)
12121212
else:
1213-
raise ValueError(
1213+
raise TypeError(
12141214
f"'fill_value={fill_value}' is not present "
12151215
"in this Categorical's categories"
12161216
)
@@ -1659,7 +1659,6 @@ def fillna(self, value=None, method=None, limit=None):
16591659
# We get ndarray or Categorical if called via Series.fillna,
16601660
# where it will unwrap another aligned Series before getting here
16611661
codes[mask] = new_codes[mask]
1662-
16631662
else:
16641663
codes[mask] = new_codes
16651664

pandas/core/arrays/datetimelike.py

+3-13
Original file line numberDiff line numberDiff line change
@@ -454,7 +454,7 @@ def _validate_comparison_value(self, other):
454454
def _validate_fill_value(self, fill_value):
455455
"""
456456
If a fill_value is passed to `take` convert it to an i8 representation,
457-
raising ValueError if this is not possible.
457+
raising TypeError if this is not possible.
458458
459459
Parameters
460460
----------
@@ -466,19 +466,9 @@ def _validate_fill_value(self, fill_value):
466466
467467
Raises
468468
------
469-
ValueError
469+
TypeError
470470
"""
471-
msg = (
472-
f"'fill_value' should be a {self._scalar_type}. "
473-
f"Got '{str(fill_value)}'."
474-
)
475-
try:
476-
return self._validate_scalar(fill_value)
477-
except TypeError as err:
478-
if "Cannot compare tz-naive and tz-aware" in str(err):
479-
# tzawareness-compat
480-
raise
481-
raise ValueError(msg) from err
471+
return self._validate_scalar(fill_value)
482472

483473
def _validate_shift_value(self, fill_value):
484474
# TODO(2.0): once this deprecation is enforced, use _validate_fill_value

pandas/core/arrays/interval.py

+2-13
Original file line numberDiff line numberDiff line change
@@ -649,7 +649,7 @@ def fillna(self, value=None, method=None, limit=None):
649649
if limit is not None:
650650
raise TypeError("limit is not supported for IntervalArray.")
651651

652-
value_left, value_right = self._validate_fillna_value(value)
652+
value_left, value_right = self._validate_fill_value(value)
653653

654654
left = self.left.fillna(value=value_left)
655655
right = self.right.fillna(value=value_right)
@@ -870,25 +870,14 @@ def _validate_scalar(self, value):
870870
# GH#18295
871871
left = right = value
872872
else:
873-
raise ValueError(
873+
raise TypeError(
874874
"can only insert Interval objects and NA into an IntervalArray"
875875
)
876876
return left, right
877877

878878
def _validate_fill_value(self, value):
879879
return self._validate_scalar(value)
880880

881-
def _validate_fillna_value(self, value):
882-
# This mirrors Datetimelike._validate_fill_value
883-
try:
884-
return self._validate_scalar(value)
885-
except ValueError as err:
886-
msg = (
887-
"'IntervalArray.fillna' only supports filling with a "
888-
f"scalar 'pandas.Interval'. Got a '{type(value).__name__}' instead."
889-
)
890-
raise TypeError(msg) from err
891-
892881
def _validate_setitem_value(self, value):
893882
needs_float_conversion = False
894883

pandas/core/indexes/multi.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -3660,7 +3660,12 @@ def insert(self, loc: int, item):
36603660
# must insert at end otherwise you have to recompute all the
36613661
# other codes
36623662
lev_loc = len(level)
3663-
level = level.insert(lev_loc, k)
3663+
try:
3664+
level = level.insert(lev_loc, k)
3665+
except TypeError:
3666+
# TODO: Should this be done inside insert?
3667+
# TODO: smarter casting rules?
3668+
level = level.astype(object).insert(lev_loc, k)
36643669
else:
36653670
lev_loc = level.get_loc(k)
36663671

pandas/tests/arrays/categorical/test_take.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def test_take_fill_value_new_raises(self):
7979
# https://github.com/pandas-dev/pandas/issues/23296
8080
cat = Categorical(["a", "b", "c"])
8181
xpr = r"'fill_value=d' is not present in this Categorical's categories"
82-
with pytest.raises(ValueError, match=xpr):
82+
with pytest.raises(TypeError, match=xpr):
8383
cat.take([0, 1, -1], fill_value="d", allow_fill=True)
8484

8585
def test_take_nd_deprecated(self):

pandas/tests/arrays/test_datetimelike.py

+17-22
Original file line numberDiff line numberDiff line change
@@ -145,8 +145,8 @@ def test_take_fill_raises(self, fill_value):
145145

146146
arr = self.array_cls._simple_new(data, freq="D")
147147

148-
msg = f"'fill_value' should be a {self.dtype}. Got '{fill_value}'"
149-
with pytest.raises(ValueError, match=msg):
148+
msg = f"value should be a '{arr._scalar_type.__name__}' or 'NaT'. Got"
149+
with pytest.raises(TypeError, match=msg):
150150
arr.take([0, 1], allow_fill=True, fill_value=fill_value)
151151

152152
def test_take_fill(self):
@@ -169,8 +169,8 @@ def test_take_fill_str(self, arr1d):
169169
expected = arr1d[[-1, 1]]
170170
tm.assert_equal(result, expected)
171171

172-
msg = r"'fill_value' should be a <.*>\. Got 'foo'"
173-
with pytest.raises(ValueError, match=msg):
172+
msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got"
173+
with pytest.raises(TypeError, match=msg):
174174
arr1d.take([-1, 1], allow_fill=True, fill_value="foo")
175175

176176
def test_concat_same_type(self):
@@ -745,13 +745,12 @@ def test_take_fill_valid(self, arr1d):
745745
result = arr.take([-1, 1], allow_fill=True, fill_value=now)
746746
assert result[0] == now
747747

748-
msg = f"'fill_value' should be a {self.dtype}. Got '0 days 00:00:00'."
749-
with pytest.raises(ValueError, match=msg):
748+
msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got"
749+
with pytest.raises(TypeError, match=msg):
750750
# fill_value Timedelta invalid
751751
arr.take([-1, 1], allow_fill=True, fill_value=now - now)
752752

753-
msg = f"'fill_value' should be a {self.dtype}. Got '2014Q1'."
754-
with pytest.raises(ValueError, match=msg):
753+
with pytest.raises(TypeError, match=msg):
755754
# fill_value Period invalid
756755
arr.take([-1, 1], allow_fill=True, fill_value=pd.Period("2014Q1"))
757756

@@ -763,14 +762,13 @@ def test_take_fill_valid(self, arr1d):
763762
arr.take([-1, 1], allow_fill=True, fill_value=now)
764763

765764
value = pd.NaT.value
766-
msg = f"'fill_value' should be a {self.dtype}. Got '{value}'."
767-
with pytest.raises(ValueError, match=msg):
765+
msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got"
766+
with pytest.raises(TypeError, match=msg):
768767
# require NaT, not iNaT, as it could be confused with an integer
769768
arr.take([-1, 1], allow_fill=True, fill_value=value)
770769

771770
value = np.timedelta64("NaT", "ns")
772-
msg = f"'fill_value' should be a {self.dtype}. Got '{str(value)}'."
773-
with pytest.raises(ValueError, match=msg):
771+
with pytest.raises(TypeError, match=msg):
774772
# require appropriate-dtype if we have a NA value
775773
arr.take([-1, 1], allow_fill=True, fill_value=value)
776774

@@ -932,20 +930,18 @@ def test_take_fill_valid(self, timedelta_index):
932930

933931
now = pd.Timestamp.now()
934932
value = now
935-
msg = f"'fill_value' should be a {self.dtype}. Got '{value}'."
936-
with pytest.raises(ValueError, match=msg):
933+
msg = f"value should be a '{arr._scalar_type.__name__}' or 'NaT'. Got"
934+
with pytest.raises(TypeError, match=msg):
937935
# fill_value Timestamp invalid
938936
arr.take([0, 1], allow_fill=True, fill_value=value)
939937

940938
value = now.to_period("D")
941-
msg = f"'fill_value' should be a {self.dtype}. Got '{value}'."
942-
with pytest.raises(ValueError, match=msg):
939+
with pytest.raises(TypeError, match=msg):
943940
# fill_value Period invalid
944941
arr.take([0, 1], allow_fill=True, fill_value=value)
945942

946943
value = np.datetime64("NaT", "ns")
947-
msg = f"'fill_value' should be a {self.dtype}. Got '{str(value)}'."
948-
with pytest.raises(ValueError, match=msg):
944+
with pytest.raises(TypeError, match=msg):
949945
# require appropriate-dtype if we have a NA value
950946
arr.take([-1, 1], allow_fill=True, fill_value=value)
951947

@@ -981,14 +977,13 @@ def test_take_fill_valid(self, arr1d):
981977
arr = arr1d
982978

983979
value = pd.NaT.value
984-
msg = f"'fill_value' should be a {self.dtype}. Got '{value}'."
985-
with pytest.raises(ValueError, match=msg):
980+
msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got"
981+
with pytest.raises(TypeError, match=msg):
986982
# require NaT, not iNaT, as it could be confused with an integer
987983
arr.take([-1, 1], allow_fill=True, fill_value=value)
988984

989985
value = np.timedelta64("NaT", "ns")
990-
msg = f"'fill_value' should be a {self.dtype}. Got '{str(value)}'."
991-
with pytest.raises(ValueError, match=msg):
986+
with pytest.raises(TypeError, match=msg):
992987
# require appropriate-dtype if we have a NA value
993988
arr.take([-1, 1], allow_fill=True, fill_value=value)
994989

pandas/tests/arrays/test_period.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,8 @@ def test_take_raises():
113113
with pytest.raises(IncompatibleFrequency, match="freq"):
114114
arr.take([0, -1], allow_fill=True, fill_value=pd.Period("2000", freq="W"))
115115

116-
with pytest.raises(ValueError, match="foo"):
116+
msg = "value should be a 'Period' or 'NaT'. Got 'str' instead"
117+
with pytest.raises(TypeError, match=msg):
117118
arr.take([0, -1], allow_fill=True, fill_value="foo")
118119

119120

pandas/tests/extension/test_interval.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -136,8 +136,8 @@ def test_fillna_limit_backfill(self):
136136
def test_fillna_series(self):
137137
pass
138138

139-
def test_non_scalar_raises(self, data_missing):
140-
msg = "Got a 'list' instead."
139+
def test_fillna_non_scalar_raises(self, data_missing):
140+
msg = "can only insert Interval objects and NA into an IntervalArray"
141141
with pytest.raises(TypeError, match=msg):
142142
data_missing.fillna([1, 1])
143143

pandas/tests/frame/test_stack_unstack.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@ def test_unstack_fill_frame_categorical(self):
245245

246246
# Fill with non-category results in a TypeError
247247
msg = r"'fill_value=d' is not present in"
248-
with pytest.raises(ValueError, match=msg):
248+
with pytest.raises(TypeError, match=msg):
249249
data.unstack(fill_value="d")
250250

251251
# Fill with category value replaces missing values as expected

pandas/tests/indexes/categorical/test_category.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ def test_insert(self):
172172

173173
# invalid
174174
msg = "'fill_value=d' is not present in this Categorical's categories"
175-
with pytest.raises(ValueError, match=msg):
175+
with pytest.raises(TypeError, match=msg):
176176
ci.insert(0, "d")
177177

178178
# GH 18295 (test missing)
@@ -184,7 +184,7 @@ def test_insert(self):
184184
def test_insert_na_mismatched_dtype(self):
185185
ci = CategoricalIndex([0, 1, 1])
186186
msg = "'fill_value=NaT' is not present in this Categorical's categories"
187-
with pytest.raises(ValueError, match=msg):
187+
with pytest.raises(TypeError, match=msg):
188188
ci.insert(0, pd.NaT)
189189

190190
def test_delete(self):

pandas/tests/indexes/categorical/test_reindex.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -57,5 +57,5 @@ def test_reindex_missing_category(self):
5757
# GH: 18185
5858
ser = Series([1, 2, 3, 1], dtype="category")
5959
msg = "'fill_value=-1' is not present in this Categorical's categories"
60-
with pytest.raises(ValueError, match=msg):
60+
with pytest.raises(TypeError, match=msg):
6161
ser.reindex([1, 2, 3, 4, 5], fill_value=-1)

pandas/tests/indexes/interval/test_interval.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ def test_insert(self, data):
192192

193193
# invalid type
194194
msg = "can only insert Interval objects and NA into an IntervalArray"
195-
with pytest.raises(ValueError, match=msg):
195+
with pytest.raises(TypeError, match=msg):
196196
data.insert(1, "foo")
197197

198198
# invalid closed
@@ -213,7 +213,7 @@ def test_insert(self, data):
213213
if data.left.dtype.kind not in ["m", "M"]:
214214
# trying to insert pd.NaT into a numeric-dtyped Index should cast/raise
215215
msg = "can only insert Interval objects and NA into an IntervalArray"
216-
with pytest.raises(ValueError, match=msg):
216+
with pytest.raises(TypeError, match=msg):
217217
result = data.insert(1, pd.NaT)
218218
else:
219219
result = data.insert(1, pd.NaT)

pandas/tests/indexing/test_categorical.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,9 @@ def test_loc_scalar(self):
6060
df.loc["d"] = 10
6161

6262
msg = "'fill_value=d' is not present in this Categorical's categories"
63-
with pytest.raises(ValueError, match=msg):
63+
with pytest.raises(TypeError, match=msg):
6464
df.loc["d", "A"] = 10
65-
with pytest.raises(ValueError, match=msg):
65+
with pytest.raises(TypeError, match=msg):
6666
df.loc["d", "C"] = 10
6767

6868
with pytest.raises(KeyError, match="^1$"):

pandas/tests/series/methods/test_shift.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ def test_shift_categorical_fill_value(self):
170170

171171
# check for incorrect fill_value
172172
msg = "'fill_value=f' is not present in this Categorical's categories"
173-
with pytest.raises(ValueError, match=msg):
173+
with pytest.raises(TypeError, match=msg):
174174
ts.shift(1, fill_value="f")
175175

176176
def test_shift_dst(self):

0 commit comments

Comments
 (0)