Skip to content

Commit a69dbb3

Browse files
committed
warn for categorical
1 parent 6f79282 commit a69dbb3

File tree

3 files changed

+42
-10
lines changed

3 files changed

+42
-10
lines changed

pandas/core/arrays/categorical.py

+18-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# pylint: disable=E1101,W0232
22

3+
import reprlib
34
import textwrap
45
from warnings import warn
56

@@ -1909,20 +1910,34 @@ def take_nd(self, indexer, allow_fill=None, fill_value=None):
19091910
def where(self, cond, other):
19101911
# n.b. this now preserves the type
19111912
codes = self._codes
1913+
object_msg = (
1914+
"Implicitly converting categorical to object-dtype ndarray. "
1915+
"The values `{}' are not present in this categorical's "
1916+
"categories. A future version of pandas will raise a ValueError "
1917+
"when 'other' contains different categories.\n\n"
1918+
"To preserve the current behavior, add the new categories to "
1919+
"the categorical before calling 'where', or convert the "
1920+
"categorical to a different dtype."
1921+
)
19121922

19131923
if is_scalar(other) and isna(other):
19141924
other = -1
19151925
elif is_scalar(other):
19161926
item = self.categories.get_indexer([other]).item()
19171927

19181928
if item == -1:
1919-
raise ValueError("The value '{}' is not present in "
1920-
"this Categorical's categories".format(other))
1929+
# note: when removing this, also remove CategoricalBlock.where
1930+
warn(object_msg.format(other), FutureWarning, stacklevel=2)
1931+
return np.where(cond, self, other)
1932+
19211933
other = item
19221934

19231935
elif is_categorical_dtype(other):
19241936
if not is_dtype_equal(self, other):
1925-
raise TypeError("The type of 'other' does not match.")
1937+
extra = list(other.categories.difference(self.categories))
1938+
warn(object_msg.format(reprlib.repr(extra)), FutureWarning,
1939+
stacklevel=2)
1940+
return np.where(cond, self, other)
19261941
other = _get_codes_for_values(other, self.categories)
19271942
# get the codes from other that match our categories
19281943
pass

pandas/core/internals/blocks.py

+11
Original file line numberDiff line numberDiff line change
@@ -2671,6 +2671,17 @@ def concat_same_type(self, to_concat, placement=None):
26712671
values, placement=placement or slice(0, len(values), 1),
26722672
ndim=self.ndim)
26732673

2674+
def where(self, other, cond, align=True, errors='raise',
2675+
try_cast=False, axis=0, transpose=False):
2676+
result = super(CategoricalBlock, self).where(
2677+
other, cond, align, errors, try_cast, axis, transpose
2678+
)
2679+
if result.values.dtype != self.values.dtype:
2680+
# For backwards compatibility, we allow upcasting to object.
2681+
# This fallback will be removed in the future.
2682+
result = result.astype(object)
2683+
return result
2684+
26742685

26752686
class DatetimeBlock(DatetimeLikeBlockMixin, Block):
26762687
__slots__ = ()

pandas/tests/arrays/categorical/test_indexing.py

+13-7
Original file line numberDiff line numberDiff line change
@@ -122,11 +122,6 @@ def test_get_indexer_non_unique(self, idx_values, key_values, key_class):
122122
tm.assert_numpy_array_equal(expected, result)
123123
tm.assert_numpy_array_equal(exp_miss, res_miss)
124124

125-
def test_where_raises(self):
126-
arr = Categorical(['a', 'b', 'c'])
127-
with pytest.raises(ValueError, match="The value 'd'"):
128-
arr.where([True, False, True], 'd')
129-
130125
def test_where_unobserved_categories(self):
131126
arr = Categorical(['a', 'b', 'c'], categories=['d', 'c', 'b', 'a'])
132127
result = arr.where([True, True, False], other='b')
@@ -140,13 +135,24 @@ def test_where_other_categorical(self):
140135
expected = Categorical(['a', 'c', 'c'], dtype=arr.dtype)
141136
tm.assert_categorical_equal(result, expected)
142137

138+
def test_where_warns(self):
139+
arr = Categorical(['a', 'b', 'c'])
140+
with tm.assert_produces_warning(FutureWarning):
141+
result = arr.where([True, False, True], 'd')
142+
143+
expected = np.array(['a', 'd', 'c'], dtype='object')
144+
tm.assert_numpy_array_equal(result, expected)
145+
143146
def test_where_ordered_differs_rasies(self):
144147
arr = Categorical(['a', 'b', 'c'], categories=['d', 'c', 'b', 'a'],
145148
ordered=True)
146149
other = Categorical(['b', 'c', 'a'], categories=['a', 'c', 'b', 'd'],
147150
ordered=True)
148-
with pytest.raises(TypeError, match="The type of"):
149-
arr.where([True, False, True], other)
151+
with tm.assert_produces_warning(FutureWarning):
152+
result = arr.where([True, False, True], other)
153+
154+
expected = np.array(['a', 'c', 'c'], dtype=object)
155+
tm.assert_numpy_array_equal(result, expected)
150156

151157

152158
@pytest.mark.parametrize("index", [True, False])

0 commit comments

Comments
 (0)