Skip to content

Commit 22007d3

Browse files
authored
API: CategoricalIndex.append fallback to concat_compat (#38098)
1 parent ce0efe8 commit 22007d3

File tree

8 files changed

+41
-53
lines changed

8 files changed

+41
-53
lines changed

doc/source/whatsnew/v1.2.0.rst

+2
Original file line number | Diff line number | Diff line change
@@ -454,6 +454,7 @@ Other API changes
454454
- Passing an invalid ``fill_value`` to :meth:`Series.shift` with a ``CategoricalDtype`` now raises a ``TypeError`` instead of a ``ValueError`` (:issue:`37733`)
455455
- Passing an invalid value to :meth:`IntervalIndex.insert` or :meth:`CategoricalIndex.insert` now raises a ``TypeError`` instead of a ``ValueError`` (:issue:`37733`)
456456
- Attempting to reindex a Series with a :class:`CategoricalIndex` with an invalid ``fill_value`` now raises a ``TypeError`` instead of a ``ValueError`` (:issue:`37733`)
457+
- :meth:`CategoricalIndex.append` with an index that contains non-category values will now cast instead of raising ``TypeError`` (:issue:`38098`)
457458

458459
.. ---------------------------------------------------------------------------
459460
@@ -635,6 +636,7 @@ Indexing
635636
- Bug in :meth:`DataFrame.loc` returning and assigning elements in wrong order when indexer is differently ordered than the :class:`MultiIndex` to filter (:issue:`31330`, :issue:`34603`)
636637
- Bug in :meth:`DataFrame.loc` and :meth:`DataFrame.__getitem__` raising ``KeyError`` when columns were :class:`MultiIndex` with only one level (:issue:`29749`)
637638
- Bug in :meth:`Series.__getitem__` and :meth:`DataFrame.__getitem__` raising blank ``KeyError`` without missing keys for :class:`IntervalIndex` (:issue:`27365`)
639+
- Bug in setting a new label on a :class:`DataFrame` or :class:`Series` with a :class:`CategoricalIndex` incorrectly raising ``TypeError`` when the new label is not among the index's categories (:issue:`38098`)
638640

639641
Missing
640642
^^^^^^^

pandas/core/indexes/base.py

-6
Original file line number | Diff line number | Diff line change
@@ -4180,12 +4180,6 @@ def _coerce_scalar_to_index(self, item):
41804180

41814181
return Index([item], dtype=dtype, **self._get_attributes_dict())
41824182

4183-
def _to_safe_for_reshape(self):
4184-
"""
4185-
Convert to object if we are a categorical.
4186-
"""
4187-
return self
4188-
41894183
def _validate_fill_value(self, value):
41904184
"""
41914185
Check if the value can be inserted into our array, and convert

pandas/core/indexes/category.py

+12-8
Original file line number | Diff line number | Diff line change
@@ -399,10 +399,6 @@ def unique(self, level=None):
399399
# of result, not self.
400400
return type(self)._simple_new(result, name=self.name)
401401

402-
def _to_safe_for_reshape(self):
403-
""" convert to object if we are a categorical """
404-
return self.astype("object")
405-
406402
def reindex(self, target, method=None, level=None, limit=None, tolerance=None):
407403
"""
408404
Create index with target's values (move/add/delete values as necessary)
@@ -637,11 +633,19 @@ def map(self, mapper):
637633
mapped = self._values.map(mapper)
638634
return Index(mapped, name=self.name)
639635

640-
def _concat(self, to_concat: List["Index"], name: Label) -> "CategoricalIndex":
636+
def _concat(self, to_concat: List["Index"], name: Label) -> Index:
641637
# if calling index is category, don't check dtype of others
642-
codes = np.concatenate([self._is_dtype_compat(c).codes for c in to_concat])
643-
cat = self._data._from_backing_data(codes)
644-
return type(self)._simple_new(cat, name=name)
638+
try:
639+
codes = np.concatenate([self._is_dtype_compat(c).codes for c in to_concat])
640+
except TypeError:
641+
# not all to_concat elements are among our categories (or NA)
642+
from pandas.core.dtypes.concat import concat_compat
643+
644+
res = concat_compat(to_concat)
645+
return Index(res, name=name)
646+
else:
647+
cat = self._data._from_backing_data(codes)
648+
return type(self)._simple_new(cat, name=name)
645649

646650
def _delegate_method(self, name: str, *args, **kwargs):
647651
""" method delegation to the ._values """

pandas/core/indexes/multi.py

-4
Original file line number | Diff line number | Diff line change
@@ -1684,10 +1684,6 @@ def unique(self, level=None):
16841684
level = self._get_level_number(level)
16851685
return self._get_level_values(level=level, unique=True)
16861686

1687-
def _to_safe_for_reshape(self):
1688-
""" convert to object if we are a categorical """
1689-
return self.set_levels([i._to_safe_for_reshape() for i in self.levels])
1690-
16911687
def to_frame(self, index=True, name=None):
16921688
"""
16931689
Create a DataFrame with the levels of the MultiIndex as columns.

pandas/core/reshape/pivot.py

+8-23
Original file line number | Diff line number | Diff line change
@@ -268,19 +268,13 @@ def _add_margins(
268268
margin_dummy = DataFrame(row_margin, columns=[key]).T
269269

270270
row_names = result.index.names
271-
try:
272-
# check the result column and leave floats
273-
for dtype in set(result.dtypes):
274-
cols = result.select_dtypes([dtype]).columns
275-
margin_dummy[cols] = margin_dummy[cols].apply(
276-
maybe_downcast_to_dtype, args=(dtype,)
277-
)
278-
result = result.append(margin_dummy)
279-
except TypeError:
280-
281-
# we cannot reshape, so coerce the axis
282-
result.index = result.index._to_safe_for_reshape()
283-
result = result.append(margin_dummy)
271+
# check the result column and leave floats
272+
for dtype in set(result.dtypes):
273+
cols = result.select_dtypes([dtype]).columns
274+
margin_dummy[cols] = margin_dummy[cols].apply(
275+
maybe_downcast_to_dtype, args=(dtype,)
276+
)
277+
result = result.append(margin_dummy)
284278
result.index.names = row_names
285279

286280
return result
@@ -328,16 +322,7 @@ def _all_key(key):
328322

329323
# we are going to mutate this, so need to copy!
330324
piece = piece.copy()
331-
try:
332-
piece[all_key] = margin[key]
333-
except ValueError:
334-
# we cannot reshape, so coerce the axis
335-
piece.set_axis(
336-
piece._get_axis(cat_axis)._to_safe_for_reshape(),
337-
axis=cat_axis,
338-
inplace=True,
339-
)
340-
piece[all_key] = margin[key]
325+
piece[all_key] = margin[key]
341326

342327
table_pieces.append(piece)
343328
margin_keys.append(all_key)

pandas/tests/indexes/categorical/test_category.py

+4-4
Original file line number | Diff line number | Diff line change
@@ -57,10 +57,10 @@ def test_append(self):
5757
expected = CategoricalIndex(list("aabbcaca"), categories=categories)
5858
tm.assert_index_equal(result, expected, exact=True)
5959

60-
# invalid objects
61-
msg = "cannot append a non-category item to a CategoricalIndex"
62-
with pytest.raises(TypeError, match=msg):
63-
ci.append(Index(["a", "d"]))
60+
# invalid objects -> cast to object via concat_compat
61+
result = ci.append(Index(["a", "d"]))
62+
expected = Index(["a", "a", "b", "b", "c", "a", "a", "d"])
63+
tm.assert_index_equal(result, expected, exact=True)
6464

6565
# GH14298 - if base object is not categorical -> coerce to object
6666
result = Index(["c", "a"]).append(ci)

pandas/tests/indexing/test_categorical.py

+6-3
Original file line number | Diff line number | Diff line change
@@ -57,9 +57,12 @@ def test_loc_scalar(self):
5757
with pytest.raises(KeyError, match=r"^'d'$"):
5858
df.loc["d"]
5959

60-
msg = "cannot append a non-category item to a CategoricalIndex"
61-
with pytest.raises(TypeError, match=msg):
62-
df.loc["d"] = 10
60+
df2 = df.copy()
61+
expected = df2.copy()
62+
expected.index = expected.index.astype(object)
63+
expected.loc["d"] = 10
64+
df2.loc["d"] = 10
65+
tm.assert_frame_equal(df2, expected)
6366

6467
msg = "'fill_value=d' is not present in this Categorical's categories"
6568
with pytest.raises(TypeError, match=msg):

pandas/tests/reshape/concat/test_categorical.py

+9-5
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
import numpy as np
2-
import pytest
32

43
from pandas.core.dtypes.dtypes import CategoricalDtype
54

@@ -137,13 +136,18 @@ def test_categorical_index_preserver(self):
137136
).set_index("B")
138137
tm.assert_frame_equal(result, expected)
139138

140-
# wrong categories
139+
# wrong categories -> uses concat_compat, which casts to object
141140
df3 = DataFrame(
142141
{"A": a, "B": Categorical(b, categories=list("abe"))}
143142
).set_index("B")
144-
msg = "categories must match existing categories when appending"
145-
with pytest.raises(TypeError, match=msg):
146-
pd.concat([df2, df3])
143+
result = pd.concat([df2, df3])
144+
expected = pd.concat(
145+
[
146+
df2.set_axis(df2.index.astype(object), 0),
147+
df3.set_axis(df3.index.astype(object), 0),
148+
]
149+
)
150+
tm.assert_frame_equal(result, expected)
147151

148152
def test_concat_categorical_tz(self):
149153
# GH-23816

0 commit comments

Comments
 (0)