Skip to content

Commit ee8e90b

Browse files
topper-123 authored and proost committed
BUG: loc-indexing with a CategoricalIndex with non-string categories (pandas-dev#29922)
1 parent 9a2d07c commit ee8e90b

File tree

5 files changed

+76
-6
lines changed

5 files changed

+76
-6
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -743,6 +743,7 @@ Indexing
743743
- Fix assignment of column via `.loc` with numpy non-ns datetime type (:issue:`27395`)
744744
- Bug in :meth:`Float64Index.astype` where ``np.inf`` was not handled properly when casting to an integer dtype (:issue:`28475`)
745745
- :meth:`Index.union` could fail when the left contained duplicates (:issue:`28257`)
746+
- Bug when indexing with ``.loc`` where a ``ValueError`` was raised if the index was a :class:`CategoricalIndex` with integer and float categories (:issue:`17569`)
746747
- :meth:`Index.get_indexer_non_unique` could fail with `TypeError` in some cases, such as when searching for ints in a string index (:issue:`28257`)
747748
- Bug in :meth:`Float64Index.get_loc` incorrectly raising ``TypeError`` instead of ``KeyError`` (:issue:`29189`)
748749

pandas/core/indexes/base.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -2939,11 +2939,11 @@ def _convert_scalar_indexer(self, key, kind=None):
29392939
"unicode",
29402940
"mixed",
29412941
]:
2942-
return self._invalid_indexer("label", key)
2942+
self._invalid_indexer("label", key)
29432943

29442944
elif kind in ["loc"] and is_integer(key):
29452945
if not self.holds_integer():
2946-
return self._invalid_indexer("label", key)
2946+
self._invalid_indexer("label", key)
29472947

29482948
return key
29492949

pandas/core/indexes/category.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -696,9 +696,11 @@ def get_indexer_non_unique(self, target):
696696

697697
@Appender(_index_shared_docs["_convert_scalar_indexer"])
698698
def _convert_scalar_indexer(self, key, kind=None):
699-
if self.categories._defer_to_indexing:
700-
return self.categories._convert_scalar_indexer(key, kind=kind)
701-
699+
if kind == "loc":
700+
try:
701+
return self.categories._convert_scalar_indexer(key, kind=kind)
702+
except TypeError:
703+
self._invalid_indexer("label", key)
702704
return super()._convert_scalar_indexer(key, kind=kind)
703705

704706
@Appender(_index_shared_docs["_convert_list_indexer"])

pandas/tests/indexing/test_categorical.py

+62
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@
1212
Index,
1313
Interval,
1414
Series,
15+
Timedelta,
1516
Timestamp,
17+
conftest,
1618
)
1719
from pandas.api.types import CategoricalDtype as CDT
1820
import pandas.util.testing as tm
@@ -80,6 +82,13 @@ def test_loc_scalar(self):
8082
with pytest.raises(TypeError, match=msg):
8183
df.loc["d", "C"] = 10
8284

85+
msg = (
86+
r"cannot do label indexing on <class 'pandas\.core\.indexes\.category"
87+
r"\.CategoricalIndex'> with these indexers \[1\] of <class 'int'>"
88+
)
89+
with pytest.raises(TypeError, match=msg):
90+
df.loc[1]
91+
8392
def test_getitem_scalar(self):
8493

8594
cats = Categorical([Timestamp("12-31-1999"), Timestamp("12-31-2000")])
@@ -754,3 +763,56 @@ def test_map_with_dict_or_series(self):
754763
output = cur_index.map(mapper)
755764
# Order of categories in output can be different
756765
tm.assert_index_equal(expected, output)
766+
767+
@pytest.mark.parametrize(
768+
"idx_values",
769+
[
770+
# python types
771+
[1, 2, 3],
772+
[-1, -2, -3],
773+
[1.5, 2.5, 3.5],
774+
[-1.5, -2.5, -3.5],
775+
# numpy int/uint
776+
*[np.array([1, 2, 3], dtype=dtype) for dtype in conftest.ALL_INT_DTYPES],
777+
# numpy floats
778+
*[np.array([1.5, 2.5, 3.5], dtype=dtyp) for dtyp in conftest.FLOAT_DTYPES],
779+
# numpy object
780+
np.array([1, "b", 3.5], dtype=object),
781+
# pandas scalars
782+
[Interval(1, 4), Interval(4, 6), Interval(6, 9)],
783+
[Timestamp(2019, 1, 1), Timestamp(2019, 2, 1), Timestamp(2019, 3, 1)],
784+
[Timedelta(1, "d"), Timedelta(2, "d"), Timedelta(3, "D")],
785+
# pandas Integer arrays
786+
*[pd.array([1, 2, 3], dtype=dtype) for dtype in conftest.ALL_EA_INT_DTYPES],
787+
# other pandas arrays
788+
pd.IntervalIndex.from_breaks([1, 4, 6, 9]).array,
789+
pd.date_range("2019-01-01", periods=3).array,
790+
pd.timedelta_range(start="1d", periods=3).array,
791+
],
792+
)
793+
def test_loc_with_non_string_categories(self, idx_values, ordered_fixture):
794+
# GH-17569
795+
cat_idx = CategoricalIndex(idx_values, ordered=ordered_fixture)
796+
df = DataFrame({"A": ["foo", "bar", "baz"]}, index=cat_idx)
797+
798+
# scalar selection
799+
result = df.loc[idx_values[0]]
800+
expected = Series(["foo"], index=["A"], name=idx_values[0])
801+
tm.assert_series_equal(result, expected)
802+
803+
# list selection
804+
result = df.loc[idx_values[:2]]
805+
expected = DataFrame(["foo", "bar"], index=cat_idx[:2], columns=["A"])
806+
tm.assert_frame_equal(result, expected)
807+
808+
# scalar assignment
809+
result = df.copy()
810+
result.loc[idx_values[0]] = "qux"
811+
expected = DataFrame({"A": ["qux", "bar", "baz"]}, index=cat_idx)
812+
tm.assert_frame_equal(result, expected)
813+
814+
# list assignment
815+
result = df.copy()
816+
result.loc[idx_values[:2], "A"] = ["qux", "qux2"]
817+
expected = DataFrame({"A": ["qux", "qux2", "baz"]}, index=cat_idx)
818+
tm.assert_frame_equal(result, expected)

pandas/tests/indexing/test_floats.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,12 @@ def test_scalar_non_numeric(self):
100100
idxr(s)[3.0]
101101

102102
# label based can be a TypeError or KeyError
103-
if s.index.inferred_type in ["string", "unicode", "mixed"]:
103+
if s.index.inferred_type in {
104+
"categorical",
105+
"string",
106+
"unicode",
107+
"mixed",
108+
}:
104109
error = KeyError
105110
msg = r"^3$"
106111
else:

0 commit comments

Comments
 (0)