Skip to content

Commit 331db44

Browse files
jreback authored and pcluo committed
REGR: Bug in indexing with a CategoricalIndex (pandas-dev#16123)
* REGR: Bug in indexing with a CategoricalIndex closes pandas-dev#16115 * some cleaning * BUG: scalar getitem with a CI closes pandas-dev#16131
1 parent 3234601 commit 331db44

File tree

4 files changed

+108
-55
lines changed

4 files changed

+108
-55
lines changed

doc/source/whatsnew/v0.20.0.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -1631,7 +1631,8 @@ Indexing
16311631
- Bug in the display of ``.info()`` where a qualifier (+) would always be displayed with a ``MultiIndex`` that contains only non-strings (:issue:`15245`)
16321632
- Bug in ``pd.concat()`` where the names of ``MultiIndex`` of resulting ``DataFrame`` are not handled correctly when ``None`` is presented in the names of ``MultiIndex`` of input ``DataFrame`` (:issue:`15787`)
16331633
- Bug in ``DataFrame.sort_index()`` and ``Series.sort_index()`` where ``na_position`` doesn't work with a ``MultiIndex`` (:issue:`14784`, :issue:`16604`)
1634-
- Bug in in ``pd.concat()`` when combining objects with a ``CategoricalIndex`` (:issue:`16111`)
1634+
- Bug in ``pd.concat()`` when combining objects with a ``CategoricalIndex`` (:issue:`16111`)
1635+
- Bug in indexing with a scalar and a ``CategoricalIndex`` (:issue:`16123`)
16351636

16361637
I/O
16371638
^^^

pandas/core/indexes/category.py

+19-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@
1010
is_list_like,
1111
is_interval_dtype,
1212
is_scalar)
13-
from pandas.core.common import _asarray_tuplesafe
13+
from pandas.core.common import (_asarray_tuplesafe,
14+
_values_from_object)
1415
from pandas.core.dtypes.missing import array_equivalent
1516
from pandas.core.algorithms import take_1d
1617

@@ -353,6 +354,22 @@ def get_loc(self, key, method=None):
353354
raise KeyError(key)
354355
return self._engine.get_loc(codes)
355356

357+
def get_value(self, series, key):
358+
"""
359+
Fast lookup of value from 1-dimensional ndarray. Only use this if you
360+
know what you're doing
361+
"""
362+
try:
363+
k = _values_from_object(key)
364+
k = self._convert_scalar_indexer(k, kind='getitem')
365+
indexer = self.get_loc(k)
366+
return series.iloc[indexer]
367+
except (KeyError, TypeError):
368+
pass
369+
370+
# we might be a positional indexer
371+
return super(CategoricalIndex, self).get_value(series, key)
372+
356373
def _can_reindex(self, indexer):
357374
""" always allow reindexing """
358375
pass
@@ -507,7 +524,7 @@ def _convert_list_indexer(self, keyarr, kind=None):
507524
indexer = self.categories._convert_list_indexer(keyarr, kind=kind)
508525
return Index(self.codes).get_indexer_for(indexer)
509526

510-
indexer = self.categories.get_indexer(keyarr)
527+
indexer = self.categories.get_indexer(np.asarray(keyarr))
511528
if (indexer == -1).any():
512529
raise KeyError(
513530
"a list-indexer must only "

pandas/tests/indexing/test_categorical.py

+86-51
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44

55
import pandas as pd
66
import numpy as np
7-
from pandas import Series, DataFrame
7+
from pandas import (Series, DataFrame, Timestamp,
8+
Categorical, CategoricalIndex)
89
from pandas.util.testing import assert_series_equal, assert_frame_equal
910
from pandas.util import testing as tm
1011

@@ -66,6 +67,17 @@ def f():
6667

6768
pytest.raises(TypeError, f)
6869

70+
def test_getitem_scalar(self):
71+
72+
cats = Categorical([Timestamp('12-31-1999'),
73+
Timestamp('12-31-2000')])
74+
75+
s = Series([1, 2], index=cats)
76+
77+
expected = s.iloc[0]
78+
result = s[cats[0]]
79+
assert result == expected
80+
6981
def test_loc_listlike(self):
7082

7183
# list of labels
@@ -74,7 +86,7 @@ def test_loc_listlike(self):
7486
assert_frame_equal(result, expected, check_index_type=True)
7587

7688
result = self.df2.loc[['a', 'b', 'e']]
77-
exp_index = pd.CategoricalIndex(
89+
exp_index = CategoricalIndex(
7890
list('aaabbe'), categories=list('cabe'), name='B')
7991
expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan]}, index=exp_index)
8092
assert_frame_equal(result, expected, check_index_type=True)
@@ -86,14 +98,14 @@ def test_loc_listlike(self):
8698
df = self.df2.copy()
8799
df.loc['e'] = 20
88100
result = df.loc[['a', 'b', 'e']]
89-
exp_index = pd.CategoricalIndex(
101+
exp_index = CategoricalIndex(
90102
list('aaabbe'), categories=list('cabe'), name='B')
91103
expected = DataFrame({'A': [0, 1, 5, 2, 3, 20]}, index=exp_index)
92104
assert_frame_equal(result, expected)
93105

94106
df = self.df2.copy()
95107
result = df.loc[['a', 'b', 'e']]
96-
exp_index = pd.CategoricalIndex(
108+
exp_index = CategoricalIndex(
97109
list('aaabbe'), categories=list('cabe'), name='B')
98110
expected = DataFrame({'A': [0, 1, 5, 2, 3, np.nan]}, index=exp_index)
99111
assert_frame_equal(result, expected, check_index_type=True)
@@ -105,21 +117,21 @@ def test_loc_listlike_dtypes(self):
105117
# GH 11586
106118

107119
# unique categories and codes
108-
index = pd.CategoricalIndex(['a', 'b', 'c'])
120+
index = CategoricalIndex(['a', 'b', 'c'])
109121
df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=index)
110122

111123
# unique slice
112124
res = df.loc[['a', 'b']]
113-
exp_index = pd.CategoricalIndex(['a', 'b'],
114-
categories=index.categories)
125+
exp_index = CategoricalIndex(['a', 'b'],
126+
categories=index.categories)
115127
exp = DataFrame({'A': [1, 2], 'B': [4, 5]}, index=exp_index)
116128
tm.assert_frame_equal(res, exp, check_index_type=True)
117129

118130
# duplicated slice
119131
res = df.loc[['a', 'a', 'b']]
120132

121-
exp_index = pd.CategoricalIndex(['a', 'a', 'b'],
122-
categories=index.categories)
133+
exp_index = CategoricalIndex(['a', 'a', 'b'],
134+
categories=index.categories)
123135
exp = DataFrame({'A': [1, 1, 2], 'B': [4, 4, 5]}, index=exp_index)
124136
tm.assert_frame_equal(res, exp, check_index_type=True)
125137

@@ -130,22 +142,22 @@ def test_loc_listlike_dtypes(self):
130142
df.loc[['a', 'x']]
131143

132144
# duplicated categories and codes
133-
index = pd.CategoricalIndex(['a', 'b', 'a'])
145+
index = CategoricalIndex(['a', 'b', 'a'])
134146
df = DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}, index=index)
135147

136148
# unique slice
137149
res = df.loc[['a', 'b']]
138150
exp = DataFrame({'A': [1, 3, 2],
139151
'B': [4, 6, 5]},
140-
index=pd.CategoricalIndex(['a', 'a', 'b']))
152+
index=CategoricalIndex(['a', 'a', 'b']))
141153
tm.assert_frame_equal(res, exp, check_index_type=True)
142154

143155
# duplicated slice
144156
res = df.loc[['a', 'a', 'b']]
145157
exp = DataFrame(
146158
{'A': [1, 3, 1, 3, 2],
147159
'B': [4, 6, 4, 6, 5
148-
]}, index=pd.CategoricalIndex(['a', 'a', 'a', 'a', 'b']))
160+
]}, index=CategoricalIndex(['a', 'a', 'a', 'a', 'b']))
149161
tm.assert_frame_equal(res, exp, check_index_type=True)
150162

151163
with tm.assertRaisesRegexp(
@@ -155,27 +167,27 @@ def test_loc_listlike_dtypes(self):
155167
df.loc[['a', 'x']]
156168

157169
# contains unused category
158-
index = pd.CategoricalIndex(
170+
index = CategoricalIndex(
159171
['a', 'b', 'a', 'c'], categories=list('abcde'))
160172
df = DataFrame({'A': [1, 2, 3, 4], 'B': [5, 6, 7, 8]}, index=index)
161173

162174
res = df.loc[['a', 'b']]
163-
exp = DataFrame({'A': [1, 3, 2],
164-
'B': [5, 7, 6]}, index=pd.CategoricalIndex(
165-
['a', 'a', 'b'], categories=list('abcde')))
175+
exp = DataFrame({'A': [1, 3, 2], 'B': [5, 7, 6]},
176+
index=CategoricalIndex(['a', 'a', 'b'],
177+
categories=list('abcde')))
166178
tm.assert_frame_equal(res, exp, check_index_type=True)
167179

168180
res = df.loc[['a', 'e']]
169181
exp = DataFrame({'A': [1, 3, np.nan], 'B': [5, 7, np.nan]},
170-
index=pd.CategoricalIndex(['a', 'a', 'e'],
171-
categories=list('abcde')))
182+
index=CategoricalIndex(['a', 'a', 'e'],
183+
categories=list('abcde')))
172184
tm.assert_frame_equal(res, exp, check_index_type=True)
173185

174186
# duplicated slice
175187
res = df.loc[['a', 'a', 'b']]
176188
exp = DataFrame({'A': [1, 3, 1, 3, 2], 'B': [5, 7, 5, 7, 6]},
177-
index=pd.CategoricalIndex(['a', 'a', 'a', 'a', 'b'],
178-
categories=list('abcde')))
189+
index=CategoricalIndex(['a', 'a', 'a', 'a', 'b'],
190+
categories=list('abcde')))
179191
tm.assert_frame_equal(res, exp, check_index_type=True)
180192

181193
with tm.assertRaisesRegexp(
@@ -184,54 +196,77 @@ def test_loc_listlike_dtypes(self):
184196
'that are in the categories'):
185197
df.loc[['a', 'x']]
186198

199+
def test_get_indexer_array(self):
200+
arr = np.array([Timestamp('1999-12-31 00:00:00'),
201+
Timestamp('2000-12-31 00:00:00')], dtype=object)
202+
cats = [Timestamp('1999-12-31 00:00:00'),
203+
Timestamp('2000-12-31 00:00:00')]
204+
ci = CategoricalIndex(cats,
205+
categories=cats,
206+
ordered=False, dtype='category')
207+
result = ci.get_indexer(arr)
208+
expected = np.array([0, 1], dtype='intp')
209+
tm.assert_numpy_array_equal(result, expected)
210+
211+
def test_getitem_with_listlike(self):
212+
# GH 16115
213+
cats = Categorical([Timestamp('12-31-1999'),
214+
Timestamp('12-31-2000')])
215+
216+
expected = DataFrame([[1, 0], [0, 1]], dtype='uint8',
217+
index=[0, 1], columns=cats)
218+
dummies = pd.get_dummies(cats)
219+
result = dummies[[c for c in dummies.columns]]
220+
assert_frame_equal(result, expected)
221+
187222
def test_ix_categorical_index(self):
188223
# GH 12531
189-
df = pd.DataFrame(np.random.randn(3, 3),
190-
index=list('ABC'), columns=list('XYZ'))
224+
df = DataFrame(np.random.randn(3, 3),
225+
index=list('ABC'), columns=list('XYZ'))
191226
cdf = df.copy()
192-
cdf.index = pd.CategoricalIndex(df.index)
193-
cdf.columns = pd.CategoricalIndex(df.columns)
227+
cdf.index = CategoricalIndex(df.index)
228+
cdf.columns = CategoricalIndex(df.columns)
194229

195-
expect = pd.Series(df.loc['A', :], index=cdf.columns, name='A')
230+
expect = Series(df.loc['A', :], index=cdf.columns, name='A')
196231
assert_series_equal(cdf.loc['A', :], expect)
197232

198-
expect = pd.Series(df.loc[:, 'X'], index=cdf.index, name='X')
233+
expect = Series(df.loc[:, 'X'], index=cdf.index, name='X')
199234
assert_series_equal(cdf.loc[:, 'X'], expect)
200235

201-
exp_index = pd.CategoricalIndex(list('AB'), categories=['A', 'B', 'C'])
202-
expect = pd.DataFrame(df.loc[['A', 'B'], :], columns=cdf.columns,
203-
index=exp_index)
236+
exp_index = CategoricalIndex(list('AB'), categories=['A', 'B', 'C'])
237+
expect = DataFrame(df.loc[['A', 'B'], :], columns=cdf.columns,
238+
index=exp_index)
204239
assert_frame_equal(cdf.loc[['A', 'B'], :], expect)
205240

206-
exp_columns = pd.CategoricalIndex(list('XY'),
207-
categories=['X', 'Y', 'Z'])
208-
expect = pd.DataFrame(df.loc[:, ['X', 'Y']], index=cdf.index,
209-
columns=exp_columns)
241+
exp_columns = CategoricalIndex(list('XY'),
242+
categories=['X', 'Y', 'Z'])
243+
expect = DataFrame(df.loc[:, ['X', 'Y']], index=cdf.index,
244+
columns=exp_columns)
210245
assert_frame_equal(cdf.loc[:, ['X', 'Y']], expect)
211246

212247
# non-unique
213-
df = pd.DataFrame(np.random.randn(3, 3),
214-
index=list('ABA'), columns=list('XYX'))
248+
df = DataFrame(np.random.randn(3, 3),
249+
index=list('ABA'), columns=list('XYX'))
215250
cdf = df.copy()
216-
cdf.index = pd.CategoricalIndex(df.index)
217-
cdf.columns = pd.CategoricalIndex(df.columns)
251+
cdf.index = CategoricalIndex(df.index)
252+
cdf.columns = CategoricalIndex(df.columns)
218253

219-
exp_index = pd.CategoricalIndex(list('AA'), categories=['A', 'B'])
220-
expect = pd.DataFrame(df.loc['A', :], columns=cdf.columns,
221-
index=exp_index)
254+
exp_index = CategoricalIndex(list('AA'), categories=['A', 'B'])
255+
expect = DataFrame(df.loc['A', :], columns=cdf.columns,
256+
index=exp_index)
222257
assert_frame_equal(cdf.loc['A', :], expect)
223258

224-
exp_columns = pd.CategoricalIndex(list('XX'), categories=['X', 'Y'])
225-
expect = pd.DataFrame(df.loc[:, 'X'], index=cdf.index,
226-
columns=exp_columns)
259+
exp_columns = CategoricalIndex(list('XX'), categories=['X', 'Y'])
260+
expect = DataFrame(df.loc[:, 'X'], index=cdf.index,
261+
columns=exp_columns)
227262
assert_frame_equal(cdf.loc[:, 'X'], expect)
228263

229-
expect = pd.DataFrame(df.loc[['A', 'B'], :], columns=cdf.columns,
230-
index=pd.CategoricalIndex(list('AAB')))
264+
expect = DataFrame(df.loc[['A', 'B'], :], columns=cdf.columns,
265+
index=CategoricalIndex(list('AAB')))
231266
assert_frame_equal(cdf.loc[['A', 'B'], :], expect)
232267

233-
expect = pd.DataFrame(df.loc[:, ['X', 'Y']], index=cdf.index,
234-
columns=pd.CategoricalIndex(list('XXY')))
268+
expect = DataFrame(df.loc[:, ['X', 'Y']], index=cdf.index,
269+
columns=CategoricalIndex(list('XXY')))
235270
assert_frame_equal(cdf.loc[:, ['X', 'Y']], expect)
236271

237272
def test_read_only_source(self):
@@ -281,13 +316,13 @@ def test_reindexing(self):
281316
# then return a Categorical
282317
cats = list('cabe')
283318

284-
result = self.df2.reindex(pd.Categorical(['a', 'd'], categories=cats))
319+
result = self.df2.reindex(Categorical(['a', 'd'], categories=cats))
285320
expected = DataFrame({'A': [0, 1, 5, np.nan],
286321
'B': Series(list('aaad')).astype(
287322
'category', categories=cats)}).set_index('B')
288323
assert_frame_equal(result, expected, check_index_type=True)
289324

290-
result = self.df2.reindex(pd.Categorical(['a'], categories=cats))
325+
result = self.df2.reindex(Categorical(['a'], categories=cats))
291326
expected = DataFrame({'A': [0, 1, 5],
292327
'B': Series(list('aaa')).astype(
293328
'category', categories=cats)}).set_index('B')
@@ -309,15 +344,15 @@ def test_reindexing(self):
309344
assert_frame_equal(result, expected, check_index_type=True)
310345

311346
# give back the type of categorical that we received
312-
result = self.df2.reindex(pd.Categorical(
347+
result = self.df2.reindex(Categorical(
313348
['a', 'd'], categories=cats, ordered=True))
314349
expected = DataFrame(
315350
{'A': [0, 1, 5, np.nan],
316351
'B': Series(list('aaad')).astype('category', categories=cats,
317352
ordered=True)}).set_index('B')
318353
assert_frame_equal(result, expected, check_index_type=True)
319354

320-
result = self.df2.reindex(pd.Categorical(
355+
result = self.df2.reindex(Categorical(
321356
['a', 'd'], categories=['a', 'd']))
322357
expected = DataFrame({'A': [0, 1, 5, np.nan],
323358
'B': Series(list('aaad')).astype(

pandas/tests/reshape/test_reshape.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -490,8 +490,8 @@ def test_dataframe_dummies_with_categorical(self):
490490
'cat_x', 'cat_y']]
491491
assert_frame_equal(result, expected)
492492

493-
# GH12402 Add a new parameter `drop_first` to avoid collinearity
494493
def test_basic_drop_first(self):
494+
# GH12402 Add a new parameter `drop_first` to avoid collinearity
495495
# Basic case
496496
s_list = list('abc')
497497
s_series = Series(s_list)

0 commit comments

Comments
 (0)