4
4
5
5
import pandas as pd
6
6
import numpy as np
7
- from pandas import Series , DataFrame
7
+ from pandas import (Series , DataFrame , Timestamp ,
8
+ Categorical , CategoricalIndex )
8
9
from pandas .util .testing import assert_series_equal , assert_frame_equal
9
10
from pandas .util import testing as tm
10
11
@@ -66,6 +67,17 @@ def f():
66
67
67
68
pytest .raises (TypeError , f )
68
69
70
+ def test_getitem_scalar (self ):
71
+
72
+ cats = Categorical ([Timestamp ('12-31-1999' ),
73
+ Timestamp ('12-31-2000' )])
74
+
75
+ s = Series ([1 , 2 ], index = cats )
76
+
77
+ expected = s .iloc [0 ]
78
+ result = s [cats [0 ]]
79
+ assert result == expected
80
+
69
81
def test_loc_listlike (self ):
70
82
71
83
# list of labels
@@ -74,7 +86,7 @@ def test_loc_listlike(self):
74
86
assert_frame_equal (result , expected , check_index_type = True )
75
87
76
88
result = self .df2 .loc [['a' , 'b' , 'e' ]]
77
- exp_index = pd . CategoricalIndex (
89
+ exp_index = CategoricalIndex (
78
90
list ('aaabbe' ), categories = list ('cabe' ), name = 'B' )
79
91
expected = DataFrame ({'A' : [0 , 1 , 5 , 2 , 3 , np .nan ]}, index = exp_index )
80
92
assert_frame_equal (result , expected , check_index_type = True )
@@ -86,14 +98,14 @@ def test_loc_listlike(self):
86
98
df = self .df2 .copy ()
87
99
df .loc ['e' ] = 20
88
100
result = df .loc [['a' , 'b' , 'e' ]]
89
- exp_index = pd . CategoricalIndex (
101
+ exp_index = CategoricalIndex (
90
102
list ('aaabbe' ), categories = list ('cabe' ), name = 'B' )
91
103
expected = DataFrame ({'A' : [0 , 1 , 5 , 2 , 3 , 20 ]}, index = exp_index )
92
104
assert_frame_equal (result , expected )
93
105
94
106
df = self .df2 .copy ()
95
107
result = df .loc [['a' , 'b' , 'e' ]]
96
- exp_index = pd . CategoricalIndex (
108
+ exp_index = CategoricalIndex (
97
109
list ('aaabbe' ), categories = list ('cabe' ), name = 'B' )
98
110
expected = DataFrame ({'A' : [0 , 1 , 5 , 2 , 3 , np .nan ]}, index = exp_index )
99
111
assert_frame_equal (result , expected , check_index_type = True )
@@ -105,21 +117,21 @@ def test_loc_listlike_dtypes(self):
105
117
# GH 11586
106
118
107
119
# unique categories and codes
108
- index = pd . CategoricalIndex (['a' , 'b' , 'c' ])
120
+ index = CategoricalIndex (['a' , 'b' , 'c' ])
109
121
df = DataFrame ({'A' : [1 , 2 , 3 ], 'B' : [4 , 5 , 6 ]}, index = index )
110
122
111
123
# unique slice
112
124
res = df .loc [['a' , 'b' ]]
113
- exp_index = pd . CategoricalIndex (['a' , 'b' ],
114
- categories = index .categories )
125
+ exp_index = CategoricalIndex (['a' , 'b' ],
126
+ categories = index .categories )
115
127
exp = DataFrame ({'A' : [1 , 2 ], 'B' : [4 , 5 ]}, index = exp_index )
116
128
tm .assert_frame_equal (res , exp , check_index_type = True )
117
129
118
130
# duplicated slice
119
131
res = df .loc [['a' , 'a' , 'b' ]]
120
132
121
- exp_index = pd . CategoricalIndex (['a' , 'a' , 'b' ],
122
- categories = index .categories )
133
+ exp_index = CategoricalIndex (['a' , 'a' , 'b' ],
134
+ categories = index .categories )
123
135
exp = DataFrame ({'A' : [1 , 1 , 2 ], 'B' : [4 , 4 , 5 ]}, index = exp_index )
124
136
tm .assert_frame_equal (res , exp , check_index_type = True )
125
137
@@ -130,22 +142,22 @@ def test_loc_listlike_dtypes(self):
130
142
df .loc [['a' , 'x' ]]
131
143
132
144
# duplicated categories and codes
133
- index = pd . CategoricalIndex (['a' , 'b' , 'a' ])
145
+ index = CategoricalIndex (['a' , 'b' , 'a' ])
134
146
df = DataFrame ({'A' : [1 , 2 , 3 ], 'B' : [4 , 5 , 6 ]}, index = index )
135
147
136
148
# unique slice
137
149
res = df .loc [['a' , 'b' ]]
138
150
exp = DataFrame ({'A' : [1 , 3 , 2 ],
139
151
'B' : [4 , 6 , 5 ]},
140
- index = pd . CategoricalIndex (['a' , 'a' , 'b' ]))
152
+ index = CategoricalIndex (['a' , 'a' , 'b' ]))
141
153
tm .assert_frame_equal (res , exp , check_index_type = True )
142
154
143
155
# duplicated slice
144
156
res = df .loc [['a' , 'a' , 'b' ]]
145
157
exp = DataFrame (
146
158
{'A' : [1 , 3 , 1 , 3 , 2 ],
147
159
'B' : [4 , 6 , 4 , 6 , 5
148
- ]}, index = pd . CategoricalIndex (['a' , 'a' , 'a' , 'a' , 'b' ]))
160
+ ]}, index = CategoricalIndex (['a' , 'a' , 'a' , 'a' , 'b' ]))
149
161
tm .assert_frame_equal (res , exp , check_index_type = True )
150
162
151
163
with tm .assertRaisesRegexp (
@@ -155,27 +167,27 @@ def test_loc_listlike_dtypes(self):
155
167
df .loc [['a' , 'x' ]]
156
168
157
169
# contains unused category
158
- index = pd . CategoricalIndex (
170
+ index = CategoricalIndex (
159
171
['a' , 'b' , 'a' , 'c' ], categories = list ('abcde' ))
160
172
df = DataFrame ({'A' : [1 , 2 , 3 , 4 ], 'B' : [5 , 6 , 7 , 8 ]}, index = index )
161
173
162
174
res = df .loc [['a' , 'b' ]]
163
- exp = DataFrame ({'A' : [1 , 3 , 2 ],
164
- 'B' : [ 5 , 7 , 6 ]}, index = pd . CategoricalIndex (
165
- [ 'a' , 'a' , 'b' ], categories = list ('abcde' )))
175
+ exp = DataFrame ({'A' : [1 , 3 , 2 ], 'B' : [ 5 , 7 , 6 ]},
176
+ index = CategoricalIndex ([ 'a' , 'a' , 'b' ],
177
+ categories = list ('abcde' )))
166
178
tm .assert_frame_equal (res , exp , check_index_type = True )
167
179
168
180
res = df .loc [['a' , 'e' ]]
169
181
exp = DataFrame ({'A' : [1 , 3 , np .nan ], 'B' : [5 , 7 , np .nan ]},
170
- index = pd . CategoricalIndex (['a' , 'a' , 'e' ],
171
- categories = list ('abcde' )))
182
+ index = CategoricalIndex (['a' , 'a' , 'e' ],
183
+ categories = list ('abcde' )))
172
184
tm .assert_frame_equal (res , exp , check_index_type = True )
173
185
174
186
# duplicated slice
175
187
res = df .loc [['a' , 'a' , 'b' ]]
176
188
exp = DataFrame ({'A' : [1 , 3 , 1 , 3 , 2 ], 'B' : [5 , 7 , 5 , 7 , 6 ]},
177
- index = pd . CategoricalIndex (['a' , 'a' , 'a' , 'a' , 'b' ],
178
- categories = list ('abcde' )))
189
+ index = CategoricalIndex (['a' , 'a' , 'a' , 'a' , 'b' ],
190
+ categories = list ('abcde' )))
179
191
tm .assert_frame_equal (res , exp , check_index_type = True )
180
192
181
193
with tm .assertRaisesRegexp (
@@ -184,54 +196,77 @@ def test_loc_listlike_dtypes(self):
184
196
'that are in the categories' ):
185
197
df .loc [['a' , 'x' ]]
186
198
199
+ def test_get_indexer_array (self ):
200
+ arr = np .array ([Timestamp ('1999-12-31 00:00:00' ),
201
+ Timestamp ('2000-12-31 00:00:00' )], dtype = object )
202
+ cats = [Timestamp ('1999-12-31 00:00:00' ),
203
+ Timestamp ('2000-12-31 00:00:00' )]
204
+ ci = CategoricalIndex (cats ,
205
+ categories = cats ,
206
+ ordered = False , dtype = 'category' )
207
+ result = ci .get_indexer (arr )
208
+ expected = np .array ([0 , 1 ], dtype = 'intp' )
209
+ tm .assert_numpy_array_equal (result , expected )
210
+
211
+ def test_getitem_with_listlike (self ):
212
+ # GH 16115
213
+ cats = Categorical ([Timestamp ('12-31-1999' ),
214
+ Timestamp ('12-31-2000' )])
215
+
216
+ expected = DataFrame ([[1 , 0 ], [0 , 1 ]], dtype = 'uint8' ,
217
+ index = [0 , 1 ], columns = cats )
218
+ dummies = pd .get_dummies (cats )
219
+ result = dummies [[c for c in dummies .columns ]]
220
+ assert_frame_equal (result , expected )
221
+
187
222
def test_ix_categorical_index (self ):
188
223
# GH 12531
189
- df = pd . DataFrame (np .random .randn (3 , 3 ),
190
- index = list ('ABC' ), columns = list ('XYZ' ))
224
+ df = DataFrame (np .random .randn (3 , 3 ),
225
+ index = list ('ABC' ), columns = list ('XYZ' ))
191
226
cdf = df .copy ()
192
- cdf .index = pd . CategoricalIndex (df .index )
193
- cdf .columns = pd . CategoricalIndex (df .columns )
227
+ cdf .index = CategoricalIndex (df .index )
228
+ cdf .columns = CategoricalIndex (df .columns )
194
229
195
- expect = pd . Series (df .loc ['A' , :], index = cdf .columns , name = 'A' )
230
+ expect = Series (df .loc ['A' , :], index = cdf .columns , name = 'A' )
196
231
assert_series_equal (cdf .loc ['A' , :], expect )
197
232
198
- expect = pd . Series (df .loc [:, 'X' ], index = cdf .index , name = 'X' )
233
+ expect = Series (df .loc [:, 'X' ], index = cdf .index , name = 'X' )
199
234
assert_series_equal (cdf .loc [:, 'X' ], expect )
200
235
201
- exp_index = pd . CategoricalIndex (list ('AB' ), categories = ['A' , 'B' , 'C' ])
202
- expect = pd . DataFrame (df .loc [['A' , 'B' ], :], columns = cdf .columns ,
203
- index = exp_index )
236
+ exp_index = CategoricalIndex (list ('AB' ), categories = ['A' , 'B' , 'C' ])
237
+ expect = DataFrame (df .loc [['A' , 'B' ], :], columns = cdf .columns ,
238
+ index = exp_index )
204
239
assert_frame_equal (cdf .loc [['A' , 'B' ], :], expect )
205
240
206
- exp_columns = pd . CategoricalIndex (list ('XY' ),
207
- categories = ['X' , 'Y' , 'Z' ])
208
- expect = pd . DataFrame (df .loc [:, ['X' , 'Y' ]], index = cdf .index ,
209
- columns = exp_columns )
241
+ exp_columns = CategoricalIndex (list ('XY' ),
242
+ categories = ['X' , 'Y' , 'Z' ])
243
+ expect = DataFrame (df .loc [:, ['X' , 'Y' ]], index = cdf .index ,
244
+ columns = exp_columns )
210
245
assert_frame_equal (cdf .loc [:, ['X' , 'Y' ]], expect )
211
246
212
247
# non-unique
213
- df = pd . DataFrame (np .random .randn (3 , 3 ),
214
- index = list ('ABA' ), columns = list ('XYX' ))
248
+ df = DataFrame (np .random .randn (3 , 3 ),
249
+ index = list ('ABA' ), columns = list ('XYX' ))
215
250
cdf = df .copy ()
216
- cdf .index = pd . CategoricalIndex (df .index )
217
- cdf .columns = pd . CategoricalIndex (df .columns )
251
+ cdf .index = CategoricalIndex (df .index )
252
+ cdf .columns = CategoricalIndex (df .columns )
218
253
219
- exp_index = pd . CategoricalIndex (list ('AA' ), categories = ['A' , 'B' ])
220
- expect = pd . DataFrame (df .loc ['A' , :], columns = cdf .columns ,
221
- index = exp_index )
254
+ exp_index = CategoricalIndex (list ('AA' ), categories = ['A' , 'B' ])
255
+ expect = DataFrame (df .loc ['A' , :], columns = cdf .columns ,
256
+ index = exp_index )
222
257
assert_frame_equal (cdf .loc ['A' , :], expect )
223
258
224
- exp_columns = pd . CategoricalIndex (list ('XX' ), categories = ['X' , 'Y' ])
225
- expect = pd . DataFrame (df .loc [:, 'X' ], index = cdf .index ,
226
- columns = exp_columns )
259
+ exp_columns = CategoricalIndex (list ('XX' ), categories = ['X' , 'Y' ])
260
+ expect = DataFrame (df .loc [:, 'X' ], index = cdf .index ,
261
+ columns = exp_columns )
227
262
assert_frame_equal (cdf .loc [:, 'X' ], expect )
228
263
229
- expect = pd . DataFrame (df .loc [['A' , 'B' ], :], columns = cdf .columns ,
230
- index = pd . CategoricalIndex (list ('AAB' )))
264
+ expect = DataFrame (df .loc [['A' , 'B' ], :], columns = cdf .columns ,
265
+ index = CategoricalIndex (list ('AAB' )))
231
266
assert_frame_equal (cdf .loc [['A' , 'B' ], :], expect )
232
267
233
- expect = pd . DataFrame (df .loc [:, ['X' , 'Y' ]], index = cdf .index ,
234
- columns = pd . CategoricalIndex (list ('XXY' )))
268
+ expect = DataFrame (df .loc [:, ['X' , 'Y' ]], index = cdf .index ,
269
+ columns = CategoricalIndex (list ('XXY' )))
235
270
assert_frame_equal (cdf .loc [:, ['X' , 'Y' ]], expect )
236
271
237
272
def test_read_only_source (self ):
@@ -281,13 +316,13 @@ def test_reindexing(self):
281
316
# then return a Categorical
282
317
cats = list ('cabe' )
283
318
284
- result = self .df2 .reindex (pd . Categorical (['a' , 'd' ], categories = cats ))
319
+ result = self .df2 .reindex (Categorical (['a' , 'd' ], categories = cats ))
285
320
expected = DataFrame ({'A' : [0 , 1 , 5 , np .nan ],
286
321
'B' : Series (list ('aaad' )).astype (
287
322
'category' , categories = cats )}).set_index ('B' )
288
323
assert_frame_equal (result , expected , check_index_type = True )
289
324
290
- result = self .df2 .reindex (pd . Categorical (['a' ], categories = cats ))
325
+ result = self .df2 .reindex (Categorical (['a' ], categories = cats ))
291
326
expected = DataFrame ({'A' : [0 , 1 , 5 ],
292
327
'B' : Series (list ('aaa' )).astype (
293
328
'category' , categories = cats )}).set_index ('B' )
@@ -309,15 +344,15 @@ def test_reindexing(self):
309
344
assert_frame_equal (result , expected , check_index_type = True )
310
345
311
346
# give back the type of categorical that we received
312
- result = self .df2 .reindex (pd . Categorical (
347
+ result = self .df2 .reindex (Categorical (
313
348
['a' , 'd' ], categories = cats , ordered = True ))
314
349
expected = DataFrame (
315
350
{'A' : [0 , 1 , 5 , np .nan ],
316
351
'B' : Series (list ('aaad' )).astype ('category' , categories = cats ,
317
352
ordered = True )}).set_index ('B' )
318
353
assert_frame_equal (result , expected , check_index_type = True )
319
354
320
- result = self .df2 .reindex (pd . Categorical (
355
+ result = self .df2 .reindex (Categorical (
321
356
['a' , 'd' ], categories = ['a' , 'd' ]))
322
357
expected = DataFrame ({'A' : [0 , 1 , 5 , np .nan ],
323
358
'B' : Series (list ('aaad' )).astype (
0 commit comments