@@ -149,6 +149,105 @@ def test_categorical_dtype_chunksize(self):
149
149
for actual , expected in zip (actuals , expecteds ):
150
150
tm .assert_frame_equal (actual , expected )
151
151
152
@pytest.mark.parametrize('ordered', [False, True])
@pytest.mark.parametrize('categories', [
    ['a', 'b', 'c'],
    ['a', 'c', 'b'],
    ['a', 'b', 'c', 'd'],
    ['c', 'b', 'a'],
])
def test_categorical_categoricaldtype(self, categories, ordered):
    """Read column ``b`` with a CategoricalDtype and compare the result.

    Parametrized over several category lists (sorted, reordered, with an
    extra unused category, reversed) and over both ``ordered`` settings.
    The expected column is a Categorical built from the same values with
    the same ``categories`` and ``ordered`` arguments.
    """
    # CSV spelled as one literal so its rows do not depend on the
    # indentation of the surrounding code.
    data = "a,b\n1,a\n1,b\n1,b\n2,c"
    expected = pd.DataFrame({
        "a": [1, 1, 1, 2],
        "b": Categorical(['a', 'b', 'b', 'c'],
                         categories=categories,
                         ordered=ordered),
    })
    dtype = {"b": CategoricalDtype(categories=categories, ordered=ordered)}
    result = self.read_csv(StringIO(data), dtype=dtype)
    tm.assert_frame_equal(result, expected)
175
+
176
def test_categorical_categoricaldtype_unsorted(self):
    """Read column ``b`` with categories given in non-sorted order.

    The dtype lists its categories as ``['c', 'b', 'a']``; the expected
    column is a Categorical built with that same category order.
    """
    # CSV spelled as one literal so its rows do not depend on the
    # indentation of the surrounding code.
    data = "a,b\n1,a\n1,b\n1,b\n2,c"
    dtype = CategoricalDtype(['c', 'b', 'a'])
    expected = pd.DataFrame({
        'a': [1, 1, 1, 2],
        'b': Categorical(['a', 'b', 'b', 'c'],
                         categories=['c', 'b', 'a']),
    })
    result = self.read_csv(StringIO(data), dtype={'b': dtype})
    tm.assert_frame_equal(result, expected)
189
+
190
def test_categoricaldtype_coerces_numeric(self):
    """Read integer text into a CategoricalDtype with integer categories.

    The expected column is ``Categorical([1, 1, 2, 3])``, i.e. the parsed
    values must compare equal to integers rather than to their text form.
    """
    dtype = {'b': CategoricalDtype([1, 2, 3])}
    data = "b\n1\n1\n2\n3"
    expected = pd.DataFrame({'b': Categorical([1, 1, 2, 3])})
    result = self.read_csv(StringIO(data), dtype=dtype)
    tm.assert_frame_equal(result, expected)
196
+
197
def test_categoricaldtype_coerces_datetime(self):
    """Read date text into a CategoricalDtype with datetime categories.

    Two scenarios are checked:

    1. Categories come from a yearly ``date_range``; each CSV row is one
       of those dates, so the expected column is a Categorical of the
       categories themselves.
    2. A single Timestamp category; the CSV spells that instant in two
       different text forms and both rows are expected to equal it.
    """
    dtype = {
        'b': CategoricalDtype(pd.date_range('2017', '2019', freq='AS'))
    }
    data = "b\n2017-01-01\n2018-01-01\n2019-01-01"
    expected = pd.DataFrame({'b': Categorical(dtype['b'].categories)})
    result = self.read_csv(StringIO(data), dtype=dtype)
    tm.assert_frame_equal(result, expected)

    # Same instant written with and without an explicit time component.
    dtype = {
        'b': CategoricalDtype([pd.Timestamp("2014")])
    }
    data = "b\n2014-01-01\n2014-01-01T00:00:00"
    expected = pd.DataFrame({'b': Categorical([pd.Timestamp('2014')] * 2)})
    result = self.read_csv(StringIO(data), dtype=dtype)
    tm.assert_frame_equal(result, expected)
213
+
214
def test_categoricaldtype_coerces_timedelta(self):
    """Read duration text into a CategoricalDtype with timedelta categories.

    The categories are built with ``pd.to_timedelta`` from the same three
    strings that appear in the CSV, so the expected column is a
    Categorical of the categories themselves.
    """
    dtype = {'b': CategoricalDtype(pd.to_timedelta(['1H', '2H', '3H']))}
    data = "b\n1H\n2H\n3H"
    expected = pd.DataFrame({'b': Categorical(dtype['b'].categories)})
    result = self.read_csv(StringIO(data), dtype=dtype)
    tm.assert_frame_equal(result, expected)
220
+
221
def test_categoricaldtype_unexpected_categories(self):
    """Read data containing a value that is not among the dtype's categories.

    The CSV holds ``'c'``, which the dtype does not list. The parsed column
    must equal what ``Categorical(values, dtype=...)`` gives for the same
    values (presumably a missing value for ``'c'`` -- confirm against the
    Categorical documentation).
    """
    dtype = {'b': CategoricalDtype(['a', 'b', 'd', 'e'])}
    data = "b\nd\na\nc\nd"  # Unexpected c
    expected = pd.DataFrame({"b": Categorical(list('dacd'),
                                              dtype=dtype['b'])})
    result = self.read_csv(StringIO(data), dtype=dtype)
    tm.assert_frame_equal(result, expected)
228
+
229
def test_categorical_categoricaldtype_chunksize(self):
    """Read in chunks of two rows with a CategoricalDtype on column ``b``.

    Each chunk must carry the full category list ``['a', 'b', 'c']`` even
    when it only contains some of those values, and the second chunk keeps
    the row labels 2 and 3.
    """
    # GH 10153
    data = "a,b\n1,a\n1,b\n1,b\n2,c"
    cats = ['a', 'b', 'c']
    expecteds = [pd.DataFrame({'a': [1, 1],
                               'b': Categorical(['a', 'b'],
                                                categories=cats)}),
                 pd.DataFrame({'a': [1, 2],
                               'b': Categorical(['b', 'c'],
                                                categories=cats)},
                              index=[2, 3])]
    dtype = CategoricalDtype(cats)
    # Materialize the chunks so the count can be checked: zip() alone stops
    # at the shorter input and would hide a missing or extra chunk.
    actuals = list(self.read_csv(StringIO(data), dtype={'b': dtype},
                                 chunksize=2))
    assert len(actuals) == len(expecteds)

    for actual, expected in zip(actuals, expecteds):
        tm.assert_frame_equal(actual, expected)
250
+
152
251
def test_empty_pass_dtype (self ):
153
252
data = 'one,two'
154
253
result = self .read_csv (StringIO (data ), dtype = {'one' : 'u1' })
0 commit comments