6
6
"""
7
7
8
8
from datetime import datetime
9
+ import nose
9
10
10
11
import pandas .util .testing as tm
11
12
@@ -22,9 +23,8 @@ def test_raise_on_mixed_dtype_usecols(self):
22
23
1000,2000,3000
23
24
4000,5000,6000
24
25
"""
25
- msg = ("The elements of \' usecols\' "
26
- "must either be all strings "
27
- "or all integers" )
26
+ msg = ("The elements of 'usecols' must "
27
+ "either be all strings, all unicode, or all integers" )
28
28
usecols = [0 , 'b' , 2 ]
29
29
30
30
with tm .assertRaisesRegexp (ValueError , msg ):
@@ -254,3 +254,103 @@ def test_usecols_with_parse_dates_and_usecol_names(self):
254
254
usecols = [3 , 0 , 2 ],
255
255
parse_dates = parse_dates )
256
256
tm .assert_frame_equal (df , expected )
257
+
258
def test_usecols_with_unicode_strings(self):
    """Select columns by passing unicode literals as ``usecols``.

    Parses a four-column CSV and checks that only the ``AAA`` and
    ``BBB`` columns come back when they are requested as ``u''`` strings.
    """
    # see gh-13219

    csv_text = '''AAA,BBB,CCC,DDD
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a
'''

    # Expected frame holds just the two requested columns, keyed by row.
    aaa_column = {
        0: 0.056674972999999997,
        1: 2.6132309819999997,
        2: 3.5689350380000002,
    }
    bbb_column = {0: 8, 1: 2, 2: 7}
    expected = DataFrame({'AAA': aaa_column, 'BBB': bbb_column})

    result = self.read_csv(StringIO(csv_text), usecols=[u'AAA', u'BBB'])
    tm.assert_frame_equal(result, expected)
279
+
280
def test_usecols_with_single_byte_unicode_strings(self):
    """Select single-character column names given as unicode literals.

    Same scenario as the multi-character case, but every header is one
    character long (``A``..``D``) and ``usecols`` is ``[u'A', u'B']``.
    """
    # see gh-13219

    csv_text = '''A,B,C,D
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a
'''

    # Expected frame holds just the two requested columns, keyed by row.
    a_column = {
        0: 0.056674972999999997,
        1: 2.6132309819999997,
        2: 3.5689350380000002,
    }
    b_column = {0: 8, 1: 2, 2: 7}
    expected = DataFrame({'A': a_column, 'B': b_column})

    result = self.read_csv(StringIO(csv_text), usecols=[u'A', u'B'])
    tm.assert_frame_equal(result, expected)
301
+
302
def test_usecols_with_mixed_encoding_strings(self):
    """Reject ``usecols`` lists that mix unicode and bytes literals.

    Both orderings (unicode first, bytes first) must raise ``ValueError``
    with the message spelled out below.
    """
    csv_text = '''AAA,BBB,CCC,DDD
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a
'''

    msg = ("The elements of 'usecols' must "
           "either be all strings, all unicode, or all integers")

    # Exercise each ordering of the mixed unicode/bytes pair in turn; a
    # fresh StringIO is built per attempt so every parse starts at the top.
    mixed_selections = (
        [u'AAA', b'BBB'],
        [b'AAA', u'BBB'],
    )
    for selection in mixed_selections:
        with tm.assertRaisesRegexp(ValueError, msg):
            self.read_csv(StringIO(csv_text), usecols=selection)
317
+
318
def test_usecols_with_multibyte_characters(self):
    """Select columns whose headers are multibyte characters.

    ``usecols`` is given as plain (unprefixed) string literals that match
    the first two headers of the CSV.
    """
    csv_text = '''あああ,いい,ううう,ええええ
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a
'''

    # Expected frame holds just the two requested columns, keyed by row.
    first_column = {
        0: 0.056674972999999997,
        1: 2.6132309819999997,
        2: 3.5689350380000002,
    }
    second_column = {0: 8, 1: 2, 2: 7}
    expected = DataFrame({'あああ': first_column, 'いい': second_column})

    result = self.read_csv(StringIO(csv_text), usecols=['あああ', 'いい'])
    tm.assert_frame_equal(result, expected)
336
+
337
def test_usecols_with_multibyte_unicode_characters(self):
    """Select multibyte-named columns given as unicode literals.

    Currently skipped unconditionally: the ``raise`` below fires before
    anything else runs, so the remainder of the body is parked until the
    referenced issue is resolved.
    """
    raise nose.SkipTest('TODO: see gh-13253')

    # Not reached while the skip above is in place.
    csv_text = '''あああ,いい,ううう,ええええ
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a
'''

    # Expected frame holds just the two requested columns, keyed by row.
    first_column = {
        0: 0.056674972999999997,
        1: 2.6132309819999997,
        2: 3.5689350380000002,
    }
    second_column = {0: 8, 1: 2, 2: 7}
    expected = DataFrame({'あああ': first_column, 'いい': second_column})

    result = self.read_csv(StringIO(csv_text), usecols=[u'あああ', u'いい'])
    tm.assert_frame_equal(result, expected)
0 commit comments