1
1
# pylint: disable=E1101
2
2
3
3
from pandas .compat import u , range , map
4
+ from datetime import datetime
4
5
import os
5
6
import unittest
6
7
@@ -306,6 +307,56 @@ def test_reader_closes_file(self):
306
307
307
308
self .assertTrue (f .closed )
308
309
310
def test_reader_special_dtypes(self):
    """Exercise ``read_excel`` type handling on the ``test_types`` workbooks.

    Reads ``test_types.xls`` and ``test_types.xlsx`` from ``self.dirpath``
    and compares the result against a hand-built frame:

    * default options, for both file formats;
    * ``convert_float=False``, for both file formats;
    * every column used as the index, selected by position and by name
      (xlsx only);
    * a ``str`` converter on ``"StrCol"``, with and without
      ``convert_float=False`` (xlsx only).
    """
    _skip_if_no_xlrd()

    sheet = 'Sheet1'
    xlsx_file = os.path.join(self.dirpath, 'test_types.xlsx')
    xls_file = os.path.join(self.dirpath, 'test_types.xls')
    both_files = (xls_file, xlsx_file)

    dates = [datetime(2013, 10, 30), datetime(2013, 10, 31),
             datetime(1905, 1, 1), datetime(2013, 12, 14),
             datetime(2015, 3, 14)]
    expected = DataFrame.from_items([
        ("IntCol", [1, 2, -3, 4, 0]),
        ("FloatCol", [1.25, 2.25, 1.83, 1.92, 0.0000000005]),
        ("BoolCol", [True, False, True, True, False]),
        ("StrCol", [1, 2, 3, 4, 5]),
        # GH5394 - this is why convert_float isn't vectorized
        ("Str2Col", ["a", 3, "c", "d", "e"]),
        ("DateCol", dates),
    ])

    # Default read: both formats should match the expected frame.
    for workbook in both_files:
        result = read_excel(workbook, sheet)
        tm.assert_frame_equal(result, expected)

    # With convert_float=False the integer-valued cells are expected as
    # floats: the whole IntCol, plus the lone numeric entry of Str2Col.
    float_expected = expected.copy()
    float_expected["IntCol"] = float_expected["IntCol"].astype(float)
    float_expected.loc[1, "Str2Col"] = 3.0
    for workbook in both_files:
        result = read_excel(workbook, sheet, convert_float=False)
        tm.assert_frame_equal(result, float_expected)

    # Index selection by position and by label must agree
    # (assuming xls and xlsx are the same here, only xlsx is checked).
    for position, label in enumerate(expected.columns):
        by_position = read_excel(xlsx_file, sheet, index_col=position)
        by_label = read_excel(xlsx_file, sheet, index_col=label)
        indexed = expected.set_index(label)
        tm.assert_frame_equal(by_position, indexed)
        tm.assert_frame_equal(by_label, indexed)

    # convert_float and converters should be different but both accepted.
    str_converter = {"StrCol": str}
    expected["StrCol"] = expected["StrCol"].apply(str)
    result = read_excel(xlsx_file, sheet, converters=str_converter)
    tm.assert_frame_equal(result, expected)

    no_convert_float = float_expected.copy()
    no_convert_float["StrCol"] = no_convert_float["StrCol"].apply(str)
    result = read_excel(xlsx_file, sheet, converters=str_converter,
                        convert_float=False)
    tm.assert_frame_equal(result, no_convert_float)
359
+
309
360
310
361
class ExcelWriterBase (SharedItems ):
311
362
# Base class for test cases to run with different Excel writers.
@@ -390,7 +441,7 @@ def test_roundtrip(self):
390
441
tm .assert_frame_equal (self .frame , recons )
391
442
392
443
self .frame .to_excel (path , 'test1' , na_rep = '88' )
393
- recons = read_excel (path , 'test1' , index_col = 0 , na_values = [88 ,88.0 ])
444
+ recons = read_excel (path , 'test1' , index_col = 0 , na_values = [88 , 88.0 ])
394
445
tm .assert_frame_equal (self .frame , recons )
395
446
396
447
def test_mixed (self ):
@@ -417,6 +468,16 @@ def test_tsframe(self):
417
468
recons = reader .parse ('test1' )
418
469
tm .assert_frame_equal (df , recons )
419
470
471
def test_basics_with_nan(self):
    """Smoke-test ``to_excel`` on a frame that contains NaN values.

    Sets the first five entries of column ``'A'`` of ``self.frame`` to NaN
    and writes the frame to a temporary workbook four times: with default
    options, with a column subset, without a header, and without the index.
    Nothing is read back; the test only checks that the writes do not raise.
    """
    _skip_if_no_xlrd()
    ext = self.ext
    path = '__tmp_to_excel_from_excel_int_types__.' + ext

    # Wrap the writes in ensure_clean, as the sibling tests in this file do,
    # so the scratch workbook is not left in the working directory.
    with ensure_clean(path) as path:
        self.frame['A'][:5] = nan

        self.frame.to_excel(path, 'test1')
        self.frame.to_excel(path, 'test1', cols=['A', 'B'])
        self.frame.to_excel(path, 'test1', header=False)
        self.frame.to_excel(path, 'test1', index=False)
480
+
420
481
def test_int_types (self ):
421
482
_skip_if_no_xlrd ()
422
483
ext = self .ext
@@ -425,20 +486,22 @@ def test_int_types(self):
425
486
for np_type in (np .int8 , np .int16 , np .int32 , np .int64 ):
426
487
427
488
with ensure_clean (path ) as path :
428
- self .frame ['A' ][:5 ] = nan
429
-
430
- self .frame .to_excel (path , 'test1' )
431
- self .frame .to_excel (path , 'test1' , cols = ['A' , 'B' ])
432
- self .frame .to_excel (path , 'test1' , header = False )
433
- self .frame .to_excel (path , 'test1' , index = False )
434
-
435
- # Test np.int values read come back as float.
489
+ # Test np.int values read come back as int (rather than float
490
+ # which is Excel's format).
436
491
frame = DataFrame (np .random .randint (- 10 , 10 , size = (10 , 2 )),
437
492
dtype = np_type )
438
493
frame .to_excel (path , 'test1' )
439
494
reader = ExcelFile (path )
440
- recons = reader .parse ('test1' ).astype (np_type )
441
- tm .assert_frame_equal (frame , recons , check_dtype = False )
495
+ recons = reader .parse ('test1' )
496
+ int_frame = frame .astype (int )
497
+ tm .assert_frame_equal (int_frame , recons )
498
+ recons2 = read_excel (path , 'test1' )
499
+ tm .assert_frame_equal (int_frame , recons2 )
500
+
501
+ # test with convert_float=False comes back as float
502
+ float_frame = frame .astype (float )
503
+ recons = read_excel (path , 'test1' , convert_float = False )
504
+ tm .assert_frame_equal (recons , float_frame )
442
505
443
506
def test_float_types (self ):
444
507
_skip_if_no_xlrd ()
@@ -447,13 +510,6 @@ def test_float_types(self):
447
510
448
511
for np_type in (np .float16 , np .float32 , np .float64 ):
449
512
with ensure_clean (path ) as path :
450
- self .frame ['A' ][:5 ] = nan
451
-
452
- self .frame .to_excel (path , 'test1' )
453
- self .frame .to_excel (path , 'test1' , cols = ['A' , 'B' ])
454
- self .frame .to_excel (path , 'test1' , header = False )
455
- self .frame .to_excel (path , 'test1' , index = False )
456
-
457
513
# Test np.float values read come back as float.
458
514
frame = DataFrame (np .random .random_sample (10 ), dtype = np_type )
459
515
frame .to_excel (path , 'test1' )
@@ -468,13 +524,6 @@ def test_bool_types(self):
468
524
469
525
for np_type in (np .bool8 , np .bool_ ):
470
526
with ensure_clean (path ) as path :
471
- self .frame ['A' ][:5 ] = nan
472
-
473
- self .frame .to_excel (path , 'test1' )
474
- self .frame .to_excel (path , 'test1' , cols = ['A' , 'B' ])
475
- self .frame .to_excel (path , 'test1' , header = False )
476
- self .frame .to_excel (path , 'test1' , index = False )
477
-
478
527
# Test np.bool values read come back as float.
479
528
frame = (DataFrame ([1 , 0 , True , False ], dtype = np_type ))
480
529
frame .to_excel (path , 'test1' )
@@ -1007,11 +1056,11 @@ def test_ExcelWriter_dispatch(self):
1007
1056
writer = ExcelWriter ('apple.xls' )
1008
1057
tm .assert_isinstance (writer , _XlwtWriter )
1009
1058
1010
-
1011
1059
def test_register_writer (self ):
1012
1060
# some awkward mocking to test out dispatch and such actually works
1013
1061
called_save = []
1014
1062
called_write_cells = []
1063
+
1015
1064
class DummyClass (ExcelWriter ):
1016
1065
called_save = False
1017
1066
called_write_cells = False
0 commit comments