1
1
# pylint: disable=E1101
2
2
3
3
from pandas .compat import u , range , map
4
+ from datetime import datetime
4
5
import os
5
6
import unittest
6
7
@@ -306,6 +307,54 @@ def test_reader_closes_file(self):
306
307
307
308
self .assertTrue (f .closed )
308
309
310
def test_reader_special_dtypes(self):
    """Exercise read_excel dtype handling on the ``test_types`` fixtures.

    Reads ``test_types.xls`` and ``test_types.xlsx`` from ``self.dirpath``
    and compares the result to a hand-built frame holding int, float,
    bool, string and datetime columns. Covers the default read,
    ``convert_float=False``, ``index_col`` given by position and by
    label, and ``converters`` with and without ``convert_float=False``.
    """
    _skip_if_no_xlrd()

    date_values = [datetime(2013, 10, 30), datetime(2013, 10, 31),
                   datetime(1905, 1, 1), datetime(2013, 12, 14),
                   datetime(2015, 3, 14)]
    inferred = DataFrame.from_items([
        ("IntCol", [1, 2, -3, 4, 0]),
        ("FloatCol", [1.25, 2.25, 1.83, 1.92, 0.0000000005]),
        ("BoolCol", [True, False, True, True, False]),
        ("StrCol", [1, 2, 3, 4, 5]),
        ("Str2Col", ["a", "b", "c", "d", "e"]),
        ("DateCol", date_values),
    ])

    xls_path = os.path.join(self.dirpath, 'test_types.xls')
    xlsx_path = os.path.join(self.dirpath, 'test_types.xlsx')
    workbooks = (xls_path, xlsx_path)

    # Default read: both formats must match the hand-built frame,
    # dtypes included.
    for workbook in workbooks:
        loaded = read_excel(workbook, 'Sheet1')
        tm.assert_frame_equal(loaded, inferred)

    # With convert_float=False the integer column is expected as float.
    as_float = inferred.copy()
    as_float["IntCol"] = as_float["IntCol"].astype(float)
    for workbook in workbooks:
        loaded = read_excel(workbook, 'Sheet1', convert_float=False)
        tm.assert_frame_equal(loaded, as_float)

    # Every column can be used as the index, selected either by position
    # or by label (only the xlsx fixture is checked here).
    for position, label in enumerate(inferred.columns):
        by_position = read_excel(xlsx_path, 'Sheet1', index_col=position)
        by_label = read_excel(xlsx_path, 'Sheet1', index_col=label)
        indexed = inferred.set_index(label)
        tm.assert_frame_equal(by_position, indexed)
        tm.assert_frame_equal(by_label, indexed)

    # converters on its own: StrCol comes back as str; the other columns
    # keep the default expectations.
    inferred["StrCol"] = inferred["StrCol"].apply(str)
    loaded = read_excel(xlsx_path, 'Sheet1', converters={"StrCol": str})
    tm.assert_frame_equal(loaded, inferred)

    # converters combined with convert_float=False: both are accepted
    # together. The expectation is built from the float-typed frame.
    as_float_str = as_float.copy()
    as_float_str["StrCol"] = as_float_str["StrCol"].apply(str)
    loaded = read_excel(xlsx_path, 'Sheet1', converters={"StrCol": str},
                        convert_float=False)
    tm.assert_frame_equal(loaded, as_float_str)
357
+
309
358
310
359
class ExcelWriterBase (SharedItems ):
311
360
# Base class for test cases to run with different Excel writers.
@@ -390,7 +439,7 @@ def test_roundtrip(self):
390
439
tm .assert_frame_equal (self .frame , recons )
391
440
392
441
self .frame .to_excel (path , 'test1' , na_rep = '88' )
393
- recons = read_excel (path , 'test1' , index_col = 0 , na_values = [88 ,88.0 ])
442
+ recons = read_excel (path , 'test1' , index_col = 0 , na_values = [88 , 88.0 ])
394
443
tm .assert_frame_equal (self .frame , recons )
395
444
396
445
def test_mixed (self ):
@@ -417,6 +466,16 @@ def test_tsframe(self):
417
466
recons = reader .parse ('test1' )
418
467
tm .assert_frame_equal (df , recons )
419
468
469
def test_basics_with_nan(self):
    """Smoke-test ``to_excel`` on a frame that contains NaN values.

    Sets the first five entries of column ``'A'`` of ``self.frame`` to
    NaN, then writes the frame with the default options, with a column
    subset, without a header and without the index. Nothing is read
    back; the test only checks that the writes do not raise.
    """
    _skip_if_no_xlrd()
    ext = self.ext
    path = '__tmp_to_excel_from_excel_int_types__.' + ext

    # Use ensure_clean, as the other writer tests in this module do, so
    # the temporary workbook is not left behind in the working directory.
    with ensure_clean(path) as path:
        self.frame['A'][:5] = nan

        self.frame.to_excel(path, 'test1')
        self.frame.to_excel(path, 'test1', cols=['A', 'B'])
        self.frame.to_excel(path, 'test1', header=False)
        self.frame.to_excel(path, 'test1', index=False)
478
+
420
479
def test_int_types (self ):
421
480
_skip_if_no_xlrd ()
422
481
ext = self .ext
@@ -425,20 +484,22 @@ def test_int_types(self):
425
484
for np_type in (np .int8 , np .int16 , np .int32 , np .int64 ):
426
485
427
486
with ensure_clean (path ) as path :
428
- self .frame ['A' ][:5 ] = nan
429
-
430
- self .frame .to_excel (path , 'test1' )
431
- self .frame .to_excel (path , 'test1' , cols = ['A' , 'B' ])
432
- self .frame .to_excel (path , 'test1' , header = False )
433
- self .frame .to_excel (path , 'test1' , index = False )
434
-
435
- # Test np.int values read come back as float.
487
+ # Test np.int values read come back as int (rather than float
488
+ # which is Excel's format).
436
489
frame = DataFrame (np .random .randint (- 10 , 10 , size = (10 , 2 )),
437
490
dtype = np_type )
438
491
frame .to_excel (path , 'test1' )
439
492
reader = ExcelFile (path )
440
- recons = reader .parse ('test1' ).astype (np_type )
441
- tm .assert_frame_equal (frame , recons , check_dtype = False )
493
+ recons = reader .parse ('test1' )
494
+ int_frame = frame .astype (int )
495
+ tm .assert_frame_equal (int_frame , recons )
496
+ recons2 = read_excel (path , 'test1' )
497
+ tm .assert_frame_equal (int_frame , recons2 )
498
+
499
+ # test with convert_float=False comes back as float
500
+ float_frame = frame .astype (float )
501
+ recons = read_excel (path , 'test1' , convert_float = False )
502
+ tm .assert_frame_equal (recons , float_frame )
442
503
443
504
def test_float_types (self ):
444
505
_skip_if_no_xlrd ()
@@ -447,13 +508,6 @@ def test_float_types(self):
447
508
448
509
for np_type in (np .float16 , np .float32 , np .float64 ):
449
510
with ensure_clean (path ) as path :
450
- self .frame ['A' ][:5 ] = nan
451
-
452
- self .frame .to_excel (path , 'test1' )
453
- self .frame .to_excel (path , 'test1' , cols = ['A' , 'B' ])
454
- self .frame .to_excel (path , 'test1' , header = False )
455
- self .frame .to_excel (path , 'test1' , index = False )
456
-
457
511
# Test np.float values read come back as float.
458
512
frame = DataFrame (np .random .random_sample (10 ), dtype = np_type )
459
513
frame .to_excel (path , 'test1' )
@@ -468,13 +522,6 @@ def test_bool_types(self):
468
522
469
523
for np_type in (np .bool8 , np .bool_ ):
470
524
with ensure_clean (path ) as path :
471
- self .frame ['A' ][:5 ] = nan
472
-
473
- self .frame .to_excel (path , 'test1' )
474
- self .frame .to_excel (path , 'test1' , cols = ['A' , 'B' ])
475
- self .frame .to_excel (path , 'test1' , header = False )
476
- self .frame .to_excel (path , 'test1' , index = False )
477
-
478
525
# Test np.bool values read come back as float.
479
526
frame = (DataFrame ([1 , 0 , True , False ], dtype = np_type ))
480
527
frame .to_excel (path , 'test1' )
@@ -1007,11 +1054,11 @@ def test_ExcelWriter_dispatch(self):
1007
1054
writer = ExcelWriter ('apple.xls' )
1008
1055
tm .assert_isinstance (writer , _XlwtWriter )
1009
1056
1010
-
1011
1057
def test_register_writer (self ):
1012
1058
# some awkward mocking to test out dispatch and such actually works
1013
1059
called_save = []
1014
1060
called_write_cells = []
1061
+
1015
1062
class DummyClass (ExcelWriter ):
1016
1063
called_save = False
1017
1064
called_write_cells = False
0 commit comments