1
+ from __future__ import annotations
2
+
1
3
from datetime import (
2
4
datetime ,
3
5
time ,
@@ -130,8 +132,15 @@ def df_ref(datapath):
130
132
return df_ref
131
133
132
134
133
- def adjust_expected (expected : DataFrame , read_ext : str ) -> None :
135
def get_exp_unit(read_ext: str, engine: str | None) -> str:
    """
    Return the datetime64 unit used when building expected test frames.

    Parameters
    ----------
    read_ext : str
        Extension of the Excel file under test. Currently not consulted.
    engine : str or None
        Reader engine under test. Currently not consulted.

    Returns
    -------
    str
        Always ``"ns"``.
    """
    # Both arguments are accepted so call sites already pass the information
    # a per-extension / per-engine answer would need; today every
    # combination maps to the same unit.
    return "ns"
137
+
138
+
139
def adjust_expected(expected: DataFrame, read_ext: str, engine: str | None) -> None:
    """
    Adjust an expected frame in place before comparing it to a read result.

    The index name is cleared and the index is converted to the unit
    reported by ``get_exp_unit(read_ext, engine)``.

    Parameters
    ----------
    expected : DataFrame
        Frame to modify in place. Its index must provide ``as_unit``
        (presumably a DatetimeIndex -- confirm against the fixture).
    read_ext : str
        Extension of the Excel file under test; forwarded to ``get_exp_unit``.
    engine : str or None
        Reader engine under test; forwarded to ``get_exp_unit``, which
        accepts ``None`` as well.

    Returns
    -------
    None
    """
    expected.index.name = None
    unit = get_exp_unit(read_ext, engine)
    # error: "Index" has no attribute "as_unit"
    expected.index = expected.index.as_unit(unit)  # type: ignore[attr-defined]
135
144
136
145
137
146
def xfail_datetimes_with_pyxlsb (engine , request ):
@@ -225,7 +234,7 @@ def test_usecols_list(self, request, engine, read_ext, df_ref):
225
234
xfail_datetimes_with_pyxlsb (engine , request )
226
235
227
236
expected = df_ref [["B" , "C" ]]
228
- adjust_expected (expected , read_ext )
237
+ adjust_expected (expected , read_ext , engine )
229
238
230
239
df1 = pd .read_excel (
231
240
"test1" + read_ext , sheet_name = "Sheet1" , index_col = 0 , usecols = [0 , 2 , 3 ]
@@ -246,7 +255,7 @@ def test_usecols_str(self, request, engine, read_ext, df_ref):
246
255
xfail_datetimes_with_pyxlsb (engine , request )
247
256
248
257
expected = df_ref [["A" , "B" , "C" ]]
249
- adjust_expected (expected , read_ext )
258
+ adjust_expected (expected , read_ext , engine )
250
259
251
260
df2 = pd .read_excel (
252
261
"test1" + read_ext , sheet_name = "Sheet1" , index_col = 0 , usecols = "A:D"
@@ -264,7 +273,7 @@ def test_usecols_str(self, request, engine, read_ext, df_ref):
264
273
tm .assert_frame_equal (df3 , expected )
265
274
266
275
expected = df_ref [["B" , "C" ]]
267
- adjust_expected (expected , read_ext )
276
+ adjust_expected (expected , read_ext , engine )
268
277
269
278
df2 = pd .read_excel (
270
279
"test1" + read_ext , sheet_name = "Sheet1" , index_col = 0 , usecols = "A,C,D"
@@ -302,7 +311,7 @@ def test_usecols_diff_positional_int_columns_order(
302
311
xfail_datetimes_with_pyxlsb (engine , request )
303
312
304
313
expected = df_ref [["A" , "C" ]]
305
- adjust_expected (expected , read_ext )
314
+ adjust_expected (expected , read_ext , engine )
306
315
307
316
result = pd .read_excel (
308
317
"test1" + read_ext , sheet_name = "Sheet1" , index_col = 0 , usecols = usecols
@@ -321,7 +330,7 @@ def test_read_excel_without_slicing(self, request, engine, read_ext, df_ref):
321
330
xfail_datetimes_with_pyxlsb (engine , request )
322
331
323
332
expected = df_ref
324
- adjust_expected (expected , read_ext )
333
+ adjust_expected (expected , read_ext , engine )
325
334
326
335
result = pd .read_excel ("test1" + read_ext , sheet_name = "Sheet1" , index_col = 0 )
327
336
tm .assert_frame_equal (result , expected )
@@ -330,7 +339,7 @@ def test_usecols_excel_range_str(self, request, engine, read_ext, df_ref):
330
339
xfail_datetimes_with_pyxlsb (engine , request )
331
340
332
341
expected = df_ref [["C" , "D" ]]
333
- adjust_expected (expected , read_ext )
342
+ adjust_expected (expected , read_ext , engine )
334
343
335
344
result = pd .read_excel (
336
345
"test1" + read_ext , sheet_name = "Sheet1" , index_col = 0 , usecols = "A,D:E"
@@ -428,7 +437,7 @@ def test_excel_table(self, request, engine, read_ext, df_ref):
428
437
xfail_datetimes_with_pyxlsb (engine , request )
429
438
430
439
expected = df_ref
431
- adjust_expected (expected , read_ext )
440
+ adjust_expected (expected , read_ext , engine )
432
441
433
442
df1 = pd .read_excel ("test1" + read_ext , sheet_name = "Sheet1" , index_col = 0 )
434
443
df2 = pd .read_excel (
@@ -446,20 +455,24 @@ def test_excel_table(self, request, engine, read_ext, df_ref):
446
455
def test_reader_special_dtypes (self , request , engine , read_ext ):
447
456
xfail_datetimes_with_pyxlsb (engine , request )
448
457
458
+ unit = get_exp_unit (read_ext , engine )
449
459
expected = DataFrame .from_dict (
450
460
{
451
461
"IntCol" : [1 , 2 , - 3 , 4 , 0 ],
452
462
"FloatCol" : [1.25 , 2.25 , 1.83 , 1.92 , 0.0000000005 ],
453
463
"BoolCol" : [True , False , True , True , False ],
454
464
"StrCol" : [1 , 2 , 3 , 4 , 5 ],
455
465
"Str2Col" : ["a" , 3 , "c" , "d" , "e" ],
456
- "DateCol" : [
457
- datetime (2013 , 10 , 30 ),
458
- datetime (2013 , 10 , 31 ),
459
- datetime (1905 , 1 , 1 ),
460
- datetime (2013 , 12 , 14 ),
461
- datetime (2015 , 3 , 14 ),
462
- ],
466
+ "DateCol" : Index (
467
+ [
468
+ datetime (2013 , 10 , 30 ),
469
+ datetime (2013 , 10 , 31 ),
470
+ datetime (1905 , 1 , 1 ),
471
+ datetime (2013 , 12 , 14 ),
472
+ datetime (2015 , 3 , 14 ),
473
+ ],
474
+ dtype = f"M8[{ unit } ]" ,
475
+ ),
463
476
},
464
477
)
465
478
basename = "test_types"
@@ -578,7 +591,7 @@ def test_reader_dtype_str(self, read_ext, dtype, expected):
578
591
actual = pd .read_excel (basename + read_ext , dtype = dtype )
579
592
tm .assert_frame_equal (actual , expected )
580
593
581
- def test_dtype_backend (self , read_ext , dtype_backend ):
594
+ def test_dtype_backend (self , read_ext , dtype_backend , engine ):
582
595
# GH#36712
583
596
if read_ext in (".xlsb" , ".xls" ):
584
597
pytest .skip (f"No engine for filetype: '{ read_ext } '" )
@@ -621,6 +634,9 @@ def test_dtype_backend(self, read_ext, dtype_backend):
621
634
expected ["j" ] = ArrowExtensionArray (pa .array ([None , None ]))
622
635
else :
623
636
expected = df
637
+ unit = get_exp_unit (read_ext , engine )
638
+ expected ["i" ] = expected ["i" ].astype (f"M8[{ unit } ]" )
639
+
624
640
tm .assert_frame_equal (result , expected )
625
641
626
642
def test_dtype_backend_and_dtype (self , read_ext ):
@@ -812,7 +828,7 @@ def test_sheet_name(self, request, read_ext, engine, df_ref):
812
828
sheet_name = "Sheet1"
813
829
814
830
expected = df_ref
815
- adjust_expected (expected , read_ext )
831
+ adjust_expected (expected , read_ext , engine )
816
832
817
833
df1 = pd .read_excel (
818
834
filename + read_ext , sheet_name = sheet_name , index_col = 0
@@ -1010,6 +1026,8 @@ def test_read_excel_multiindex(self, request, engine, read_ext):
1010
1026
# see gh-4679
1011
1027
xfail_datetimes_with_pyxlsb (engine , request )
1012
1028
1029
+ unit = get_exp_unit (read_ext , engine )
1030
+
1013
1031
mi = MultiIndex .from_product ([["foo" , "bar" ], ["a" , "b" ]])
1014
1032
mi_file = "testmultiindex" + read_ext
1015
1033
@@ -1023,6 +1041,7 @@ def test_read_excel_multiindex(self, request, engine, read_ext):
1023
1041
],
1024
1042
columns = mi ,
1025
1043
)
1044
+ expected [mi [2 ]] = expected [mi [2 ]].astype (f"M8[{ unit } ]" )
1026
1045
1027
1046
actual = pd .read_excel (
1028
1047
mi_file , sheet_name = "mi_column" , header = [0 , 1 ], index_col = 0
@@ -1102,6 +1121,9 @@ def test_read_excel_multiindex_blank_after_name(
1102
1121
1103
1122
mi_file = "testmultiindex" + read_ext
1104
1123
mi = MultiIndex .from_product ([["foo" , "bar" ], ["a" , "b" ]], names = ["c1" , "c2" ])
1124
+
1125
+ unit = get_exp_unit (read_ext , engine )
1126
+
1105
1127
expected = DataFrame (
1106
1128
[
1107
1129
[1 , 2.5 , pd .Timestamp ("2015-01-01" ), True ],
@@ -1115,6 +1137,7 @@ def test_read_excel_multiindex_blank_after_name(
1115
1137
names = ["ilvl1" , "ilvl2" ],
1116
1138
),
1117
1139
)
1140
+ expected [mi [2 ]] = expected [mi [2 ]].astype (f"M8[{ unit } ]" )
1118
1141
result = pd .read_excel (
1119
1142
mi_file ,
1120
1143
sheet_name = sheet_name ,
@@ -1218,6 +1241,8 @@ def test_read_excel_skiprows(self, request, engine, read_ext):
1218
1241
# GH 4903
1219
1242
xfail_datetimes_with_pyxlsb (engine , request )
1220
1243
1244
+ unit = get_exp_unit (read_ext , engine )
1245
+
1221
1246
actual = pd .read_excel (
1222
1247
"testskiprows" + read_ext , sheet_name = "skiprows_list" , skiprows = [0 , 2 ]
1223
1248
)
@@ -1230,6 +1255,7 @@ def test_read_excel_skiprows(self, request, engine, read_ext):
1230
1255
],
1231
1256
columns = ["a" , "b" , "c" , "d" ],
1232
1257
)
1258
+ expected ["c" ] = expected ["c" ].astype (f"M8[{ unit } ]" )
1233
1259
tm .assert_frame_equal (actual , expected )
1234
1260
1235
1261
actual = pd .read_excel (
@@ -1262,11 +1288,13 @@ def test_read_excel_skiprows(self, request, engine, read_ext):
1262
1288
],
1263
1289
columns = ["a" , "b" , "c" , "d" ],
1264
1290
)
1291
+ expected ["c" ] = expected ["c" ].astype (f"M8[{ unit } ]" )
1265
1292
tm .assert_frame_equal (actual , expected )
1266
1293
1267
1294
def test_read_excel_skiprows_callable_not_in (self , request , engine , read_ext ):
1268
1295
# GH 4903
1269
1296
xfail_datetimes_with_pyxlsb (engine , request )
1297
+ unit = get_exp_unit (read_ext , engine )
1270
1298
1271
1299
actual = pd .read_excel (
1272
1300
"testskiprows" + read_ext ,
@@ -1282,6 +1310,7 @@ def test_read_excel_skiprows_callable_not_in(self, request, engine, read_ext):
1282
1310
],
1283
1311
columns = ["a" , "b" , "c" , "d" ],
1284
1312
)
1313
+ expected ["c" ] = expected ["c" ].astype (f"M8[{ unit } ]" )
1285
1314
tm .assert_frame_equal (actual , expected )
1286
1315
1287
1316
def test_read_excel_nrows (self , read_ext ):
@@ -1538,7 +1567,7 @@ def test_excel_table_sheet_by_index(self, request, engine, read_ext, df_ref):
1538
1567
xfail_datetimes_with_pyxlsb (engine , request )
1539
1568
1540
1569
expected = df_ref
1541
- adjust_expected (expected , read_ext )
1570
+ adjust_expected (expected , read_ext , engine )
1542
1571
1543
1572
with pd .ExcelFile ("test1" + read_ext ) as excel :
1544
1573
df1 = pd .read_excel (excel , sheet_name = 0 , index_col = 0 )
@@ -1565,7 +1594,7 @@ def test_sheet_name(self, request, engine, read_ext, df_ref):
1565
1594
xfail_datetimes_with_pyxlsb (engine , request )
1566
1595
1567
1596
expected = df_ref
1568
- adjust_expected (expected , read_ext )
1597
+ adjust_expected (expected , read_ext , engine )
1569
1598
1570
1599
filename = "test1"
1571
1600
sheet_name = "Sheet1"
@@ -1657,11 +1686,14 @@ def test_read_datetime_multiindex(self, request, engine, read_ext):
1657
1686
f = "test_datetime_mi" + read_ext
1658
1687
with pd .ExcelFile (f ) as excel :
1659
1688
actual = pd .read_excel (excel , header = [0 , 1 ], index_col = 0 , engine = engine )
1660
- expected_column_index = MultiIndex .from_tuples (
1661
- [(pd .to_datetime ("02/29/2020" ), pd .to_datetime ("03/01/2020" ))],
1689
+
1690
+ unit = get_exp_unit (read_ext , engine )
1691
+ dti = pd .DatetimeIndex (["2020-02-29" , "2020-03-01" ], dtype = f"M8[{ unit } ]" )
1692
+ expected_column_index = MultiIndex .from_arrays (
1693
+ [dti [:1 ], dti [1 :]],
1662
1694
names = [
1663
- pd . to_datetime ( "02/29/2020" ) .to_pydatetime (),
1664
- pd . to_datetime ( "03/01/2020" ) .to_pydatetime (),
1695
+ dti [ 0 ] .to_pydatetime (),
1696
+ dti [ 1 ] .to_pydatetime (),
1665
1697
],
1666
1698
)
1667
1699
expected = DataFrame ([], index = [], columns = expected_column_index )
0 commit comments