Skip to content

Commit d44f6c1

Browse files
authored
TST: dt64 units (#56261)
* TST: dt64 units
* fix on older pythons
* typo fixup
* mypy fixup
* de-xfail
1 parent 1bb4edc commit d44f6c1

File tree

5 files changed

+99 -46 lines changed

5 files changed

+99 -46 lines changed

pandas/core/dtypes/missing.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -686,7 +686,8 @@ def na_value_for_dtype(dtype: DtypeObj, compat: bool = True):
686686
if isinstance(dtype, ExtensionDtype):
687687
return dtype.na_value
688688
elif dtype.kind in "mM":
689-
return dtype.type("NaT", "ns")
689+
unit = np.datetime_data(dtype)[0]
690+
return dtype.type("NaT", unit)
690691
elif dtype.kind == "f":
691692
return np.nan
692693
elif dtype.kind in "iu":

pandas/tests/frame/methods/test_reset_index.py

+4 -6
Original file line number | Diff line number | Diff line change
@@ -664,11 +664,8 @@ def test_reset_index_dtypes_on_empty_frame_with_multiindex(array, dtype):
664664

665665
def test_reset_index_empty_frame_with_datetime64_multiindex():
666666
# https://github.com/pandas-dev/pandas/issues/35606
667-
idx = MultiIndex(
668-
levels=[[Timestamp("2020-07-20 00:00:00")], [3, 4]],
669-
codes=[[], []],
670-
names=["a", "b"],
671-
)
667+
dti = pd.DatetimeIndex(["2020-07-20 00:00:00"], dtype="M8[ns]")
668+
idx = MultiIndex.from_product([dti, [3, 4]], names=["a", "b"])[:0]
672669
df = DataFrame(index=idx, columns=["c", "d"])
673670
result = df.reset_index()
674671
expected = DataFrame(
@@ -681,7 +678,8 @@ def test_reset_index_empty_frame_with_datetime64_multiindex():
681678

682679
def test_reset_index_empty_frame_with_datetime64_multiindex_from_groupby():
683680
# https://github.com/pandas-dev/pandas/issues/35657
684-
df = DataFrame({"c1": [10.0], "c2": ["a"], "c3": pd.to_datetime("2020-01-01")})
681+
dti = pd.DatetimeIndex(["2020-01-01"], dtype="M8[ns]")
682+
df = DataFrame({"c1": [10.0], "c2": ["a"], "c3": dti})
685683
df = df.head(0).groupby(["c2", "c3"])[["c1"]].sum()
686684
result = df.reset_index()
687685
expected = DataFrame(

pandas/tests/indexes/interval/test_formats.py

+6 -9
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33

44
from pandas import (
55
DataFrame,
6+
DatetimeIndex,
67
Index,
78
Interval,
89
IntervalIndex,
@@ -100,18 +101,14 @@ def test_get_values_for_csv(self, tuples, closed, expected_data):
100101
expected = np.array(expected_data)
101102
tm.assert_numpy_array_equal(result, expected)
102103

103-
def test_timestamp_with_timezone(self):
104+
def test_timestamp_with_timezone(self, unit):
104105
# GH 55035
105-
index = IntervalIndex(
106-
[
107-
Interval(
108-
Timestamp("2020-01-01", tz="UTC"), Timestamp("2020-01-02", tz="UTC")
109-
)
110-
]
111-
)
106+
left = DatetimeIndex(["2020-01-01"], dtype=f"M8[{unit}, UTC]")
107+
right = DatetimeIndex(["2020-01-02"], dtype=f"M8[{unit}, UTC]")
108+
index = IntervalIndex.from_arrays(left, right)
112109
result = repr(index)
113110
expected = (
114111
"IntervalIndex([(2020-01-01 00:00:00+00:00, 2020-01-02 00:00:00+00:00]], "
115-
"dtype='interval[datetime64[ns, UTC], right]')"
112+
f"dtype='interval[datetime64[{unit}, UTC], right]')"
116113
)
117114
assert result == expected

pandas/tests/io/excel/test_readers.py

+55 -23
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
from __future__ import annotations
2+
13
from datetime import (
24
datetime,
35
time,
@@ -130,8 +132,15 @@ def df_ref(datapath):
130132
return df_ref
131133

132134

133-
def adjust_expected(expected: DataFrame, read_ext: str) -> None:
135+
def get_exp_unit(read_ext: str, engine: str | None) -> str:
136+
return "ns"
137+
138+
139+
def adjust_expected(expected: DataFrame, read_ext: str, engine: str) -> None:
134140
expected.index.name = None
141+
unit = get_exp_unit(read_ext, engine)
142+
# error: "Index" has no attribute "as_unit"
143+
expected.index = expected.index.as_unit(unit) # type: ignore[attr-defined]
135144

136145

137146
def xfail_datetimes_with_pyxlsb(engine, request):
@@ -225,7 +234,7 @@ def test_usecols_list(self, request, engine, read_ext, df_ref):
225234
xfail_datetimes_with_pyxlsb(engine, request)
226235

227236
expected = df_ref[["B", "C"]]
228-
adjust_expected(expected, read_ext)
237+
adjust_expected(expected, read_ext, engine)
229238

230239
df1 = pd.read_excel(
231240
"test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols=[0, 2, 3]
@@ -246,7 +255,7 @@ def test_usecols_str(self, request, engine, read_ext, df_ref):
246255
xfail_datetimes_with_pyxlsb(engine, request)
247256

248257
expected = df_ref[["A", "B", "C"]]
249-
adjust_expected(expected, read_ext)
258+
adjust_expected(expected, read_ext, engine)
250259

251260
df2 = pd.read_excel(
252261
"test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols="A:D"
@@ -264,7 +273,7 @@ def test_usecols_str(self, request, engine, read_ext, df_ref):
264273
tm.assert_frame_equal(df3, expected)
265274

266275
expected = df_ref[["B", "C"]]
267-
adjust_expected(expected, read_ext)
276+
adjust_expected(expected, read_ext, engine)
268277

269278
df2 = pd.read_excel(
270279
"test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols="A,C,D"
@@ -302,7 +311,7 @@ def test_usecols_diff_positional_int_columns_order(
302311
xfail_datetimes_with_pyxlsb(engine, request)
303312

304313
expected = df_ref[["A", "C"]]
305-
adjust_expected(expected, read_ext)
314+
adjust_expected(expected, read_ext, engine)
306315

307316
result = pd.read_excel(
308317
"test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols=usecols
@@ -321,7 +330,7 @@ def test_read_excel_without_slicing(self, request, engine, read_ext, df_ref):
321330
xfail_datetimes_with_pyxlsb(engine, request)
322331

323332
expected = df_ref
324-
adjust_expected(expected, read_ext)
333+
adjust_expected(expected, read_ext, engine)
325334

326335
result = pd.read_excel("test1" + read_ext, sheet_name="Sheet1", index_col=0)
327336
tm.assert_frame_equal(result, expected)
@@ -330,7 +339,7 @@ def test_usecols_excel_range_str(self, request, engine, read_ext, df_ref):
330339
xfail_datetimes_with_pyxlsb(engine, request)
331340

332341
expected = df_ref[["C", "D"]]
333-
adjust_expected(expected, read_ext)
342+
adjust_expected(expected, read_ext, engine)
334343

335344
result = pd.read_excel(
336345
"test1" + read_ext, sheet_name="Sheet1", index_col=0, usecols="A,D:E"
@@ -428,7 +437,7 @@ def test_excel_table(self, request, engine, read_ext, df_ref):
428437
xfail_datetimes_with_pyxlsb(engine, request)
429438

430439
expected = df_ref
431-
adjust_expected(expected, read_ext)
440+
adjust_expected(expected, read_ext, engine)
432441

433442
df1 = pd.read_excel("test1" + read_ext, sheet_name="Sheet1", index_col=0)
434443
df2 = pd.read_excel(
@@ -446,20 +455,24 @@ def test_excel_table(self, request, engine, read_ext, df_ref):
446455
def test_reader_special_dtypes(self, request, engine, read_ext):
447456
xfail_datetimes_with_pyxlsb(engine, request)
448457

458+
unit = get_exp_unit(read_ext, engine)
449459
expected = DataFrame.from_dict(
450460
{
451461
"IntCol": [1, 2, -3, 4, 0],
452462
"FloatCol": [1.25, 2.25, 1.83, 1.92, 0.0000000005],
453463
"BoolCol": [True, False, True, True, False],
454464
"StrCol": [1, 2, 3, 4, 5],
455465
"Str2Col": ["a", 3, "c", "d", "e"],
456-
"DateCol": [
457-
datetime(2013, 10, 30),
458-
datetime(2013, 10, 31),
459-
datetime(1905, 1, 1),
460-
datetime(2013, 12, 14),
461-
datetime(2015, 3, 14),
462-
],
466+
"DateCol": Index(
467+
[
468+
datetime(2013, 10, 30),
469+
datetime(2013, 10, 31),
470+
datetime(1905, 1, 1),
471+
datetime(2013, 12, 14),
472+
datetime(2015, 3, 14),
473+
],
474+
dtype=f"M8[{unit}]",
475+
),
463476
},
464477
)
465478
basename = "test_types"
@@ -578,7 +591,7 @@ def test_reader_dtype_str(self, read_ext, dtype, expected):
578591
actual = pd.read_excel(basename + read_ext, dtype=dtype)
579592
tm.assert_frame_equal(actual, expected)
580593

581-
def test_dtype_backend(self, read_ext, dtype_backend):
594+
def test_dtype_backend(self, read_ext, dtype_backend, engine):
582595
# GH#36712
583596
if read_ext in (".xlsb", ".xls"):
584597
pytest.skip(f"No engine for filetype: '{read_ext}'")
@@ -621,6 +634,9 @@ def test_dtype_backend(self, read_ext, dtype_backend):
621634
expected["j"] = ArrowExtensionArray(pa.array([None, None]))
622635
else:
623636
expected = df
637+
unit = get_exp_unit(read_ext, engine)
638+
expected["i"] = expected["i"].astype(f"M8[{unit}]")
639+
624640
tm.assert_frame_equal(result, expected)
625641

626642
def test_dtype_backend_and_dtype(self, read_ext):
@@ -812,7 +828,7 @@ def test_sheet_name(self, request, read_ext, engine, df_ref):
812828
sheet_name = "Sheet1"
813829

814830
expected = df_ref
815-
adjust_expected(expected, read_ext)
831+
adjust_expected(expected, read_ext, engine)
816832

817833
df1 = pd.read_excel(
818834
filename + read_ext, sheet_name=sheet_name, index_col=0
@@ -1010,6 +1026,8 @@ def test_read_excel_multiindex(self, request, engine, read_ext):
10101026
# see gh-4679
10111027
xfail_datetimes_with_pyxlsb(engine, request)
10121028

1029+
unit = get_exp_unit(read_ext, engine)
1030+
10131031
mi = MultiIndex.from_product([["foo", "bar"], ["a", "b"]])
10141032
mi_file = "testmultiindex" + read_ext
10151033

@@ -1023,6 +1041,7 @@ def test_read_excel_multiindex(self, request, engine, read_ext):
10231041
],
10241042
columns=mi,
10251043
)
1044+
expected[mi[2]] = expected[mi[2]].astype(f"M8[{unit}]")
10261045

10271046
actual = pd.read_excel(
10281047
mi_file, sheet_name="mi_column", header=[0, 1], index_col=0
@@ -1102,6 +1121,9 @@ def test_read_excel_multiindex_blank_after_name(
11021121

11031122
mi_file = "testmultiindex" + read_ext
11041123
mi = MultiIndex.from_product([["foo", "bar"], ["a", "b"]], names=["c1", "c2"])
1124+
1125+
unit = get_exp_unit(read_ext, engine)
1126+
11051127
expected = DataFrame(
11061128
[
11071129
[1, 2.5, pd.Timestamp("2015-01-01"), True],
@@ -1115,6 +1137,7 @@ def test_read_excel_multiindex_blank_after_name(
11151137
names=["ilvl1", "ilvl2"],
11161138
),
11171139
)
1140+
expected[mi[2]] = expected[mi[2]].astype(f"M8[{unit}]")
11181141
result = pd.read_excel(
11191142
mi_file,
11201143
sheet_name=sheet_name,
@@ -1218,6 +1241,8 @@ def test_read_excel_skiprows(self, request, engine, read_ext):
12181241
# GH 4903
12191242
xfail_datetimes_with_pyxlsb(engine, request)
12201243

1244+
unit = get_exp_unit(read_ext, engine)
1245+
12211246
actual = pd.read_excel(
12221247
"testskiprows" + read_ext, sheet_name="skiprows_list", skiprows=[0, 2]
12231248
)
@@ -1230,6 +1255,7 @@ def test_read_excel_skiprows(self, request, engine, read_ext):
12301255
],
12311256
columns=["a", "b", "c", "d"],
12321257
)
1258+
expected["c"] = expected["c"].astype(f"M8[{unit}]")
12331259
tm.assert_frame_equal(actual, expected)
12341260

12351261
actual = pd.read_excel(
@@ -1262,11 +1288,13 @@ def test_read_excel_skiprows(self, request, engine, read_ext):
12621288
],
12631289
columns=["a", "b", "c", "d"],
12641290
)
1291+
expected["c"] = expected["c"].astype(f"M8[{unit}]")
12651292
tm.assert_frame_equal(actual, expected)
12661293

12671294
def test_read_excel_skiprows_callable_not_in(self, request, engine, read_ext):
12681295
# GH 4903
12691296
xfail_datetimes_with_pyxlsb(engine, request)
1297+
unit = get_exp_unit(read_ext, engine)
12701298

12711299
actual = pd.read_excel(
12721300
"testskiprows" + read_ext,
@@ -1282,6 +1310,7 @@ def test_read_excel_skiprows_callable_not_in(self, request, engine, read_ext):
12821310
],
12831311
columns=["a", "b", "c", "d"],
12841312
)
1313+
expected["c"] = expected["c"].astype(f"M8[{unit}]")
12851314
tm.assert_frame_equal(actual, expected)
12861315

12871316
def test_read_excel_nrows(self, read_ext):
@@ -1538,7 +1567,7 @@ def test_excel_table_sheet_by_index(self, request, engine, read_ext, df_ref):
15381567
xfail_datetimes_with_pyxlsb(engine, request)
15391568

15401569
expected = df_ref
1541-
adjust_expected(expected, read_ext)
1570+
adjust_expected(expected, read_ext, engine)
15421571

15431572
with pd.ExcelFile("test1" + read_ext) as excel:
15441573
df1 = pd.read_excel(excel, sheet_name=0, index_col=0)
@@ -1565,7 +1594,7 @@ def test_sheet_name(self, request, engine, read_ext, df_ref):
15651594
xfail_datetimes_with_pyxlsb(engine, request)
15661595

15671596
expected = df_ref
1568-
adjust_expected(expected, read_ext)
1597+
adjust_expected(expected, read_ext, engine)
15691598

15701599
filename = "test1"
15711600
sheet_name = "Sheet1"
@@ -1657,11 +1686,14 @@ def test_read_datetime_multiindex(self, request, engine, read_ext):
16571686
f = "test_datetime_mi" + read_ext
16581687
with pd.ExcelFile(f) as excel:
16591688
actual = pd.read_excel(excel, header=[0, 1], index_col=0, engine=engine)
1660-
expected_column_index = MultiIndex.from_tuples(
1661-
[(pd.to_datetime("02/29/2020"), pd.to_datetime("03/01/2020"))],
1689+
1690+
unit = get_exp_unit(read_ext, engine)
1691+
dti = pd.DatetimeIndex(["2020-02-29", "2020-03-01"], dtype=f"M8[{unit}]")
1692+
expected_column_index = MultiIndex.from_arrays(
1693+
[dti[:1], dti[1:]],
16621694
names=[
1663-
pd.to_datetime("02/29/2020").to_pydatetime(),
1664-
pd.to_datetime("03/01/2020").to_pydatetime(),
1695+
dti[0].to_pydatetime(),
1696+
dti[1].to_pydatetime(),
16651697
],
16661698
)
16671699
expected = DataFrame([], index=[], columns=expected_column_index)

0 commit comments

Comments
 (0)