Skip to content

TST: dt64 units #56239

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged: 1 commit merged on Nov 29, 2023
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -467,13 +467,15 @@ def _array_strptime_with_fallback(
"""
result, tz_out = array_strptime(arg, fmt, exact=exact, errors=errors, utc=utc)
if tz_out is not None:
dtype = DatetimeTZDtype(tz=tz_out)
unit = np.datetime_data(result.dtype)[0]
dtype = DatetimeTZDtype(tz=tz_out, unit=unit)
dta = DatetimeArray._simple_new(result, dtype=dtype)
if utc:
dta = dta.tz_convert("UTC")
return Index(dta, name=name)
elif result.dtype != object and utc:
res = Index(result, dtype="M8[ns, UTC]", name=name)
unit = np.datetime_data(result.dtype)[0]
res = Index(result, dtype=f"M8[{unit}, UTC]", name=name)
return res
return Index(result, dtype=result.dtype, name=name)

Expand Down
10 changes: 0 additions & 10 deletions pandas/tests/arithmetic/test_timedelta64.py
Original file line number Diff line number Diff line change
Expand Up @@ -746,20 +746,10 @@ def test_timedelta_ops_with_missing_values(self):
s1 = pd.to_timedelta(Series(["00:00:01"]))
s2 = pd.to_timedelta(Series(["00:00:02"]))

msg = r"dtype datetime64\[ns\] cannot be converted to timedelta64\[ns\]"
with pytest.raises(TypeError, match=msg):
# Passing datetime64-dtype data to TimedeltaIndex is no longer
# supported GH#29794
pd.to_timedelta(Series([NaT])) # TODO: belongs elsewhere?

sn = pd.to_timedelta(Series([NaT], dtype="m8[ns]"))

df1 = DataFrame(["00:00:01"]).apply(pd.to_timedelta)
df2 = DataFrame(["00:00:02"]).apply(pd.to_timedelta)
with pytest.raises(TypeError, match=msg):
# Passing datetime64-dtype data to TimedeltaIndex is no longer
# supported GH#29794
DataFrame([NaT]).apply(pd.to_timedelta) # TODO: belongs elsewhere?

dfn = DataFrame([NaT._value]).apply(pd.to_timedelta)

Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/arrays/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def test_array_copy():
),
(
[datetime.datetime(2000, 1, 1), datetime.datetime(2001, 1, 1)],
DatetimeArray._from_sequence(["2000", "2001"]),
DatetimeArray._from_sequence(["2000", "2001"], dtype="M8[ns]"),
),
(
np.array([1, 2], dtype="M8[ns]"),
Expand All @@ -284,7 +284,7 @@ def test_array_copy():
(
[pd.Timestamp("2000", tz="CET"), pd.Timestamp("2001", tz="CET")],
DatetimeArray._from_sequence(
["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET")
["2000", "2001"], dtype=pd.DatetimeTZDtype(tz="CET", unit="ns")
),
),
(
Expand All @@ -293,7 +293,7 @@ def test_array_copy():
datetime.datetime(2001, 1, 1, tzinfo=cet),
],
DatetimeArray._from_sequence(
["2000", "2001"], dtype=pd.DatetimeTZDtype(tz=cet)
["2000", "2001"], dtype=pd.DatetimeTZDtype(tz=cet, unit="ns")
),
),
# timedelta
Expand Down
21 changes: 11 additions & 10 deletions pandas/tests/frame/constructors/test_from_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,26 +442,27 @@ def test_from_records_misc_brokenness(self):
exp = DataFrame(data, index=["a", "b", "c"])
tm.assert_frame_equal(result, exp)

def test_from_records_misc_brokenness2(self):
# GH#2623
rows = []
rows.append([datetime(2010, 1, 1), 1])
rows.append([datetime(2010, 1, 2), "hi"]) # test col upconverts to obj
df2_obj = DataFrame.from_records(rows, columns=["date", "test"])
result = df2_obj.dtypes
expected = Series(
[np.dtype("datetime64[ns]"), np.dtype("object")], index=["date", "test"]
result = DataFrame.from_records(rows, columns=["date", "test"])
expected = DataFrame(
{"date": [row[0] for row in rows], "test": [row[1] for row in rows]}
)
tm.assert_series_equal(result, expected)
tm.assert_frame_equal(result, expected)
assert result.dtypes["test"] == np.dtype(object)

def test_from_records_misc_brokenness3(self):
rows = []
rows.append([datetime(2010, 1, 1), 1])
rows.append([datetime(2010, 1, 2), 1])
df2_obj = DataFrame.from_records(rows, columns=["date", "test"])
result = df2_obj.dtypes
expected = Series(
[np.dtype("datetime64[ns]"), np.dtype("int64")], index=["date", "test"]
result = DataFrame.from_records(rows, columns=["date", "test"])
expected = DataFrame(
{"date": [row[0] for row in rows], "test": [row[1] for row in rows]}
)
tm.assert_series_equal(result, expected)
tm.assert_frame_equal(result, expected)

def test_from_records_empty(self):
# GH#3562
Expand Down
14 changes: 9 additions & 5 deletions pandas/tests/frame/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -809,11 +809,13 @@ def test_replace_for_new_dtypes(self, datetime_frame):
Timestamp("20130104", tz="US/Eastern"),
DataFrame(
{
"A": [
Timestamp("20130101", tz="US/Eastern"),
Timestamp("20130104", tz="US/Eastern"),
Timestamp("20130103", tz="US/Eastern"),
],
"A": pd.DatetimeIndex(
[
Timestamp("20130101", tz="US/Eastern"),
Timestamp("20130104", tz="US/Eastern"),
Timestamp("20130103", tz="US/Eastern"),
]
).as_unit("ns"),
"B": [0, np.nan, 2],
}
),
Expand Down Expand Up @@ -1174,6 +1176,7 @@ def test_replace_datetimetz(self):
"B": [0, np.nan, 2],
}
)
expected["A"] = expected["A"].dt.as_unit("ns")
tm.assert_frame_equal(result, expected)

result = df.copy()
Expand All @@ -1195,6 +1198,7 @@ def test_replace_datetimetz(self):
"B": [0, np.nan, 2],
}
)
expected["A"] = expected["A"].dt.as_unit("ns")
tm.assert_frame_equal(result, expected)

result = df.copy()
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/frame/methods/test_reset_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -699,9 +699,12 @@ def test_reset_index_multiindex_nat():
df = DataFrame({"id": idx, "tstamp": tstamp, "a": list("abc")})
df.loc[2, "tstamp"] = pd.NaT
result = df.set_index(["id", "tstamp"]).reset_index("id")
exp_dti = pd.DatetimeIndex(
["2015-07-01", "2015-07-02", "NaT"], dtype="M8[ns]", name="tstamp"
)
expected = DataFrame(
{"id": range(3), "a": list("abc")},
index=pd.DatetimeIndex(["2015-07-01", "2015-07-02", "NaT"], name="tstamp"),
index=exp_dti,
)
tm.assert_frame_equal(result, expected)

Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexes/datetimes/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,13 +592,13 @@ def test_integer_values_and_tz_interpreted_as_utc(self):

result = DatetimeIndex(values).tz_localize("US/Central")

expected = DatetimeIndex(["2000-01-01T00:00:00"], tz="US/Central")
expected = DatetimeIndex(["2000-01-01T00:00:00"], dtype="M8[ns, US/Central]")
tm.assert_index_equal(result, expected)

# but UTC is *not* deprecated.
with tm.assert_produces_warning(None):
result = DatetimeIndex(values, tz="UTC")
expected = DatetimeIndex(["2000-01-01T00:00:00"], tz="UTC")
expected = DatetimeIndex(["2000-01-01T00:00:00"], dtype="M8[ns, UTC]")
tm.assert_index_equal(result, expected)

def test_constructor_coverage(self):
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1625,7 +1625,8 @@ def test_read_timezone_information(self):
result = read_json(
StringIO('{"2019-01-01T11:00:00.000Z":88}'), typ="series", orient="index"
)
expected = Series([88], index=DatetimeIndex(["2019-01-01 11:00:00"], tz="UTC"))
exp_dti = DatetimeIndex(["2019-01-01 11:00:00"], dtype="M8[ns, UTC]")
expected = Series([88], index=exp_dti)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize(
Expand Down
1 change: 0 additions & 1 deletion pandas/tests/reshape/concat/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ def test_concat_datetime_timezone(self):
dtype="M8[ns, Europe/Paris]",
freq="h",
)

expected = DataFrame(
[[1, 1], [2, 2], [3, 3]], index=exp_idx, columns=["a", "b"]
)
Expand Down
6 changes: 4 additions & 2 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,8 +437,10 @@ def test_pivot_no_values(self):
index=idx,
)
res = df.pivot_table(index=df.index.month, columns=Grouper(key="dt", freq="ME"))
exp_columns = MultiIndex.from_tuples([("A", pd.Timestamp("2011-01-31"))])
exp_columns.names = [None, "dt"]
exp_columns = MultiIndex.from_arrays(
[["A"], pd.DatetimeIndex(["2011-01-31"], dtype="M8[ns]")],
names=[None, "dt"],
)
exp = DataFrame(
[3.25, 2.0], index=Index([1, 2], dtype=np.int32), columns=exp_columns
)
Expand Down
7 changes: 5 additions & 2 deletions pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@ def test_setitem_with_tz(self, tz, indexer_sli):
Timestamp("2016-01-01 00:00", tz=tz),
Timestamp("2011-01-01 00:00", tz=tz),
Timestamp("2016-01-01 02:00", tz=tz),
]
],
dtype=orig.dtype,
)

# scalar
Expand All @@ -100,6 +101,7 @@ def test_setitem_with_tz(self, tz, indexer_sli):
vals = Series(
[Timestamp("2011-01-01", tz=tz), Timestamp("2012-01-01", tz=tz)],
index=[1, 2],
dtype=orig.dtype,
)
assert vals.dtype == f"datetime64[ns, {tz}]"

Expand All @@ -108,7 +110,8 @@ def test_setitem_with_tz(self, tz, indexer_sli):
Timestamp("2016-01-01 00:00", tz=tz),
Timestamp("2011-01-01 00:00", tz=tz),
Timestamp("2012-01-01 00:00", tz=tz),
]
],
dtype=orig.dtype,
)

ser = orig.copy()
Expand Down
28 changes: 18 additions & 10 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -2058,7 +2058,11 @@ def test_to_datetime_unit(self, dtype):
ser = Series([epoch + t for t in range(20)]).astype(dtype)
result = to_datetime(ser, unit="s")
expected = Series(
[Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)]
[
Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t)
for t in range(20)
],
dtype="M8[ns]",
)
tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -2208,7 +2212,8 @@ def test_dataframe_field_aliases_column_subset(self, df, cache, unit):
# unit mappings
result = to_datetime(df[list(unit.keys())].rename(columns=unit), cache=cache)
expected = Series(
[Timestamp("20150204 06:58:10"), Timestamp("20160305 07:59:11")]
[Timestamp("20150204 06:58:10"), Timestamp("20160305 07:59:11")],
dtype="M8[ns]",
)
tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -2970,7 +2975,8 @@ def test_to_datetime_iso8601_noleading_0s(self, cache, format):
Timestamp("2015-03-03"),
]
)
tm.assert_series_equal(to_datetime(ser, format=format, cache=cache), expected)
result = to_datetime(ser, format=format, cache=cache)
tm.assert_series_equal(result, expected)

def test_parse_dates_infer_datetime_format_warning(self):
# GH 49024
Expand Down Expand Up @@ -3364,7 +3370,8 @@ def test_julian(self, julian_dates):
def test_unix(self):
result = Series(to_datetime([0, 1, 2], unit="D", origin="unix"))
expected = Series(
[Timestamp("1970-01-01"), Timestamp("1970-01-02"), Timestamp("1970-01-03")]
[Timestamp("1970-01-01"), Timestamp("1970-01-02"), Timestamp("1970-01-03")],
dtype="M8[ns]",
)
tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -3483,7 +3490,7 @@ def test_arg_tz_ns_unit(self, offset, utc, exp):
# GH 25546
arg = "2019-01-01T00:00:00.000" + offset
result = to_datetime([arg], unit="ns", utc=utc)
expected = to_datetime([exp])
expected = to_datetime([exp]).as_unit("ns")
tm.assert_index_equal(result, expected)


Expand Down Expand Up @@ -3610,19 +3617,20 @@ def test_to_datetime_monotonic_increasing_index(cache):
)
def test_to_datetime_cache_coerce_50_lines_outofbounds(series_length):
# GH#45319
s = Series(
ser = Series(
[datetime.fromisoformat("1446-04-12 00:00:00+00:00")]
+ ([datetime.fromisoformat("1991-10-20 00:00:00+00:00")] * series_length)
+ ([datetime.fromisoformat("1991-10-20 00:00:00+00:00")] * series_length),
dtype=object,
)
result1 = to_datetime(s, errors="coerce", utc=True)
result1 = to_datetime(ser, errors="coerce", utc=True)

expected1 = Series(
[NaT] + ([Timestamp("1991-10-20 00:00:00+00:00")] * series_length)
)

tm.assert_series_equal(result1, expected1)

result2 = to_datetime(s, errors="ignore", utc=True)
result2 = to_datetime(ser, errors="ignore", utc=True)

expected2 = Series(
[datetime.fromisoformat("1446-04-12 00:00:00+00:00")]
Expand All @@ -3632,7 +3640,7 @@ def test_to_datetime_cache_coerce_50_lines_outofbounds(series_length):
tm.assert_series_equal(result2, expected2)

with pytest.raises(OutOfBoundsDatetime, match="Out of bounds nanosecond timestamp"):
to_datetime(s, errors="raise", utc=True)
to_datetime(ser, errors="raise", utc=True)


def test_to_datetime_format_f_parse_nanos():
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/tools/test_to_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,17 @@


class TestTimedeltas:
def test_to_timedelta_dt64_raises(self):
# Passing datetime64-dtype data to TimedeltaIndex is no longer
# supported GH#29794
msg = r"dtype datetime64\[ns\] cannot be converted to timedelta64\[ns\]"

ser = Series([pd.NaT])
with pytest.raises(TypeError, match=msg):
to_timedelta(ser)
with pytest.raises(TypeError, match=msg):
ser.to_frame().apply(to_timedelta)

@pytest.mark.parametrize("readonly", [True, False])
def test_to_timedelta_readonly(self, readonly):
# GH#34857
Expand Down