Skip to content

Commit dffd668

Browse files
jbrockmendel authored and JulianWgs committed
DEPR: Series/DataFrame with tzaware data and tznaive dtype (pandas-dev#41555)
1 parent d3720d0 commit dffd668

File tree

4 files changed

+125
-18
lines changed

4 files changed

+125
-18
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -677,6 +677,7 @@ Deprecations
677677
- Deprecated special treatment of lists with first element a Categorical in the :class:`DataFrame` constructor; pass as ``pd.DataFrame({col: categorical, ...})`` instead (:issue:`38845`)
678678
- Deprecated passing arguments as positional (except for ``"method"``) in :meth:`DataFrame.interpolate` and :meth:`Series.interpolate` (:issue:`41485`)
679679
- Deprecated passing arguments (apart from ``value``) as positional in :meth:`DataFrame.fillna` and :meth:`Series.fillna` (:issue:`41485`)
680+
- Deprecated construction of :class:`Series` or :class:`DataFrame` with ``DatetimeTZDtype`` data and ``datetime64[ns]`` dtype. Use ``Series(data).dt.tz_localize(None)`` instead (:issue:`41555`, :issue:`33401`)
680681

681682
.. ---------------------------------------------------------------------------
682683

pandas/core/dtypes/cast.py

+50-2
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,8 @@ def maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar:
220220
elif isinstance(value, Timestamp):
221221
if value.tz is None:
222222
value = value.to_datetime64()
223+
elif not isinstance(dtype, DatetimeTZDtype):
224+
raise TypeError("Cannot unbox tzaware Timestamp to tznaive dtype")
223225
elif isinstance(value, Timedelta):
224226
value = value.to_timedelta64()
225227

@@ -1628,9 +1630,21 @@ def maybe_cast_to_datetime(
16281630
# didn't specify one
16291631

16301632
if dta.tz is not None:
1633+
warnings.warn(
1634+
"Data is timezone-aware. Converting "
1635+
"timezone-aware data to timezone-naive by "
1636+
"passing dtype='datetime64[ns]' to "
1637+
"DataFrame or Series is deprecated and will "
1638+
"raise in a future version. Use "
1639+
"`pd.Series(values).dt.tz_localize(None)` "
1640+
"instead.",
1641+
FutureWarning,
1642+
stacklevel=8,
1643+
)
16311644
# equiv: dta.view(dtype)
16321645
# Note: NOT equivalent to dta.astype(dtype)
16331646
dta = dta.tz_localize(None)
1647+
16341648
value = dta
16351649
elif is_datetime64tz:
16361650
dtype = cast(DatetimeTZDtype, dtype)
@@ -1822,7 +1836,7 @@ def construct_2d_arraylike_from_scalar(
18221836
shape = (length, width)
18231837

18241838
if dtype.kind in ["m", "M"]:
1825-
value = maybe_unbox_datetimelike(value, dtype)
1839+
value = maybe_unbox_datetimelike_tz_deprecation(value, dtype, stacklevel=4)
18261840
elif dtype == object:
18271841
if isinstance(value, (np.timedelta64, np.datetime64)):
18281842
# calling np.array below would cast to pytimedelta/pydatetime
@@ -1885,14 +1899,48 @@ def construct_1d_arraylike_from_scalar(
18851899
if not isna(value):
18861900
value = ensure_str(value)
18871901
elif dtype.kind in ["M", "m"]:
1888-
value = maybe_unbox_datetimelike(value, dtype)
1902+
value = maybe_unbox_datetimelike_tz_deprecation(value, dtype)
18891903

18901904
subarr = np.empty(length, dtype=dtype)
18911905
subarr.fill(value)
18921906

18931907
return subarr
18941908

18951909

1910+
def maybe_unbox_datetimelike_tz_deprecation(
    value: Scalar, dtype: DtypeObj, stacklevel: int = 5
):
    """
    Unbox a datetime-like scalar, tolerating the deprecated pairing of a
    timezone-aware Timestamp with a timezone-naive numpy dtype.

    The work is delegated to maybe_unbox_datetimelike. If that call raises
    TypeError and ``value`` is a tz-aware Timestamp while ``dtype`` is a
    numpy dtype, a FutureWarning is emitted, the timezone is dropped with
    ``tz_localize(None)`` and the unboxing is retried on the naive value.

    Parameters
    ----------
    value : Scalar
        The scalar to unbox.
    dtype : DtypeObj
        The target dtype. The caller is responsible for checking
        ``dtype.kind in ["m", "M"]`` beforehand.
    stacklevel : int, default 5
        Forwarded to ``warnings.warn`` for the deprecation warning.

    Returns
    -------
    The value returned by maybe_unbox_datetimelike.

    Raises
    ------
    TypeError
        Re-raised unchanged when the failure is not the deprecated
        tz-aware Timestamp / numpy dtype combination.
    """
    try:
        return maybe_unbox_datetimelike(value, dtype)
    except TypeError:
        aware_value_naive_dtype = (
            isinstance(value, Timestamp)
            and value.tz is not None
            and isinstance(dtype, np.dtype)
        )
        if not aware_value_naive_dtype:
            # Not the deprecated case; propagate the original error.
            raise

        warnings.warn(
            "Data is timezone-aware. Converting "
            "timezone-aware data to timezone-naive by "
            "passing dtype='datetime64[ns]' to "
            "DataFrame or Series is deprecated and will "
            "raise in a future version. Use "
            "`pd.Series(values).dt.tz_localize(None)` "
            "instead.",
            FutureWarning,
            stacklevel=stacklevel,
        )
        # Drop the timezone, then retry with the naive value.
        naive_value = value.tz_localize(None)
        return maybe_unbox_datetimelike(naive_value, dtype)
1942+
1943+
18961944
def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray:
18971945
"""
18981946
Transform any list-like object in a 1-dimensional numpy array of object

pandas/tests/frame/test_constructors.py

+55-13
Original file line numberDiff line numberDiff line change
@@ -2432,6 +2432,17 @@ def test_from_series_with_name_with_columns(self):
24322432
expected = DataFrame(columns=["bar"])
24332433
tm.assert_frame_equal(result, expected)
24342434

2435+
def test_nested_list_columns(self):
    # GH 14467
    # A list of two label lists passed as ``columns`` is compared against
    # the equivalent explicitly-built MultiIndex.
    rows = [[1, 2, 3], [4, 5, 6]]
    outer_labels = ["A", "A", "A"]
    inner_labels = ["a", "b", "c"]

    result = DataFrame(rows, columns=[outer_labels, inner_labels])

    expected_columns = MultiIndex.from_tuples(
        list(zip(outer_labels, inner_labels))
    )
    expected = DataFrame(rows, columns=expected_columns)
    tm.assert_frame_equal(result, expected)
2445+
24352446

24362447
class TestDataFrameConstructorWithDatetimeTZ:
24372448
@pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"])
@@ -2464,12 +2475,41 @@ def test_construction_preserves_tzaware_dtypes(self, tz):
24642475
tm.assert_series_equal(result, expected)
24652476

24662477
def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture):
    # GH#25843, GH#41555, GH#33401
    # Building a DataFrame from tz-aware data with a tz-naive dtype is
    # deprecated: each construction path below must emit a FutureWarning
    # and produce the same frame as the tz-naive Timestamp.
    tz = tz_aware_fixture
    ts = Timestamp("2019", tz=tz)
    ts_naive = Timestamp("2019")

    # dict of lists
    with tm.assert_produces_warning(FutureWarning):
        result = DataFrame({0: [ts]}, dtype="datetime64[ns]")

    expected = DataFrame({0: [ts_naive]})
    tm.assert_frame_equal(result, expected)

    # dict of scalars
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = DataFrame({0: ts}, index=[0], dtype="datetime64[ns]")
    tm.assert_frame_equal(result, expected)

    # flat list
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = DataFrame([ts], dtype="datetime64[ns]")
    tm.assert_frame_equal(result, expected)

    # object ndarray
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        result = DataFrame(np.array([ts], dtype=object), dtype="datetime64[ns]")
    tm.assert_frame_equal(result, expected)

    # scalar broadcast
    with tm.assert_produces_warning(FutureWarning):
        result = DataFrame(ts, index=[0], columns=[0], dtype="datetime64[ns]")
    tm.assert_frame_equal(result, expected)

    # list of Series
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        df = DataFrame([Series([ts])], dtype="datetime64[ns]")
    # Compare the frame built in this case; comparing ``result`` here would
    # only re-check the previous case and leave this path unverified.
    tm.assert_frame_equal(df, expected)

    # nested list
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        df = DataFrame([[ts]], columns=[0], dtype="datetime64[ns]")
    tm.assert_equal(df, expected)
2512+
24732513
def test_from_dict(self):
24742514

24752515
# 8260
@@ -2710,13 +2750,15 @@ def test_from_out_of_bounds_timedelta(self, constructor, cls):
27102750

27112751
assert type(get1(result)) is cls
27122752

2713-
def test_nested_list_columns(self):
2714-
# GH 14467
2715-
result = DataFrame(
2716-
[[1, 2, 3], [4, 5, 6]], columns=[["A", "A", "A"], ["a", "b", "c"]]
2717-
)
2718-
expected = DataFrame(
2719-
[[1, 2, 3], [4, 5, 6]],
2720-
columns=MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("A", "c")]),
2721-
)
2722-
tm.assert_frame_equal(result, expected)
2753+
def test_tzaware_data_tznaive_dtype(self, constructor):
2754+
tz = "US/Eastern"
2755+
ts = Timestamp("2019", tz=tz)
2756+
ts_naive = Timestamp("2019")
2757+
2758+
with tm.assert_produces_warning(
2759+
FutureWarning, match="Data is timezone-aware", check_stacklevel=False
2760+
):
2761+
result = constructor(ts, dtype="M8[ns]")
2762+
2763+
assert np.all(result.dtypes == "M8[ns]")
2764+
assert np.all(result == ts_naive)

pandas/tests/series/test_constructors.py

+19-3
Original file line numberDiff line numberDiff line change
@@ -1536,10 +1536,26 @@ def test_constructor_tz_mixed_data(self):
15361536
tm.assert_series_equal(result, expected)
15371537

15381538
def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture):
    # GH#25843, GH#41555, GH#33401
    # Each way of handing tz-aware data to Series together with a tz-naive
    # dtype must emit a FutureWarning and match the tz-naive Timestamp.
    ts = Timestamp("2019", tz=tz_aware_fixture)
    expected = Series([Timestamp("2019")])

    # (data, extra constructor kwargs, whether to check the stacklevel)
    cases = [
        ([ts], {}, False),
        (np.array([ts], dtype=object), {}, False),
        ({0: ts}, {}, True),
        (ts, {"index": [0]}, True),
    ]
    for data, extra_kwargs, check_stacklevel in cases:
        with tm.assert_produces_warning(
            FutureWarning, check_stacklevel=check_stacklevel
        ):
            result = Series(data, dtype="datetime64[ns]", **extra_kwargs)
        tm.assert_series_equal(result, expected)
15441560

15451561
def test_constructor_datetime64(self):

0 commit comments

Comments
 (0)