From 79cc02b0855557dd97a990b4ed962c2f0c93e4a4 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 31 Oct 2022 16:14:52 -0700 Subject: [PATCH 1/4] BUG: Series(mixed_tz_objs, dtype=dt64tz) --- pandas/_libs/tslib.pyx | 48 +++++++++++++ pandas/core/arrays/datetimes.py | 5 ++ pandas/tests/arrays/test_datetimelike.py | 27 +------- .../indexes/datetimes/test_constructors.py | 68 ++++++++++++++----- pandas/tests/series/methods/test_astype.py | 13 ++++ 5 files changed, 118 insertions(+), 43 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index d7c0c91332e02..bf912005ae57e 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -6,6 +6,7 @@ from cpython.datetime cimport ( import_datetime, tzinfo, ) +from cpython.object cimport PyObject # import datetime C API import_datetime() @@ -862,3 +863,50 @@ cdef inline bint _parse_today_now(str val, int64_t* iresult, bint utc): iresult[0] = Timestamp.today().value return True return False + + +def array_to_datetime_with_tz(ndarray values, tzinfo tz): + """ + Vectorized analogue to pd.Timestamp(value, tz=tz) + + values has object-dtype, unrestricted ndim. + + Major differences between this and array_to_datetime with utc=True + - np.datetime64 objects are treated as _wall_ times. + - tznaive datetimes are treated as _wall_ times. 
+ """ + cdef: + ndarray result = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_INT64, 0) + cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, values) + Py_ssize_t i, n = values.size + object item + int64_t ival + datetime ts + + for i in range(n): + # Analogous to `item = values[i]` + item = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0] + + if checknull_with_nat_and_na(item): + # this catches pd.NA which would raise in the Timestamp constructor + ival = NPY_NAT + + else: + ts = Timestamp(item) + if ts is NaT: + ival = NPY_NAT + else: + if ts.tz is not None: + ts = ts.tz_convert(tz) + else: + # datetime64, tznaive pydatetime, int, float + ts = ts.tz_localize(tz) + ts = ts._as_unit("ns") + ival = ts.value + + # Analogous to: result[i] = ival + (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival + + cnp.PyArray_MultiIter_NEXT(mi) + + return result diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index ca54ab163ab64..d87e24bb3c388 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -2029,6 +2029,11 @@ def _sequence_to_dt64ns( copy = False if lib.infer_dtype(data, skipna=False) == "integer": data = data.astype(np.int64) + elif tz is not None and ambiguous == "raise": + # TODO: yearfirst/dayfirst/etc? 
+ obj_data = np.asarray(data, dtype=object) + i8data = tslib.array_to_datetime_with_tz(obj_data, tz) + return i8data.view(DT64NS_DTYPE), tz, None else: # data comes back here as either i8 to denote UTC timestamps # or M8[ns] to denote wall times diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 72b2cd15d3222..cb0ee6ce07b03 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -292,19 +292,7 @@ def test_searchsorted(self): assert result == 10 @pytest.mark.parametrize("box", [None, "index", "series"]) - def test_searchsorted_castable_strings(self, arr1d, box, request, string_storage): - if isinstance(arr1d, DatetimeArray): - tz = arr1d.tz - ts1, ts2 = arr1d[1:3] - if tz is not None and ts1.tz.tzname(ts1) != ts2.tz.tzname(ts2): - # If we have e.g. tzutc(), when we cast to string and parse - # back we get pytz.UTC, and then consider them different timezones - # so incorrectly raise. - mark = pytest.mark.xfail( - raises=TypeError, reason="timezone comparisons inconsistent" - ) - request.node.add_marker(mark) - + def test_searchsorted_castable_strings(self, arr1d, box, string_storage): arr = arr1d if box is None: pass @@ -461,19 +449,8 @@ def test_setitem_object_dtype(self, box, arr1d): tm.assert_equal(arr1d, expected) - def test_setitem_strs(self, arr1d, request): + def test_setitem_strs(self, arr1d): # Check that we parse strs in both scalar and listlike - if isinstance(arr1d, DatetimeArray): - tz = arr1d.tz - ts1, ts2 = arr1d[-2:] - if tz is not None and ts1.tz.tzname(ts1) != ts2.tz.tzname(ts2): - # If we have e.g. tzutc(), when we cast to string and parse - # back we get pytz.UTC, and then consider them different timezones - # so incorrectly raise. 
- mark = pytest.mark.xfail( - raises=TypeError, reason="timezone comparisons inconsistent" - ) - request.node.add_marker(mark) # Setting list-like of strs expected = arr1d.copy() diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 0061cfd2b903f..4aaa2b694102d 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -466,25 +466,57 @@ def test_construction_dti_with_mixed_timezones(self): name="idx", ) - with pytest.raises(ValueError, match=msg): - DatetimeIndex( - [ - Timestamp("2011-01-01 10:00"), - Timestamp("2011-01-02 10:00", tz="US/Eastern"), - ], - tz="Asia/Tokyo", - name="idx", - ) + # pre-2.0 this raised bc of awareness mismatch. in 2.0 with a tz + # specified we behave as if this was called pointwise, so + # the naive Timestamp is treated as a wall time. + dti = DatetimeIndex( + [ + Timestamp("2011-01-01 10:00"), + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + tz="Asia/Tokyo", + name="idx", + ) + expected = DatetimeIndex( + [ + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + Timestamp("2011-01-02 10:00", tz="US/Eastern").tz_convert("Asia/Tokyo"), + ], + tz="Asia/Tokyo", + name="idx", + ) + tm.assert_index_equal(dti, expected) - with pytest.raises(ValueError, match=msg): - DatetimeIndex( - [ - Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), - Timestamp("2011-01-02 10:00", tz="US/Eastern"), - ], - tz="US/Eastern", - name="idx", - ) + # pre-2.0 mixed-tz scalars raised even if a tz/dtype was specified. 
+ # as of 2.0 we successfully return the requested tz/dtype + dti = DatetimeIndex( + [ + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + tz="US/Eastern", + name="idx", + ) + expected = DatetimeIndex( + [ + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo").tz_convert("US/Eastern"), + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + tz="US/Eastern", + name="idx", + ) + tm.assert_index_equal(dti, expected) + + # same thing but pass dtype instead of tz + dti = DatetimeIndex( + [ + Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"), + Timestamp("2011-01-02 10:00", tz="US/Eastern"), + ], + dtype="M8[ns, US/Eastern]", + name="idx", + ) + tm.assert_index_equal(dti, expected) def test_construction_base_constructor(self): arr = [Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-03")] diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index a0b6502b4ef5c..1c32fc60adeee 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -97,6 +97,19 @@ def test_astype_dict_like(self, dtype_class): class TestAstype: + def test_astype_mixed_object_to_dt64tz(self): + # pre-2.0 this raised ValueError bc of tz mismatch + ts = Timestamp("2016-01-04 05:06:07", tz="US/Pacific") + ts2 = ts.tz_convert("Asia/Tokyo") + + ser = Series([ts, ts2], dtype=object) + res = ser.astype("datetime64[ns, Europe/Brussels]") + expected = Series( + [ts.tz_convert("Europe/Brussels"), ts2.tz_convert("Europe/Brussels")], + dtype="datetime64[ns, Europe/Brussels]", + ) + tm.assert_series_equal(res, expected) + @pytest.mark.parametrize("dtype", np.typecodes["All"]) def test_astype_empty_constructor_equality(self, dtype): # see GH#15524 From 318c158124d0e9aba1b9b494476f19927634bc32 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 31 Oct 2022 16:18:07 -0700 Subject: [PATCH 2/4] whatsnew --- doc/source/whatsnew/v2.0.0.rst | 2 +- 
pandas/tests/series/methods/test_astype.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 04e5154ca1a0b..e0b9001fa035d 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -355,7 +355,7 @@ Timedelta Timezones ^^^^^^^^^ -- +- Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` when casting object-dtype data containing multiple timezone-aware ``datetime`` objects with heterogeneous timezones to a :class:`DatetimeTZDtype` incorrectly raising (:issue:`32581`) - Numeric diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index 1c32fc60adeee..cc0f99b3a2843 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -99,6 +99,7 @@ def test_astype_dict_like(self, dtype_class): class TestAstype: def test_astype_mixed_object_to_dt64tz(self): # pre-2.0 this raised ValueError bc of tz mismatch + # xref GH#32581 ts = Timestamp("2016-01-04 05:06:07", tz="US/Pacific") ts2 = ts.tz_convert("Asia/Tokyo") From 3747e8cbfa8fffd76ac5be3fe109ba8ba04f3f9a Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 31 Oct 2022 16:21:48 -0700 Subject: [PATCH 3/4] update pyi --- pandas/_libs/tslib.pyi | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/_libs/tslib.pyi b/pandas/_libs/tslib.pyi index 8fec9ecf27f30..ac8d5bac7c6e7 100644 --- a/pandas/_libs/tslib.pyi +++ b/pandas/_libs/tslib.pyi @@ -28,3 +28,7 @@ def array_to_datetime( ) -> tuple[np.ndarray, tzinfo | None]: ... # returned ndarray may be object dtype or datetime64[ns] + +def array_to_datetime_with_tz( + values: npt.NDArray[np.object_], tz: tzinfo +) -> npt.NDArray[np.int64]: ... 
From 8cc329b60fe17cf4f33bf53b16c0cc9d1177d0ab Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 1 Nov 2022 13:59:14 -0700 Subject: [PATCH 4/4] DEPR: __setitem__ on dt64tz with mixed timezones --- doc/source/whatsnew/v2.0.0.rst | 2 +- pandas/core/arrays/datetimelike.py | 24 +++------ pandas/core/arrays/datetimes.py | 23 ++------- pandas/core/arrays/period.py | 5 +- pandas/core/arrays/timedeltas.py | 6 +-- pandas/tests/arrays/test_datetimelike.py | 20 +++----- pandas/tests/arrays/test_datetimes.py | 28 ++++------- pandas/tests/frame/methods/test_replace.py | 11 ++-- .../indexes/datetimes/methods/test_insert.py | 24 +++------ pandas/tests/indexing/test_coercion.py | 50 +++++++------------ pandas/tests/indexing/test_loc.py | 9 ++-- pandas/tests/series/indexing/test_setitem.py | 41 +++------------ pandas/tests/series/methods/test_fillna.py | 24 ++++----- 13 files changed, 85 insertions(+), 182 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index e0b9001fa035d..e0aee1f959ed7 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -290,11 +290,11 @@ Removal of prior version deprecations/changes - Changed behavior of :class:`DataFrame` constructor when passed a ``dtype`` (other than int) that the data cannot be cast to; it now raises instead of silently ignoring the dtype (:issue:`41733`) - Changed the behavior of :class:`Series` constructor, it will no longer infer a datetime64 or timedelta64 dtype from string entries (:issue:`41731`) - Changed behavior of :class:`Index` constructor when passed a ``SparseArray`` or ``SparseDtype`` to retain that dtype instead of casting to ``numpy.ndarray`` (:issue:`43930`) +- Changed behavior of setitem-like operations (``__setitem__``, ``fillna``, ``where``, ``mask``, ``replace``, ``insert``, fill_value for ``shift``) on an object with :class:`DatetimeTZDtype` when using a value with a non-matching timezone, the value will be cast to the object's timezone instead of 
casting both to object-dtype (:issue:`44243`) - Removed the deprecated ``base`` and ``loffset`` arguments from :meth:`pandas.DataFrame.resample`, :meth:`pandas.Series.resample` and :class:`pandas.Grouper`. Use ``offset`` or ``origin`` instead (:issue:`31809`) - Changed behavior of :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``; object-dtype columns with all-bool values will no longer be included, manually cast to ``bool`` dtype first (:issue:`46188`) - Changed behavior of comparison of a :class:`Timestamp` with a ``datetime.date`` object; these now compare as un-equal and raise on inequality comparisons, matching the ``datetime.datetime`` behavior (:issue:`36131`) - Enforced deprecation of silently dropping columns that raised a ``TypeError`` in :class:`Series.transform` and :class:`DataFrame.transform` when used with a list or dictionary (:issue:`43740`) -- .. --------------------------------------------------------------------------- .. _whatsnew_200.performance: diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 17ea71c8d29a4..b4198575c3f06 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -217,7 +217,7 @@ def _scalar_from_string(self, value: str) -> DTScalarOrNaT: raise AbstractMethodError(self) def _unbox_scalar( - self, value: DTScalarOrNaT, setitem: bool = False + self, value: DTScalarOrNaT ) -> np.int64 | np.datetime64 | np.timedelta64: """ Unbox the integer value of a scalar `value`. @@ -226,8 +226,6 @@ def _unbox_scalar( ---------- value : Period, Timestamp, Timedelta, or NaT Depending on subclass. - setitem : bool, default False - Whether to check compatibility with setitem strictness. 
Returns ------- @@ -240,9 +238,7 @@ def _unbox_scalar( """ raise AbstractMethodError(self) - def _check_compatible_with( - self, other: DTScalarOrNaT, setitem: bool = False - ) -> None: + def _check_compatible_with(self, other: DTScalarOrNaT) -> None: """ Verify that `self` and `other` are compatible. @@ -255,9 +251,6 @@ def _check_compatible_with( Parameters ---------- other - setitem : bool, default False - For __setitem__ we may have stricter compatibility restrictions than - for comparisons. Raises ------ @@ -663,7 +656,7 @@ def _validate_scalar( # this option exists to prevent a performance hit in # TimedeltaIndex.get_loc return value - return self._unbox_scalar(value, setitem=setitem) + return self._unbox_scalar(value) def _validation_error_message(self, value, allow_listlike: bool = False) -> str: """ @@ -757,19 +750,18 @@ def _validate_setitem_value(self, value): else: return self._validate_scalar(value, allow_listlike=True) - return self._unbox(value, setitem=True) + return self._unbox(value) - def _unbox( - self, other, setitem: bool = False - ) -> np.int64 | np.datetime64 | np.timedelta64 | np.ndarray: + @final + def _unbox(self, other) -> np.int64 | np.datetime64 | np.timedelta64 | np.ndarray: """ Unbox either a scalar with _unbox_scalar or an instance of our own type. 
""" if lib.is_scalar(other): - other = self._unbox_scalar(other, setitem=setitem) + other = self._unbox_scalar(other) else: # same type as self - self._check_compatible_with(other, setitem=setitem) + self._check_compatible_with(other) other = other._ndarray return other diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index d87e24bb3c388..b25b9862eb594 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -480,36 +480,19 @@ def _generate_range( # type: ignore[override] # ----------------------------------------------------------------- # DatetimeLike Interface - def _unbox_scalar(self, value, setitem: bool = False) -> np.datetime64: + def _unbox_scalar(self, value) -> np.datetime64: if not isinstance(value, self._scalar_type) and value is not NaT: raise ValueError("'value' should be a Timestamp.") - self._check_compatible_with(value, setitem=setitem) + self._check_compatible_with(value) return value.asm8 def _scalar_from_string(self, value) -> Timestamp | NaTType: return Timestamp(value, tz=self.tz) - def _check_compatible_with(self, other, setitem: bool = False): + def _check_compatible_with(self, other) -> None: if other is NaT: return self._assert_tzawareness_compat(other) - if setitem: - # Stricter check for setitem vs comparison methods - if self.tz is not None and not timezones.tz_compare(self.tz, other.tz): - # TODO(2.0): remove this check. GH#37605 - warnings.warn( - "Setitem-like behavior with mismatched timezones is deprecated " - "and will change in a future version. Instead of raising " - "(or for Index, Series, and DataFrame methods, coercing to " - "object dtype), the value being set (or passed as a " - "fill_value, or inserted) will be cast to the existing " - "DatetimeArray/DatetimeIndex/Series/DataFrame column's " - "timezone. 
To retain the old behavior, explicitly cast to " - "object dtype before the operation.", - FutureWarning, - stacklevel=find_stack_level(), - ) - raise ValueError(f"Timezones don't match. '{self.tz}' != '{other.tz}'") # ----------------------------------------------------------------- # Descriptive Properties diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 5e1b0c4b18718..3b21cc1ecff48 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -328,13 +328,12 @@ def _generate_range(cls, start, end, periods, freq, fields): def _unbox_scalar( # type: ignore[override] self, value: Period | NaTType, - setitem: bool = False, ) -> np.int64: if value is NaT: # error: Item "Period" of "Union[Period, NaTType]" has no attribute "value" return np.int64(value.value) # type: ignore[union-attr] elif isinstance(value, self._scalar_type): - self._check_compatible_with(value, setitem=setitem) + self._check_compatible_with(value) return np.int64(value.ordinal) else: raise ValueError(f"'value' should be a Period. 
Got '{value}' instead.") @@ -342,7 +341,7 @@ def _unbox_scalar( # type: ignore[override] def _scalar_from_string(self, value: str) -> Period: return Period(value, freq=self.freq) - def _check_compatible_with(self, other, setitem: bool = False) -> None: + def _check_compatible_with(self, other) -> None: if other is NaT: return self._require_matching_freq(other) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 92b9222cfc9bc..e05557f001e29 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -288,10 +288,10 @@ def _generate_range(cls, start, end, periods, freq, closed=None): # ---------------------------------------------------------------- # DatetimeLike Interface - def _unbox_scalar(self, value, setitem: bool = False) -> np.timedelta64: + def _unbox_scalar(self, value) -> np.timedelta64: if not isinstance(value, self._scalar_type) and value is not NaT: raise ValueError("'value' should be a Timedelta.") - self._check_compatible_with(value, setitem=setitem) + self._check_compatible_with(value) if value is NaT: return np.timedelta64(value.value, "ns") else: @@ -301,7 +301,7 @@ def _unbox_scalar(self, value, setitem: bool = False) -> np.timedelta64: def _scalar_from_string(self, value) -> Timedelta | NaTType: return Timedelta(value) - def _check_compatible_with(self, other, setitem: bool = False) -> None: + def _check_compatible_with(self, other) -> None: # we don't have anything to validate. pass diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index cb0ee6ce07b03..3f310d0efa2ca 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -829,18 +829,14 @@ def test_take_fill_valid(self, arr1d, fixed_now_ts): # GH#37356 # Assuming here that arr1d fixture does not include Australia/Melbourne value = fixed_now_ts.tz_localize("Australia/Melbourne") - msg = "Timezones don't match. 
.* != 'Australia/Melbourne'" - with pytest.raises(ValueError, match=msg): - # require tz match, not just tzawareness match - with tm.assert_produces_warning( - FutureWarning, match="mismatched timezone" - ): - result = arr.take([-1, 1], allow_fill=True, fill_value=value) - - # once deprecation is enforced - # expected = arr.take([-1, 1], allow_fill=True, - # fill_value=value.tz_convert(arr.dtype.tz)) - # tm.assert_equal(result, expected) + result = arr.take([-1, 1], allow_fill=True, fill_value=value) + + expected = arr.take( + [-1, 1], + allow_fill=True, + fill_value=value.tz_convert(arr.dtype.tz), + ) + tm.assert_equal(result, expected) def test_concat_same_type_invalid(self, arr1d): # different timezones diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index babab81dfbe57..37a9c19627ada 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -429,19 +429,16 @@ def test_setitem_str_impute_tz(self, tz_naive_fixture): tm.assert_equal(arr, expected) def test_setitem_different_tz_raises(self): + # pre-2.0 we required exact tz match, in 2.0 we require only + # tzawareness-match data = np.array([1, 2, 3], dtype="M8[ns]") arr = DatetimeArray(data, copy=False, dtype=DatetimeTZDtype(tz="US/Central")) with pytest.raises(TypeError, match="Cannot compare tz-naive and tz-aware"): arr[0] = pd.Timestamp("2000") ts = pd.Timestamp("2000", tz="US/Eastern") - with pytest.raises(ValueError, match="US/Central"): - with tm.assert_produces_warning( - FutureWarning, match="mismatched timezones" - ): - arr[0] = ts - # once deprecation is enforced - # assert arr[0] == ts.tz_convert("US/Central") + arr[0] = ts + assert arr[0] == ts.tz_convert("US/Central") def test_setitem_clears_freq(self): a = DatetimeArray(pd.date_range("2000", periods=2, freq="D", tz="US/Central")) @@ -688,23 +685,16 @@ def test_shift_value_tzawareness_mismatch(self): dta.shift(1, fill_value=invalid) def 
test_shift_requires_tzmatch(self): - # since filling is setitem-like, we require a matching timezone, - # not just matching tzawawreness + # pre-2.0 we required exact tz match, in 2.0 we require just + # matching tzawareness dti = pd.date_range("2016-01-01", periods=3, tz="UTC") dta = dti._data fill_value = pd.Timestamp("2020-10-18 18:44", tz="US/Pacific") - msg = "Timezones don't match. 'UTC' != 'US/Pacific'" - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning( - FutureWarning, match="mismatched timezones" - ): - dta.shift(1, fill_value=fill_value) - - # once deprecation is enforced - # expected = dta.shift(1, fill_value=fill_value.tz_convert("UTC")) - # tm.assert_equal(result, expected) + result = dta.shift(1, fill_value=fill_value) + expected = dta.shift(1, fill_value=fill_value.tz_convert("UTC")) + tm.assert_equal(result, expected) def test_tz_localize_t2d(self): dti = pd.date_range("1994-05-12", periods=12, tz="US/Pacific") diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py index 9eaba56a23e0f..646d275bbfc40 100644 --- a/pandas/tests/frame/methods/test_replace.py +++ b/pandas/tests/frame/methods/test_replace.py @@ -1162,20 +1162,15 @@ def test_replace_datetimetz(self): result = result.replace({"A": pd.NaT}, Timestamp("20130104", tz="US/Eastern")) tm.assert_frame_equal(result, expected) - # coerce to object + # pre-2.0 this would coerce to object with mismatched tzs result = df.copy() result.iloc[1, 0] = np.nan - with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): - result = result.replace( - {"A": pd.NaT}, Timestamp("20130104", tz="US/Pacific") - ) + result = result.replace({"A": pd.NaT}, Timestamp("20130104", tz="US/Pacific")) expected = DataFrame( { "A": [ Timestamp("20130101", tz="US/Eastern"), - Timestamp("20130104", tz="US/Pacific"), - # once deprecation is enforced - # Timestamp("20130104", tz="US/Pacific").tz_convert("US/Eastern"), + 
Timestamp("20130104", tz="US/Pacific").tz_convert("US/Eastern"), Timestamp("20130103", tz="US/Eastern"), ], "B": [0, np.nan, 2], diff --git a/pandas/tests/indexes/datetimes/methods/test_insert.py b/pandas/tests/indexes/datetimes/methods/test_insert.py index 592f4240ee750..2478a3ba799ad 100644 --- a/pandas/tests/indexes/datetimes/methods/test_insert.py +++ b/pandas/tests/indexes/datetimes/methods/test_insert.py @@ -193,36 +193,26 @@ def test_insert_mismatched_tzawareness(self): # TODO: also changes DataFrame.__setitem__ with expansion def test_insert_mismatched_tz(self): # see GH#7299 + # pre-2.0 with mismatched tzs we would cast to object idx = date_range("1/1/2000", periods=3, freq="D", tz="Asia/Tokyo", name="idx") # mismatched tz -> cast to object (could reasonably cast to same tz or UTC) item = Timestamp("2000-01-04", tz="US/Eastern") - with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): - result = idx.insert(3, item) + result = idx.insert(3, item) expected = Index( - list(idx[:3]) + [item] + list(idx[3:]), - dtype=object, - # once deprecation is enforced - # list(idx[:3]) + [item.tz_convert(idx.tz)] + list(idx[3:]), + list(idx[:3]) + [item.tz_convert(idx.tz)] + list(idx[3:]), name="idx", ) - # once deprecation is enforced - # assert expected.dtype == idx.dtype + assert expected.dtype == idx.dtype tm.assert_index_equal(result, expected) - # mismatched tz -> cast to object (could reasonably cast to same tz) item = datetime(2000, 1, 4, tzinfo=pytz.timezone("US/Eastern")) - with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): - result = idx.insert(3, item) + result = idx.insert(3, item) expected = Index( - list(idx[:3]) + [item] + list(idx[3:]), - dtype=object, - # once deprecation is enforced - # list(idx[:3]) + [item.astimezone(idx.tzinfo)] + list(idx[3:]), + list(idx[:3]) + [item.astimezone(idx.tzinfo)] + list(idx[3:]), name="idx", ) - # once deprecation is enforced - # assert expected.dtype == idx.dtype + assert 
expected.dtype == idx.dtype tm.assert_index_equal(result, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 26424904482d1..ee2c06150bf53 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -287,14 +287,11 @@ def test_insert_index_datetimes(self, fill_val, exp_dtype, insert_value): assert expected.dtype == object tm.assert_index_equal(result, expected) - # mismatched tz --> cast to object (could reasonably cast to common tz) ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo") - with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): - result = obj.insert(1, ts) + result = obj.insert(1, ts) # once deprecation is enforced: - # expected = obj.insert(1, ts.tz_convert(obj.dtype.tz)) - # assert expected.dtype == obj.dtype - expected = obj.astype(object).insert(1, ts) + expected = obj.insert(1, ts.tz_convert(obj.dtype.tz)) + assert expected.dtype == obj.dtype tm.assert_index_equal(result, expected) else: @@ -652,7 +649,8 @@ def test_fillna_datetime(self, index_or_series, fill_val, fill_dtype): [ (pd.Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"), (pd.Timestamp("2012-01-01"), object), - (pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), object), + # pre-2.0 with a mismatched tz we would get object result + (pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), "datetime64[ns, US/Eastern]"), (1, object), ("x", object), ], @@ -671,22 +669,19 @@ def test_fillna_datetime64tz(self, index_or_series, fill_val, fill_dtype): ) assert obj.dtype == "datetime64[ns, US/Eastern]" + if getattr(fill_val, "tz", None) is None: + fv = fill_val + else: + fv = fill_val.tz_convert(tz) exp = klass( [ pd.Timestamp("2011-01-01", tz=tz), - fill_val, - # Once deprecation is enforced, this becomes: - # fill_val.tz_convert(tz) if getattr(fill_val, "tz", None) - # is not None else fill_val, + fv, pd.Timestamp("2011-01-03", tz=tz), 
pd.Timestamp("2011-01-04", tz=tz), ] ) - warn = None - if getattr(fill_val, "tz", None) is not None and fill_val.tz != obj[0].tz: - warn = FutureWarning - with tm.assert_produces_warning(warn, match="mismatched timezone"): - self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) + self._assert_fillna_conversion(obj, fill_val, exp, fill_dtype) @pytest.mark.parametrize( "fill_val", @@ -914,23 +909,16 @@ def test_replace_series_datetime_datetime(self, how, to_key, from_key, replacer) obj = pd.Series(self.rep[from_key], index=index, name="yyy") assert obj.dtype == from_key - warn = None - rep_ser = pd.Series(replacer) - if ( - isinstance(obj.dtype, pd.DatetimeTZDtype) - and isinstance(rep_ser.dtype, pd.DatetimeTZDtype) - and obj.dtype != rep_ser.dtype - ): - # mismatched tz DatetimeArray behavior will change to cast - # for setitem-like methods with mismatched tzs GH#44940 - warn = FutureWarning - - msg = "explicitly cast to object" - with tm.assert_produces_warning(warn, match=msg): - result = obj.replace(replacer) + result = obj.replace(replacer) exp = pd.Series(self.rep[to_key], index=index, name="yyy") - assert exp.dtype == to_key + if isinstance(obj.dtype, pd.DatetimeTZDtype) and isinstance( + exp.dtype, pd.DatetimeTZDtype + ): + # with mismatched tzs, we retain the original dtype as of 2.0 + exp = exp.astype(obj.dtype) + else: + assert exp.dtype == to_key tm.assert_series_equal(result, exp) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index d462ef534e02f..43abacbd6073c 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2065,13 +2065,12 @@ def test_setitem_with_expansion(self): df.time = df.set_index("time").index.tz_localize("UTC") v = df[df.new_col == "new"].set_index("time").index.tz_convert("US/Pacific") - # trying to set a single element on a part of a different timezone - # this converts to object + # pre-2.0 trying to set a single element on a part of a different + # 
timezone converted to object; in 2.0 it retains dtype df2 = df.copy() - with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): - df2.loc[df2.new_col == "new", "time"] = v + df2.loc[df2.new_col == "new", "time"] = v - expected = Series([v[0], df.loc[1, "time"]], name="time") + expected = Series([v[0].tz_convert("UTC"), df.loc[1, "time"]], name="time") tm.assert_series_equal(df2.time, expected) v = df.loc[df.new_col == "new", "time"] + Timedelta("1s") diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index e07da3fcdb53c..74d05b7e43b2f 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -1035,28 +1035,18 @@ def key(self): return 0 @pytest.fixture - def expected(self): + def expected(self, obj, val): + # pre-2.0 this would cast to object, in 2.0 we cast the val to + # the target tz expected = Series( [ - Timestamp("2000-01-01 00:00:00-05:00", tz="US/Eastern"), + val.tz_convert("US/Central"), Timestamp("2000-01-02 00:00:00-06:00", tz="US/Central"), ], - dtype=object, + dtype=obj.dtype, ) return expected - @pytest.fixture(autouse=True) - def assert_warns(self, request): - # check that we issue a FutureWarning about timezone-matching - if request.function.__name__ == "test_slice_key": - key = request.getfixturevalue("key") - if not isinstance(key, slice): - # The test is a no-op, so no warning will be issued - yield - return - with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): - yield - @pytest.mark.parametrize( "obj,expected", @@ -1341,7 +1331,8 @@ def obj(self): "val,exp_dtype", [ (Timestamp("2012-01-01", tz="US/Eastern"), "datetime64[ns, US/Eastern]"), - (Timestamp("2012-01-01", tz="US/Pacific"), object), + # pre-2.0, a mis-matched tz would end up casting to object + (Timestamp("2012-01-01", tz="US/Pacific"), "datetime64[ns, US/Eastern]"), (Timestamp("2012-01-01"), object), (1, object), ], @@ -1353,24 +1344,6 
@@ def obj(self): tz = "US/Eastern" return Series(date_range("2011-01-01", freq="D", periods=4, tz=tz)) - @pytest.fixture(autouse=True) - def assert_warns(self, request): - # check that we issue a FutureWarning about timezone-matching - if request.function.__name__ == "test_slice_key": - key = request.getfixturevalue("key") - if not isinstance(key, slice): - # The test is a no-op, so no warning will be issued - yield - return - - exp_dtype = request.getfixturevalue("exp_dtype") - val = request.getfixturevalue("val") - if exp_dtype == object and isinstance(val, Timestamp) and val.tz is not None: - with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): - yield - else: - yield - @pytest.mark.parametrize( "val,exp_dtype", diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index 18a4d8355c764..caa14a440d04c 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -559,14 +559,15 @@ def test_datetime64_tz_fillna(self, tz): tm.assert_series_equal(expected, result) tm.assert_series_equal(isna(ser), null_loc) - with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): - result = ser.fillna(Timestamp("20130101", tz="US/Pacific")) + # pre-2.0 fillna with mixed tzs would cast to object, in 2.0 + # it retains dtype. 
+ result = ser.fillna(Timestamp("20130101", tz="US/Pacific")) expected = Series( [ Timestamp("2011-01-01 10:00", tz=tz), - Timestamp("2013-01-01", tz="US/Pacific"), + Timestamp("2013-01-01", tz="US/Pacific").tz_convert(tz), Timestamp("2011-01-03 10:00", tz=tz), - Timestamp("2013-01-01", tz="US/Pacific"), + Timestamp("2013-01-01", tz="US/Pacific").tz_convert(tz), ] ) tm.assert_series_equal(expected, result) @@ -817,18 +818,15 @@ def test_fillna_datetime64_with_timezone_tzinfo(self): result = ser.fillna(datetime(2020, 1, 2, tzinfo=timezone.utc)) tm.assert_series_equal(result, expected) - # but we dont (yet) consider distinct tzinfos for non-UTC tz equivalent + # pre-2.0 we cast to object with mixed tzs, in 2.0 we retain dtype ts = Timestamp("2000-01-01", tz="US/Pacific") ser2 = Series(ser._values.tz_convert("dateutil/US/Pacific")) assert ser2.dtype.kind == "M" - with tm.assert_produces_warning(FutureWarning, match="mismatched timezone"): - result = ser2.fillna(ts) - expected = Series([ser[0], ts, ser[2]], dtype=object) - # TODO(2.0): once deprecation is enforced - # expected = Series( - # [ser2[0], ts.tz_convert(ser2.dtype.tz), ser2[2]], - # dtype=ser2.dtype, - # ) + result = ser2.fillna(ts) + expected = Series( + [ser2[0], ts.tz_convert(ser2.dtype.tz), ser2[2]], + dtype=ser2.dtype, + ) tm.assert_series_equal(result, expected) def test_fillna_pos_args_deprecation(self):