diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 1b079217f64ea..156fe0498a8b7 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -737,6 +737,7 @@ Datetimelike - Bug in :meth:`DatetimeIndex.tz_localize` localizing to UTC failing to make a copy of the underlying data (:issue:`46460`) - Bug in :meth:`DatetimeIndex.resolution` incorrectly returning "day" instead of "nanosecond" for nanosecond-resolution indexes (:issue:`46903`) - Bug in :class:`Timestamp` with an integer or float value and ``unit="Y"`` or ``unit="M"`` giving slightly-wrong results (:issue:`47266`) +- Bug in :class:`DatetimeArray` construction when passed another :class:`DatetimeArray` and ``freq=None`` incorrectly inferring the freq from the given array (:issue:`47296`) - Timedelta diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index da5542feaea56..799cd941a4e52 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -9,6 +9,7 @@ from typing import ( TYPE_CHECKING, Literal, + cast, ) import warnings @@ -256,15 +257,26 @@ class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): _freq = None def __init__( - self, values, dtype=DT64NS_DTYPE, freq=None, copy: bool = False + self, values, dtype=DT64NS_DTYPE, freq=lib.no_default, copy: bool = False ) -> None: values = extract_array(values, extract_numpy=True) if isinstance(values, IntegerArray): values = values.to_numpy("int64", na_value=iNaT) inferred_freq = getattr(values, "_freq", None) + explicit_none = freq is None + freq = freq if freq is not lib.no_default else None if isinstance(values, type(self)): + if explicit_none: + # don't inherit from values + pass + elif freq is None: + freq = values.freq + elif freq and values.freq: + freq = to_offset(freq) + freq, _ = dtl.validate_inferred_freq(freq, values.freq, False) + # validation dtz = getattr(dtype, "tz", None) if dtz and values.tz is None: @@ -279,14 +291,13 @@ def __init__( elif values.tz: dtype = values.dtype - if freq is None: - freq = values.freq values = values._ndarray if not isinstance(values, np.ndarray): raise ValueError( - f"Unexpected type '{type(values).__name__}'. 'values' must be " - "a DatetimeArray, ndarray, or Series or Index containing one of those." + f"Unexpected type '{type(values).__name__}'. 'values' must be a " + f"{type(self).__name__}, ndarray, or Series or Index " + "containing one of those." ) if values.ndim not in [1, 2]: raise ValueError("Only 1-dimensional input arrays are supported.") @@ -297,17 +308,12 @@ def __init__( # nanosecond UTC (or tz-naive) unix timestamps values = values.view(DT64NS_DTYPE) - if values.dtype != DT64NS_DTYPE: - raise ValueError( - "The dtype of 'values' is incorrect. Must be 'datetime64[ns]'. " - f"Got {values.dtype} instead." - ) - + _validate_dt64_dtype(values.dtype) dtype = _validate_dt64_dtype(dtype) if freq == "infer": raise ValueError( - "Frequency inference not allowed in DatetimeArray.__init__. " + f"Frequency inference not allowed in {type(self).__name__}.__init__. " "Use 'pd.array()' instead." ) @@ -315,13 +321,6 @@ def __init__( values = values.copy() if freq: freq = to_offset(freq) - if getattr(dtype, "tz", None): - # https://github.com/pandas-dev/pandas/issues/18595 - # Ensure that we have a standard timezone for pytz objects. - # Without this, things like adding an array of timedeltas and - # a tz-aware Timestamp (with a tz specific to its datetime) will - # be incorrect(ish?) for the array as a whole - dtype = DatetimeTZDtype(tz=timezones.tz_standardize(dtype.tz)) NDArrayBacked.__init__(self, values=values, dtype=dtype) self._freq = freq @@ -2394,6 +2393,16 @@ def _validate_dt64_dtype(dtype): f"Unexpected value for 'dtype': '{dtype}'. " "Must be 'datetime64[ns]' or DatetimeTZDtype'." ) + + if getattr(dtype, "tz", None): + # https://github.com/pandas-dev/pandas/issues/18595 + # Ensure that we have a standard timezone for pytz objects. + # Without this, things like adding an array of timedeltas and + # a tz-aware Timestamp (with a tz specific to its datetime) will + # be incorrect(ish?) for the array as a whole + dtype = cast(DatetimeTZDtype, dtype) + dtype = DatetimeTZDtype(tz=timezones.tz_standardize(dtype.tz)) + return dtype diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 3bbb03d88e38d..e08518a54fe6b 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -186,21 +186,22 @@ def __init__( if isinstance(values, type(self)): if explicit_none: - # dont inherit from values + # don't inherit from values pass elif freq is None: freq = values.freq elif freq and values.freq: freq = to_offset(freq) freq, _ = dtl.validate_inferred_freq(freq, values.freq, False) + values = values._ndarray if not isinstance(values, np.ndarray): - msg = ( + raise ValueError( f"Unexpected type '{type(values).__name__}'. 'values' must be a " - "TimedeltaArray, ndarray, or Series or Index containing one of those." + f"{type(self).__name__}, ndarray, or Series or Index " + "containing one of those." ) - raise ValueError(msg) if values.ndim not in [1, 2]: raise ValueError("Only 1-dimensional input arrays are supported.") @@ -214,11 +215,10 @@ def __init__( dtype = _validate_td64_dtype(dtype) if freq == "infer": - msg = ( - "Frequency inference not allowed in TimedeltaArray.__init__. " + raise ValueError( + f"Frequency inference not allowed in {type(self).__name__}.__init__. " "Use 'pd.array()' instead." ) - raise ValueError(msg) if copy: values = values.copy() diff --git a/pandas/tests/arrays/datetimes/test_constructors.py b/pandas/tests/arrays/datetimes/test_constructors.py index ed285f2389959..684b478d1de08 100644 --- a/pandas/tests/arrays/datetimes/test_constructors.py +++ b/pandas/tests/arrays/datetimes/test_constructors.py @@ -87,9 +87,8 @@ def test_non_array_raises(self): def test_bool_dtype_raises(self): arr = np.array([1, 2, 3], dtype="bool") - with pytest.raises( - ValueError, match="The dtype of 'values' is incorrect.*bool" - ): + msg = "Unexpected value for 'dtype': 'bool'. Must be" + with pytest.raises(ValueError, match=msg): DatetimeArray(arr) msg = r"dtype bool cannot be converted to datetime64\[ns\]" diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index ea34e636d890f..e971e311e7d20 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -925,6 +925,9 @@ def test_explicit_none_freq(self): result = DatetimeIndex(rng._data, freq=None) assert result.freq is None + dta = DatetimeArray(rng, freq=None) + assert dta.freq is None + def test_dti_constructor_years_only(self, tz_naive_fixture): tz = tz_naive_fixture # GH 6961 diff --git a/pandas/tests/indexes/timedeltas/test_constructors.py b/pandas/tests/indexes/timedeltas/test_constructors.py index 25a0a66ada519..2a5b1be7bddbd 100644 --- a/pandas/tests/indexes/timedeltas/test_constructors.py +++ b/pandas/tests/indexes/timedeltas/test_constructors.py @@ -263,6 +263,9 @@ def test_explicit_none_freq(self): result = TimedeltaIndex(tdi._data, freq=None) assert result.freq is None + tda = TimedeltaArray(tdi, freq=None) + assert tda.freq is None + def test_from_categorical(self): tdi = timedelta_range(1, periods=5)