Skip to content

BUG: respect freq=None in DTA constructor #47296

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -737,6 +737,7 @@ Datetimelike
- Bug in :meth:`DatetimeIndex.tz_localize` localizing to UTC failing to make a copy of the underlying data (:issue:`46460`)
- Bug in :meth:`DatetimeIndex.resolution` incorrectly returning "day" instead of "nanosecond" for nanosecond-resolution indexes (:issue:`46903`)
- Bug in :class:`Timestamp` with an integer or float value and ``unit="Y"`` or ``unit="M"`` giving slightly-wrong results (:issue:`47266`)
- Bug in :class:`DatetimeArray` construction when passed another :class:`DatetimeArray` with ``freq=None`` incorrectly inheriting the ``freq`` of the given array instead of leaving it unset (:issue:`47296`)
-

Timedelta
Expand Down
47 changes: 28 additions & 19 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from typing import (
TYPE_CHECKING,
Literal,
cast,
)
import warnings

Expand Down Expand Up @@ -256,15 +257,26 @@ class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps):
_freq = None

def __init__(
self, values, dtype=DT64NS_DTYPE, freq=None, copy: bool = False
self, values, dtype=DT64NS_DTYPE, freq=lib.no_default, copy: bool = False
) -> None:
values = extract_array(values, extract_numpy=True)
if isinstance(values, IntegerArray):
values = values.to_numpy("int64", na_value=iNaT)

inferred_freq = getattr(values, "_freq", None)
explicit_none = freq is None
freq = freq if freq is not lib.no_default else None

if isinstance(values, type(self)):
if explicit_none:
# don't inherit from values
pass
elif freq is None:
freq = values.freq
elif freq and values.freq:
freq = to_offset(freq)
freq, _ = dtl.validate_inferred_freq(freq, values.freq, False)

# validation
dtz = getattr(dtype, "tz", None)
if dtz and values.tz is None:
Expand All @@ -279,14 +291,13 @@ def __init__(
elif values.tz:
dtype = values.dtype

if freq is None:
freq = values.freq
values = values._ndarray

if not isinstance(values, np.ndarray):
raise ValueError(
f"Unexpected type '{type(values).__name__}'. 'values' must be "
"a DatetimeArray, ndarray, or Series or Index containing one of those."
f"Unexpected type '{type(values).__name__}'. 'values' must be a "
f"{type(self).__name__}, ndarray, or Series or Index "
"containing one of those."
)
if values.ndim not in [1, 2]:
raise ValueError("Only 1-dimensional input arrays are supported.")
Expand All @@ -297,31 +308,19 @@ def __init__(
# nanosecond UTC (or tz-naive) unix timestamps
values = values.view(DT64NS_DTYPE)

if values.dtype != DT64NS_DTYPE:
raise ValueError(
"The dtype of 'values' is incorrect. Must be 'datetime64[ns]'. "
f"Got {values.dtype} instead."
)

_validate_dt64_dtype(values.dtype)
dtype = _validate_dt64_dtype(dtype)

if freq == "infer":
raise ValueError(
"Frequency inference not allowed in DatetimeArray.__init__. "
f"Frequency inference not allowed in {type(self).__name__}.__init__. "
"Use 'pd.array()' instead."
)

if copy:
values = values.copy()
if freq:
freq = to_offset(freq)
if getattr(dtype, "tz", None):
# https://github.com/pandas-dev/pandas/issues/18595
# Ensure that we have a standard timezone for pytz objects.
# Without this, things like adding an array of timedeltas and
# a tz-aware Timestamp (with a tz specific to its datetime) will
# be incorrect(ish?) for the array as a whole
dtype = DatetimeTZDtype(tz=timezones.tz_standardize(dtype.tz))

NDArrayBacked.__init__(self, values=values, dtype=dtype)
self._freq = freq
Expand Down Expand Up @@ -2394,6 +2393,16 @@ def _validate_dt64_dtype(dtype):
f"Unexpected value for 'dtype': '{dtype}'. "
"Must be 'datetime64[ns]' or DatetimeTZDtype'."
)

if getattr(dtype, "tz", None):
# https://github.com/pandas-dev/pandas/issues/18595
# Ensure that we have a standard timezone for pytz objects.
# Without this, things like adding an array of timedeltas and
# a tz-aware Timestamp (with a tz specific to its datetime) will
# be incorrect(ish?) for the array as a whole
dtype = cast(DatetimeTZDtype, dtype)
dtype = DatetimeTZDtype(tz=timezones.tz_standardize(dtype.tz))

return dtype


Expand Down
14 changes: 7 additions & 7 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,21 +186,22 @@ def __init__(

if isinstance(values, type(self)):
if explicit_none:
# dont inherit from values
# don't inherit from values
pass
elif freq is None:
freq = values.freq
elif freq and values.freq:
freq = to_offset(freq)
freq, _ = dtl.validate_inferred_freq(freq, values.freq, False)

values = values._ndarray

if not isinstance(values, np.ndarray):
msg = (
raise ValueError(
f"Unexpected type '{type(values).__name__}'. 'values' must be a "
"TimedeltaArray, ndarray, or Series or Index containing one of those."
f"{type(self).__name__}, ndarray, or Series or Index "
"containing one of those."
)
raise ValueError(msg)
if values.ndim not in [1, 2]:
raise ValueError("Only 1-dimensional input arrays are supported.")

Expand All @@ -214,11 +215,10 @@ def __init__(
dtype = _validate_td64_dtype(dtype)

if freq == "infer":
msg = (
"Frequency inference not allowed in TimedeltaArray.__init__. "
raise ValueError(
f"Frequency inference not allowed in {type(self).__name__}.__init__. "
"Use 'pd.array()' instead."
)
raise ValueError(msg)

if copy:
values = values.copy()
Expand Down
5 changes: 2 additions & 3 deletions pandas/tests/arrays/datetimes/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,8 @@ def test_non_array_raises(self):
def test_bool_dtype_raises(self):
arr = np.array([1, 2, 3], dtype="bool")

with pytest.raises(
ValueError, match="The dtype of 'values' is incorrect.*bool"
):
msg = "Unexpected value for 'dtype': 'bool'. Must be"
with pytest.raises(ValueError, match=msg):
DatetimeArray(arr)

msg = r"dtype bool cannot be converted to datetime64\[ns\]"
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/indexes/datetimes/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -925,6 +925,9 @@ def test_explicit_none_freq(self):
result = DatetimeIndex(rng._data, freq=None)
assert result.freq is None

dta = DatetimeArray(rng, freq=None)
assert dta.freq is None

def test_dti_constructor_years_only(self, tz_naive_fixture):
tz = tz_naive_fixture
# GH 6961
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/indexes/timedeltas/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,9 @@ def test_explicit_none_freq(self):
result = TimedeltaIndex(tdi._data, freq=None)
assert result.freq is None

tda = TimedeltaArray(tdi, freq=None)
assert tda.freq is None

def test_from_categorical(self):
tdi = timedelta_range(1, periods=5)

Expand Down