Skip to content

BUG: always strip .freq when putting DTI/TDI into Series/DataFrame #41425

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -972,6 +972,7 @@ Other
- Bug in :func:`pandas.util.show_versions` where console JSON output was not proper JSON (:issue:`39701`)
- Bug in :meth:`DataFrame.convert_dtypes` incorrectly raised ValueError when called on an empty DataFrame (:issue:`40393`)
- Bug in :meth:`DataFrame.clip` not interpreting missing values as no threshold (:issue:`40420`)
- Bug in :class:`Series` backed by :class:`DatetimeArray` or :class:`TimedeltaArray` sometimes failing to set the array's ``freq`` to ``None`` (:issue:`41425`)

.. ---------------------------------------------------------------------------

Expand Down
4 changes: 2 additions & 2 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,8 +498,8 @@ def to_numpy(

>>> ser = pd.Series(pd.date_range('2000', periods=2, tz="CET"))
>>> ser.to_numpy(dtype=object)
array([Timestamp('2000-01-01 00:00:00+0100', tz='CET', freq='D'),
Timestamp('2000-01-02 00:00:00+0100', tz='CET', freq='D')],
array([Timestamp('2000-01-01 00:00:00+0100', tz='CET'),
Timestamp('2000-01-02 00:00:00+0100', tz='CET')],
dtype=object)

Or ``dtype='datetime64[ns]'`` to return an ndarray of native
Expand Down
9 changes: 5 additions & 4 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@
from pandas.core.internals.blocks import (
ensure_block_shape,
external_values,
maybe_coerce_values,
new_block,
to_native_types,
)
Expand Down Expand Up @@ -701,7 +702,7 @@ def __init__(

if verify_integrity:
self._axes = [ensure_index(ax) for ax in axes]
self.arrays = [ensure_wrapped_if_datetimelike(arr) for arr in arrays]
self.arrays = [maybe_coerce_values(arr) for arr in arrays]
self._verify_integrity()

def _verify_integrity(self) -> None:
Expand Down Expand Up @@ -814,7 +815,7 @@ def iset(self, loc: int | slice | np.ndarray, value: ArrayLike):

# TODO we receive a datetime/timedelta64 ndarray from DataFrame._iset_item
# but we should avoid that and pass directly the proper array
value = ensure_wrapped_if_datetimelike(value)
value = maybe_coerce_values(value)

assert isinstance(value, (np.ndarray, ExtensionArray))
assert value.ndim == 1
Expand Down Expand Up @@ -873,7 +874,7 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike) -> None:
raise ValueError(
f"Expected a 1D array, got an array with shape {value.shape}"
)
value = ensure_wrapped_if_datetimelike(value)
value = maybe_coerce_values(value)

# TODO self.arrays can be empty
# assert len(value) == len(self.arrays[0])
Expand Down Expand Up @@ -1188,7 +1189,7 @@ def __init__(
assert len(arrays) == 1
self._axes = [ensure_index(ax) for ax in self._axes]
arr = arrays[0]
arr = ensure_wrapped_if_datetimelike(arr)
arr = maybe_coerce_values(arr)
if isinstance(arr, ABCPandasArray):
arr = arr.to_numpy()
self.arrays = [arr]
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1860,6 +1860,10 @@ def maybe_coerce_values(values) -> ArrayLike:
if issubclass(values.dtype.type, str):
values = np.array(values, dtype=object)

if isinstance(values, (DatetimeArray, TimedeltaArray)) and values.freq is not None:
# freq is only stored in DatetimeIndex/TimedeltaIndex, not in Series/DataFrame
values = values._with_freq(None)

return values


Expand Down
4 changes: 2 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -813,8 +813,8 @@ def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:

>>> tzser = pd.Series(pd.date_range('2000', periods=2, tz="CET"))
>>> np.asarray(tzser, dtype="object")
array([Timestamp('2000-01-01 00:00:00+0100', tz='CET', freq='D'),
Timestamp('2000-01-02 00:00:00+0100', tz='CET', freq='D')],
array([Timestamp('2000-01-01 00:00:00+0100', tz='CET'),
Timestamp('2000-01-02 00:00:00+0100', tz='CET')],
dtype=object)

Or the values may be localized to UTC and the tzinfo discarded with
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/extension/test_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,10 @@ class TestDatetimeDtype(BaseDatetimeTests, base.BaseDtypeTests):


class TestConstructors(BaseDatetimeTests, base.BaseConstructorsTests):
pass
def test_series_constructor(self, data):
# Series construction drops any .freq attr
data = data._with_freq(None)
super().test_series_constructor(data)


class TestGetitem(BaseDatetimeTests, base.BaseGetitemTests):
Expand Down
5 changes: 2 additions & 3 deletions pandas/tests/frame/methods/test_set_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def test_set_index_cast_datetimeindex(self):
idf = df.set_index("A")
assert isinstance(idf.index, DatetimeIndex)

def test_set_index_dst(self, using_array_manager):
def test_set_index_dst(self):
di = date_range("2006-10-29 00:00:00", periods=3, freq="H", tz="US/Pacific")

df = DataFrame(data={"a": [0, 1, 2], "b": [3, 4, 5]}, index=di).reset_index()
Expand All @@ -106,8 +106,7 @@ def test_set_index_dst(self, using_array_manager):
data={"a": [0, 1, 2], "b": [3, 4, 5]},
index=Index(di, name="index"),
)
if not using_array_manager:
exp.index = exp.index._with_freq(None)
exp.index = exp.index._with_freq(None)
tm.assert_frame_equal(res, exp)

# GH#12920
Expand Down
6 changes: 2 additions & 4 deletions pandas/tests/window/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,7 @@ def test_rolling_datetime(axis_frame, tz_naive_fixture):
),
],
)
def test_rolling_window_as_string(center, expected_data, using_array_manager):
def test_rolling_window_as_string(center, expected_data):
# see gh-22590
date_today = datetime.now()
days = date_range(date_today, date_today + timedelta(365), freq="D")
Expand All @@ -602,9 +602,7 @@ def test_rolling_window_as_string(center, expected_data, using_array_manager):
].agg("max")

index = days.rename("DateCol")
if not using_array_manager:
# INFO(ArrayManager) preserves the frequency of the index
index = index._with_freq(None)
index = index._with_freq(None)
expected = Series(expected_data, index=index, name="metric")
tm.assert_series_equal(result, expected)

Expand Down