Skip to content

Commit 9289f5d

Browse files
authored
BUG: map method on datetimelikes should not work arraywise (#51979)
* BUG: map method on datetimelikes sometimes works arraywise * fix groupby * fix pre-commit
1 parent be1b41b commit 9289f5d

File tree

8 files changed

+25
-33
lines changed

8 files changed

+25
-33
lines changed

doc/source/whatsnew/v2.1.0.rst

+7-2
Original file line numberDiff line numberDiff line change
@@ -140,13 +140,13 @@ Categorical
140140
Datetimelike
141141
^^^^^^^^^^^^
142142
- Bug in :meth:`Timestamp.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsDatetime`` (:issue:`51494`)
143-
-
143+
- Bug in :meth:`arrays.DatetimeArray.map` and :meth:`DatetimeIndex.map`, where the supplied callable operated array-wise instead of element-wise (:issue:`51977`)
144144

145145
Timedelta
146146
^^^^^^^^^
147147
- Bug in :meth:`Timedelta.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsTimedelta`` (:issue:`51494`)
148148
- Bug in :class:`TimedeltaIndex` division or multiplication leading to ``.freq`` of "0 Days" instead of ``None`` (:issue:`51575`)
149-
-
149+
- Bug in :meth:`arrays.TimedeltaArray.map` and :meth:`TimedeltaIndex.map`, where the supplied callable operated array-wise instead of element-wise (:issue:`51977`)
150150

151151
Timezones
152152
^^^^^^^^^
@@ -197,6 +197,7 @@ Period
197197
^^^^^^
198198
- Bug in :class:`PeriodDtype` constructor failing to raise ``TypeError`` when no argument is passed or when ``None`` is passed (:issue:`27388`)
199199
- Bug in :class:`PeriodDtype` constructor raising ``ValueError`` instead of ``TypeError`` when an invalid type is passed (:issue:`51790`)
200+
- Bug in :meth:`arrays.PeriodArray.map` and :meth:`PeriodIndex.map`, where the supplied callable operated array-wise instead of element-wise (:issue:`51977`)
200201
-
201202

202203
Plotting
@@ -209,6 +210,10 @@ Groupby/resample/rolling
209210
- Bug in :meth:`DataFrameGroupBy.idxmin`, :meth:`SeriesGroupBy.idxmin`, :meth:`DataFrameGroupBy.idxmax`, :meth:`SeriesGroupBy.idxmax` return wrong dtype when used on empty DataFrameGroupBy or SeriesGroupBy (:issue:`51423`)
210211
- Bug in weighted rolling aggregations when specifying ``min_periods=0`` (:issue:`51449`)
211212
- Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` incorrectly allowing non-fixed ``freq`` when resampling on a :class:`TimedeltaIndex` (:issue:`51896`)
213+
- Bug in :meth:`DataFrame.groupby` and :meth:`Series.groupby`, where, when the index of the
214+
grouped :class:`Series` or :class:`DataFrame` was a :class:`DatetimeIndex`, :class:`TimedeltaIndex`
215+
or :class:`PeriodIndex`, and the ``groupby`` method was given a function as its first argument,
216+
the function operated on the whole index rather than each element of the index. (:issue:`51979`)
212217
-
213218

214219
Reshaping

pandas/core/arrays/datetimelike.py

+2-13
Original file line numberDiff line numberDiff line change
@@ -752,19 +752,8 @@ def map(self, mapper, na_action=None):
752752

753753
from pandas import Index
754754

755-
idx = Index(self)
756-
try:
757-
result = mapper(idx)
758-
759-
# Try to use this result if we can
760-
if isinstance(result, np.ndarray):
761-
result = Index(result)
762-
763-
if not isinstance(result, Index):
764-
raise TypeError("The map function must return an Index object")
765-
except Exception:
766-
result = map_array(self, mapper)
767-
result = Index(result)
755+
result = map_array(self, mapper)
756+
result = Index(result)
768757

769758
if isinstance(result, ABCMultiIndex):
770759
return result.to_numpy()

pandas/tests/apply/test_series_apply.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ def test_apply_datetimetz():
204204
tm.assert_series_equal(result, exp)
205205

206206
result = s.apply(lambda x: x.hour)
207-
exp = Series(list(range(24)) + [0], name="XX", dtype=np.int32)
207+
exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64)
208208
tm.assert_series_equal(result, exp)
209209

210210
# not vectorized
@@ -779,7 +779,7 @@ def test_map_datetimetz():
779779
tm.assert_series_equal(result, exp)
780780

781781
result = s.map(lambda x: x.hour)
782-
exp = Series(list(range(24)) + [0], name="XX", dtype=np.int32)
782+
exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64)
783783
tm.assert_series_equal(result, exp)
784784

785785
# not vectorized

pandas/tests/groupby/test_groupby.py

-1
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,6 @@ def test_pass_args_kwargs_duplicate_columns(tsframe, as_index):
261261
2: tsframe[tsframe.index.month == 2].quantile(0.8),
262262
}
263263
expected = DataFrame(ex_data).T
264-
expected.index = expected.index.astype(np.int32)
265264
if not as_index:
266265
# TODO: try to get this more consistent?
267266
expected.index = Index(range(2))

pandas/tests/groupby/test_grouping.py

+9-11
Original file line numberDiff line numberDiff line change
@@ -436,19 +436,17 @@ def test_groupby_grouper_f_sanity_checked(self):
436436
dates = date_range("01-Jan-2013", periods=12, freq="MS")
437437
ts = Series(np.random.randn(12), index=dates)
438438

439-
# GH3035
440-
# index.map is used to apply grouper to the index
441-
# if it fails on the elements, map tries it on the entire index as
442-
# a sequence. That can yield invalid results that cause trouble
443-
# down the line.
444-
# the surprise comes from using key[0:6] rather than str(key)[0:6]
445-
# when the elements are Timestamp.
446-
# the result is Index[0:6], very confusing.
447-
448-
msg = r"Grouper result violates len\(labels\) == len\(data\)"
449-
with pytest.raises(AssertionError, match=msg):
439+
# GH51979
440+
# simple check that the passed function doesn't operate on the whole index
441+
msg = "'Timestamp' object is not subscriptable"
442+
with pytest.raises(TypeError, match=msg):
450443
ts.groupby(lambda key: key[0:6])
451444

445+
result = ts.groupby(lambda x: x).sum()
446+
expected = ts.groupby(ts.index).sum()
447+
expected.index.freq = None
448+
tm.assert_series_equal(result, expected)
449+
452450
def test_grouping_error_on_multidim_input(self, df):
453451
msg = "Grouper for '<class 'pandas.core.frame.DataFrame'>' not 1-dimensional"
454452
with pytest.raises(ValueError, match=msg):

pandas/tests/indexes/datetimelike.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def test_view(self, simple_index):
7373
def test_map_callable(self, simple_index):
7474
index = simple_index
7575
expected = index + index.freq
76-
result = index.map(lambda x: x + x.freq)
76+
result = index.map(lambda x: x + index.freq)
7777
tm.assert_index_equal(result, expected)
7878

7979
# map to NaT

pandas/tests/indexes/test_base.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -535,8 +535,9 @@ def test_map_tseries_indices_return_index(self, attr):
535535

536536
def test_map_tseries_indices_accsr_return_index(self):
537537
date_index = tm.makeDateIndex(24, freq="h", name="hourly")
538-
expected = Index(range(24), dtype="int32", name="hourly")
539-
tm.assert_index_equal(expected, date_index.map(lambda x: x.hour), exact=True)
538+
result = date_index.map(lambda x: x.hour)
539+
expected = Index(np.arange(24, dtype="int64"), name="hourly")
540+
tm.assert_index_equal(result, expected, exact=True)
540541

541542
@pytest.mark.parametrize(
542543
"mapper",

pandas/tests/indexes/timedeltas/test_timedelta.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def test_map(self):
5454

5555
f = lambda x: x.days
5656
result = rng.map(f)
57-
exp = Index([f(x) for x in rng], dtype=np.int32)
57+
exp = Index([f(x) for x in rng], dtype=np.int64)
5858
tm.assert_index_equal(result, exp)
5959

6060
def test_pass_TimedeltaIndex_to_index(self):

0 commit comments

Comments
 (0)