diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 3294af742fae0..569533acdd4d8 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -853,6 +853,7 @@ Other - Bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`) - Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`) - Bug in :meth:`Series.isin` raising ``TypeError`` when series is large (>10**6) and ``values`` contains NA (:issue:`60678`) +- Bug in :meth:`Series.map` where mapping with a ``dict`` failed to match keys when the Series used ``timestamp[ns][pyarrow]`` dtype. (:issue:`61231`) - Bug in :meth:`Series.mode` where an exception was raised when taking the mode with nullable types with no null values in the series. (:issue:`58926`) - Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`) - Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index d7187b57a69e4..cf55bf736835a 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1481,10 +1481,20 @@ def to_numpy( return result def map(self, mapper, na_action: Literal["ignore"] | None = None): + from pandas import Series + + pa_type = self._pa_array.type + + if pa.types.is_timestamp(pa_type) or pa.types.is_duration(pa_type): + datelike = self._maybe_convert_datelike_array() + temp = Series(datelike, dtype=datelike.dtype) + mapped = temp.map(mapper, na_action=na_action) + return mapped.to_numpy() + if is_numeric_dtype(self.dtype): return map_array(self.to_numpy(), mapper, na_action=na_action) - else: - return super().map(mapper, na_action) + + return super().map(mapper, na_action=na_action) @doc(ExtensionArray.duplicated) def duplicated( diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index fc5930ebcd8ac..df6d79ba166c1 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -46,6 +46,7 @@ pa_version_under20p0, ) +from pandas.core.dtypes.common import is_timedelta64_dtype from pandas.core.dtypes.dtypes import ( ArrowDtype, CategoricalDtypeType, @@ -280,17 +281,33 @@ def test_compare_scalar(self, data, comparison_op): @pytest.mark.parametrize("na_action", [None, "ignore"]) def test_map(self, data_missing, na_action): - if data_missing.dtype.kind in "mM": - result = data_missing.map(lambda x: x, na_action=na_action) - expected = data_missing.to_numpy(dtype=object) + result = data_missing.map(lambda x: x, na_action=na_action) + + if data_missing.dtype == "float32[pyarrow]": + expected = data_missing.to_numpy(dtype="float64", na_value=np.nan) tm.assert_numpy_array_equal(result, expected) + + elif data_missing.dtype.kind in "mM": + expected = pd.Series(data_missing.to_numpy()) + + orig_dtype = expected.dtype + if isinstance(orig_dtype, pd.DatetimeTZDtype): + pass + elif is_datetime64_any_dtype(orig_dtype): + result = result.astype("datetime64[ns]").astype("int64") + expected = expected.astype("datetime64[ns]").astype("int64") + elif is_timedelta64_dtype(orig_dtype): + result = result.astype("timedelta64[ns]") + expected = expected.astype("timedelta64[ns]") + + result = pd.Series(result) + expected = pd.Series(expected) + tm.assert_series_equal( + result, expected, check_dtype=False, check_exact=False + ) + else: - result = data_missing.map(lambda x: x, na_action=na_action) - if data_missing.dtype == "float32[pyarrow]": - # map roundtrips through objects, which converts to float64 - expected = data_missing.to_numpy(dtype="float64", na_value=np.nan) - else: - expected = data_missing.to_numpy() + expected = data_missing.to_numpy() tm.assert_numpy_array_equal(result, expected) def test_astype_str(self, data, request, using_infer_string): diff --git a/pandas/tests/series/methods/test_map.py b/pandas/tests/series/methods/test_map.py index 384b7ce3dc985..6d7f5517d3981 100644 --- a/pandas/tests/series/methods/test_map.py +++ b/pandas/tests/series/methods/test_map.py @@ -606,6 +606,17 @@ def test_map_kwargs(): tm.assert_series_equal(result, expected) +def test_map_arrow_timestamp_dict(): + # GH 61231 + pytest.importorskip("pyarrow") + + ser = Series(date_range("2023-01-01", periods=3)).astype("timestamp[ns][pyarrow]") + mapper = {ts: i for i, ts in enumerate(ser)} + result = ser.map(mapper) + expected = Series([0, 1, 2], dtype="int64") + tm.assert_series_equal(result, expected) + + def test_map_arg_as_kwarg(): with tm.assert_produces_warning( FutureWarning, match="`arg` has been renamed to `func`"