From 0ce55719313839424e76ee8db57100bbfa13f8b4 Mon Sep 17 00:00:00 2001 From: arthurlw Date: Sat, 5 Apr 2025 22:49:36 -0700 Subject: [PATCH 01/22] implemented pyarrow timestamp support --- pandas/core/algorithms.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 76f2fdad591ff..8c434c2da2e52 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -59,6 +59,7 @@ ) from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.dtypes import ( + ArrowDtype, BaseMaskedDtype, CategoricalDtype, ExtensionDtype, @@ -78,6 +79,7 @@ na_value_for_dtype, ) +from pandas._libs.tslibs.timestamps import Timestamp from pandas.core.array_algos.take import take_nd from pandas.core.construction import ( array as pd_array, @@ -1691,6 +1693,14 @@ def map_array( if na_action == "ignore": mapper = mapper[mapper.index.notna()] + if isinstance(arr.dtype, ArrowDtype) and arr.dtype.name.startswith("timestamp"): + try: + # Convert elements to pandas.Timestamp (or datetime64[ns]) for dict lookup + arr = arr.astype("datetime64[ns]") + except Exception: + # fallback: safe, slow path + arr = np.array([Timestamp(x.as_py()) for x in arr]) + # Since values were input this means we came from either # a dict or a series and mapper should be an index indexer = mapper.index.get_indexer(arr) From 469e28ded9a118e25e0c0d037939136c86637f82 Mon Sep 17 00:00:00 2001 From: arthurlw Date: Sat, 5 Apr 2025 23:01:35 -0700 Subject: [PATCH 02/22] added test --- pandas/tests/series/methods/test_map.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/series/methods/test_map.py b/pandas/tests/series/methods/test_map.py index 84b60a2afe6eb..e10856873a7f3 100644 --- a/pandas/tests/series/methods/test_map.py +++ b/pandas/tests/series/methods/test_map.py @@ -604,3 +604,11 @@ def test_map_kwargs(): result = Series([2, 4, 5]).map(lambda x, y: x + y, y=2) expected = Series([4, 6, 7]) tm.assert_series_equal(result, expected) + +def test_map_arrow_timestamp_dict(): + # GH 61231 + ser = pd.Series(pd.date_range("2023-01-01", periods=3)).astype("timestamp[ns][pyarrow]") + mapper = {ts: i for i, ts in enumerate(ser)} + result = ser.map(mapper) + expected = pd.Series([0, 1, 2], dtype="int64") + tm.assert_series_equal(result, expected) \ No newline at end of file From 1dbf5d16878fc1fe21386731bb366eb59556620c Mon Sep 17 00:00:00 2001 From: arthurlw Date: Sat, 5 Apr 2025 23:05:40 -0700 Subject: [PATCH 03/22] precommit --- pandas/core/algorithms.py | 4 ++-- pandas/tests/series/methods/test_map.py | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 8c434c2da2e52..a05ab17de9e0d 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -24,6 +24,7 @@ lib, ) from pandas._libs.missing import NA +from pandas._libs.tslibs.timestamps import Timestamp from pandas._typing import ( AnyArrayLike, ArrayLike, @@ -79,7 +80,6 @@ na_value_for_dtype, ) -from pandas._libs.tslibs.timestamps import Timestamp from pandas.core.array_algos.take import take_nd from pandas.core.construction import ( array as pd_array, @@ -1695,7 +1695,7 @@ def map_array( if isinstance(arr.dtype, ArrowDtype) and arr.dtype.name.startswith("timestamp"): try: - # Convert elements to pandas.Timestamp (or datetime64[ns]) for dict lookup + # Convert elements to pandas.Timestamp (or datetime64[ns]) arr = arr.astype("datetime64[ns]") except Exception: # fallback: safe, slow path diff --git a/pandas/tests/series/methods/test_map.py b/pandas/tests/series/methods/test_map.py index e10856873a7f3..180e905b110f7 100644 --- a/pandas/tests/series/methods/test_map.py +++ b/pandas/tests/series/methods/test_map.py @@ -605,10 +605,11 @@ def test_map_kwargs(): expected = Series([4, 6, 7]) tm.assert_series_equal(result, expected) + def test_map_arrow_timestamp_dict(): # GH 61231 - ser = pd.Series(pd.date_range("2023-01-01", periods=3)).astype("timestamp[ns][pyarrow]") + ser = Series(date_range("2023-01-01", periods=3)).astype("timestamp[ns][pyarrow]") mapper = {ts: i for i, ts in enumerate(ser)} result = ser.map(mapper) - expected = pd.Series([0, 1, 2], dtype="int64") - tm.assert_series_equal(result, expected) \ No newline at end of file + expected = Series([0, 1, 2], dtype="int64") + tm.assert_series_equal(result, expected) From 1cf2de82b0bc5674519137f228ba2a98a84082fb Mon Sep 17 00:00:00 2001 From: arthurlw Date: Sat, 5 Apr 2025 23:08:53 -0700 Subject: [PATCH 04/22] whatsnew --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 2b437734a451a..870a39bd5ea39 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -844,6 +844,7 @@ Other - Bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`) - Bug in :meth:`Series.dt` methods in :class:`ArrowDtype` that were returning incorrect values. (:issue:`57355`) - Bug in :meth:`Series.isin` raising ``TypeError`` when series is large (>10**6) and ``values`` contains NA (:issue:`60678`) +- Bug in :meth:`Series.map` where mapping with a ``dict`` failed to match keys when the Series used ``timestamp[ns][pyarrow]`` dtype. (:issue:`61231`) - Bug in :meth:`Series.mode` where an exception was raised when taking the mode with nullable types with no null values in the series. (:issue:`58926`) - Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`) - Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`) From 52dc7fbd7a4901b650610d7a890b0232b21a36b1 Mon Sep 17 00:00:00 2001 From: arthurlw Date: Sat, 5 Apr 2025 23:36:51 -0700 Subject: [PATCH 05/22] added import condition for pyarrow in test --- pandas/tests/series/methods/test_map.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/series/methods/test_map.py b/pandas/tests/series/methods/test_map.py index 180e905b110f7..94fdc2ab27a14 100644 --- a/pandas/tests/series/methods/test_map.py +++ b/pandas/tests/series/methods/test_map.py @@ -605,9 +605,10 @@ def test_map_kwargs(): expected = Series([4, 6, 7]) tm.assert_series_equal(result, expected) - def test_map_arrow_timestamp_dict(): # GH 61231 + pytest.importorskip("pyarrow", minversion="10.0.1") + ser = Series(date_range("2023-01-01", periods=3)).astype("timestamp[ns][pyarrow]") mapper = {ts: i for i, ts in enumerate(ser)} result = ser.map(mapper) From 66215a924e26406e7d93cfc35324eedb459a6521 Mon Sep 17 00:00:00 2001 From: arthurlw Date: Sat, 5 Apr 2025 23:53:02 -0700 Subject: [PATCH 06/22] precommit --- pandas/tests/series/methods/test_map.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/series/methods/test_map.py b/pandas/tests/series/methods/test_map.py index 94fdc2ab27a14..4db91653101c9 100644 --- a/pandas/tests/series/methods/test_map.py +++ b/pandas/tests/series/methods/test_map.py @@ -605,6 +605,7 @@ def test_map_kwargs(): expected = Series([4, 6, 7]) tm.assert_series_equal(result, expected) + def test_map_arrow_timestamp_dict(): # GH 61231 pytest.importorskip("pyarrow", minversion="10.0.1") From 4445ecd535895e13ff48617c24c17522d9061120 Mon Sep 17 00:00:00 2001 From: arthurlw Date: Mon, 7 Apr 2025 11:58:43 -0700 Subject: [PATCH 07/22] moved logic from algorithms.py to ArrowExtensionArray.map --- pandas/core/algorithms.py | 10 ---------- pandas/core/arrays/arrow/array.py | 8 ++++++++ 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index a05ab17de9e0d..76f2fdad591ff 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -24,7 +24,6 @@ lib, ) from pandas._libs.missing import NA -from pandas._libs.tslibs.timestamps import Timestamp from pandas._typing import ( AnyArrayLike, ArrayLike, @@ -60,7 +59,6 @@ ) from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.dtypes import ( - ArrowDtype, BaseMaskedDtype, CategoricalDtype, ExtensionDtype, @@ -1693,14 +1691,6 @@ def map_array( if na_action == "ignore": mapper = mapper[mapper.index.notna()] - if isinstance(arr.dtype, ArrowDtype) and arr.dtype.name.startswith("timestamp"): - try: - # Convert elements to pandas.Timestamp (or datetime64[ns]) - arr = arr.astype("datetime64[ns]") - except Exception: - # fallback: safe, slow path - arr = np.array([Timestamp(x.as_py()) for x in arr]) - # Since values were input this means we came from either # a dict or a series and mapper should be an index indexer = mapper.index.get_indexer(arr) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 9295cf7873d98..a3ea431188d03 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1483,6 +1483,14 @@ def to_numpy( def map(self, mapper, na_action: Literal["ignore"] | None = None): if is_numeric_dtype(self.dtype): return map_array(self.to_numpy(), mapper, na_action=na_action) + if self.dtype.name.startswith("timestamp"): + try: + # Convert elements to pandas.Timestamp (or datetime64[ns]) + self = self.astype("datetime64[ns]") + except Exception: + # fallback: safe, slow path + self = np.array([Timestamp(x.as_py()) for x in self]) + return map_array(self, mapper, na_action=na_action) else: return super().map(mapper, na_action) From 49e130c0b208bcf8e860ac3622771f00a0ec1a9c Mon Sep 17 00:00:00 2001 From: arthurlw Date: Mon, 7 Apr 2025 13:45:34 -0700 Subject: [PATCH 08/22] Updated condition --- pandas/core/arrays/arrow/array.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index a3ea431188d03..0833570025938 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1483,14 +1483,8 @@ def to_numpy( def map(self, mapper, na_action: Literal["ignore"] | None = None): if is_numeric_dtype(self.dtype): return map_array(self.to_numpy(), mapper, na_action=na_action) - if self.dtype.name.startswith("timestamp"): - try: - # Convert elements to pandas.Timestamp (or datetime64[ns]) - self = self.astype("datetime64[ns]") - except Exception: - # fallback: safe, slow path - self = np.array([Timestamp(x.as_py()) for x in self]) - return map_array(self, mapper, na_action=na_action) + elif self.dtype == "timestamp[ns][pyarrow]": + return map_array(self.to_numpy(dtype=object), mapper, na_action=na_action) else: return super().map(mapper, na_action) From 79c1fe256b0c56c8d38d75e5693110bc56d32e53 Mon Sep 17 00:00:00 2001 From: arthurlw Date: Mon, 14 Apr 2025 11:33:19 -0700 Subject: [PATCH 09/22] updated according to reviewer suggestions --- pandas/core/arrays/arrow/array.py | 6 +++--- pandas/tests/series/methods/test_map.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 0833570025938..6f36518d1d3ab 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1481,10 +1481,10 @@ def to_numpy( return result def map(self, mapper, na_action: Literal["ignore"] | None = None): - if is_numeric_dtype(self.dtype): + if is_numeric_dtype(self.dtype) or self.dtype == "timestamp[ns][pyarrow]": return map_array(self.to_numpy(), mapper, na_action=na_action) - elif self.dtype == "timestamp[ns][pyarrow]": - return map_array(self.to_numpy(dtype=object), mapper, na_action=na_action) + # elif self.dtype == "timestamp[ns][pyarrow]": + # return map_array(self.to_numpy(), mapper, na_action=na_action) else: return super().map(mapper, na_action) diff --git a/pandas/tests/series/methods/test_map.py b/pandas/tests/series/methods/test_map.py index 4db91653101c9..6e695c9b4d0e6 100644 --- a/pandas/tests/series/methods/test_map.py +++ b/pandas/tests/series/methods/test_map.py @@ -608,7 +608,7 @@ def test_map_kwargs(): def test_map_arrow_timestamp_dict(): # GH 61231 - pytest.importorskip("pyarrow", minversion="10.0.1") + pytest.importorskip("pyarrow") ser = Series(date_range("2023-01-01", periods=3)).astype("timestamp[ns][pyarrow]") mapper = {ts: i for i, ts in enumerate(ser)} From 19d28704d720aaf6292a653748e483d0acb431b6 Mon Sep 17 00:00:00 2001 From: Arthur Laureus Wigo <126365160+arthurlw@users.noreply.github.com> Date: Mon, 14 Apr 2025 12:06:03 -0700 Subject: [PATCH 10/22] reverted array.py logic --- pandas/core/arrays/arrow/array.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 6262a721f10ef..b5de0c3078f08 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1481,10 +1481,10 @@ def to_numpy( return result def map(self, mapper, na_action: Literal["ignore"] | None = None): - if is_numeric_dtype(self.dtype) or self.dtype == "timestamp[ns][pyarrow]": + if is_numeric_dtype(self.dtype): return map_array(self.to_numpy(), mapper, na_action=na_action) - # elif self.dtype == "timestamp[ns][pyarrow]": - # return map_array(self.to_numpy(), mapper, na_action=na_action) + elif self.dtype == "timestamp[ns][pyarrow]": + return map_array(self.to_numpy(dtype=object), mapper, na_action=na_action) else: return super().map(mapper, na_action) From 88c180cca99ee63c5f3d9aa302e163cd843e4d79 Mon Sep 17 00:00:00 2001 From: Arthur Laureus Wigo <126365160+arthurlw@users.noreply.github.com> Date: Mon, 14 Apr 2025 12:08:25 -0700 Subject: [PATCH 11/22] change cast to dtype="datetime64[ns]" --- pandas/core/arrays/arrow/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index b5de0c3078f08..89569ca185e40 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1484,7 +1484,7 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None): if is_numeric_dtype(self.dtype): return map_array(self.to_numpy(), mapper, na_action=na_action) elif self.dtype == "timestamp[ns][pyarrow]": - return map_array(self.to_numpy(dtype=object), mapper, na_action=na_action) + return map_array(self.to_numpy(dtype="datetime64[ns]"), mapper, na_action=na_action) else: return super().map(mapper, na_action) From a421e6608deca756f6aa14621f26a431c5d03c5e Mon Sep 17 00:00:00 2001 From: Arthur Laureus Wigo <126365160+arthurlw@users.noreply.github.com> Date: Mon, 14 Apr 2025 12:20:45 -0700 Subject: [PATCH 12/22] precommit --- pandas/core/arrays/arrow/array.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 89569ca185e40..3908d31599e5e 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1484,7 +1484,9 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None): if is_numeric_dtype(self.dtype): return map_array(self.to_numpy(), mapper, na_action=na_action) elif self.dtype == "timestamp[ns][pyarrow]": - return map_array(self.to_numpy(dtype="datetime64[ns]"), mapper, na_action=na_action) + return map_array( + self.to_numpy(dtype="datetime64[ns]"), mapper, na_action=na_action + ) else: return super().map(mapper, na_action) From 04dda0d7e80907707d42cc2fa9aed659b05d5701 Mon Sep 17 00:00:00 2001 From: arthurlw Date: Tue, 15 Apr 2025 15:15:16 -0700 Subject: [PATCH 13/22] updated test and map condition --- pandas/core/arrays/arrow/array.py | 6 +----- pandas/tests/extension/test_arrow.py | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 3908d31599e5e..2705a0ac42781 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1481,12 +1481,8 @@ def to_numpy( return result def map(self, mapper, na_action: Literal["ignore"] | None = None): - if is_numeric_dtype(self.dtype): + if is_numeric_dtype(self.dtype) or self.dtype.kind in "mM": return map_array(self.to_numpy(), mapper, na_action=na_action) - elif self.dtype == "timestamp[ns][pyarrow]": - return map_array( - self.to_numpy(dtype="datetime64[ns]"), mapper, na_action=na_action - ) else: return super().map(mapper, na_action) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index fc5930ebcd8ac..5d8e9db35f026 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -281,7 +281,7 @@ def test_compare_scalar(self, data, comparison_op): @pytest.mark.parametrize("na_action", [None, "ignore"]) def test_map(self, data_missing, na_action): if data_missing.dtype.kind in "mM": - result = data_missing.map(lambda x: x, na_action=na_action) + result = data_missing.map(lambda x: x, na_action=na_action).to_numpy(dtype=object) expected = data_missing.to_numpy(dtype=object) tm.assert_numpy_array_equal(result, expected) else: From 02ae824838bec8333b5a3613730112605fe2bb7a Mon Sep 17 00:00:00 2001 From: arthurlw Date: Tue, 15 Apr 2025 15:44:40 -0700 Subject: [PATCH 14/22] updated test --- pandas/tests/extension/test_arrow.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 5d8e9db35f026..70c68afd9b6af 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -281,9 +281,9 @@ def test_compare_scalar(self, data, comparison_op): @pytest.mark.parametrize("na_action", [None, "ignore"]) def test_map(self, data_missing, na_action): if data_missing.dtype.kind in "mM": - result = data_missing.map(lambda x: x, na_action=na_action).to_numpy(dtype=object) - expected = data_missing.to_numpy(dtype=object) - tm.assert_numpy_array_equal(result, expected) + result = data_missing.map(lambda x: x, na_action=na_action) + expected = pd.Series(data_missing.to_numpy()).map(lambda x: x, na_action=na_action) + tm.assert_series_equal(result, expected, check_dtype=False) else: result = data_missing.map(lambda x: x, na_action=na_action) if data_missing.dtype == "float32[pyarrow]": From 73c403947dfdf748ef81438bdcf2baf772a5d525 Mon Sep 17 00:00:00 2001 From: arthurlw Date: Tue, 15 Apr 2025 16:18:35 -0700 Subject: [PATCH 15/22] wrap with pd.series --- pandas/tests/extension/test_arrow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 70c68afd9b6af..4f8d06e51790d 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -281,7 +281,7 @@ def test_compare_scalar(self, data, comparison_op): @pytest.mark.parametrize("na_action", [None, "ignore"]) def test_map(self, data_missing, na_action): if data_missing.dtype.kind in "mM": - result = data_missing.map(lambda x: x, na_action=na_action) + result = pd.Series(data_missing.map(lambda x: x, na_action=na_action)) expected = pd.Series(data_missing.to_numpy()).map(lambda x: x, na_action=na_action) tm.assert_series_equal(result, expected, check_dtype=False) else: From b9183233a26d8c5109c45740dfebdb824349eddd Mon Sep 17 00:00:00 2001 From: arthurlw Date: Tue, 15 Apr 2025 17:13:39 -0700 Subject: [PATCH 16/22] casted typing --- pandas/tests/extension/test_arrow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 4f8d06e51790d..acfc558414624 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -282,7 +282,7 @@ def test_compare_scalar(self, data, comparison_op): def test_map(self, data_missing, na_action): if data_missing.dtype.kind in "mM": result = pd.Series(data_missing.map(lambda x: x, na_action=na_action)) - expected = pd.Series(data_missing.to_numpy()).map(lambda x: x, na_action=na_action) + expected = pd.Series(data_missing.to_numpy()).astype(result.dtype) tm.assert_series_equal(result, expected, check_dtype=False) else: result = data_missing.map(lambda x: x, na_action=na_action) From f3545bfbd420c7a3f9be7301b59becf4e08de23d Mon Sep 17 00:00:00 2001 From: arthurlw Date: Tue, 15 Apr 2025 18:37:25 -0700 Subject: [PATCH 17/22] updated test logic --- pandas/tests/extension/test_arrow.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index acfc558414624..1e46ff719715c 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -281,8 +281,14 @@ def test_compare_scalar(self, data, comparison_op): @pytest.mark.parametrize("na_action", [None, "ignore"]) def test_map(self, data_missing, na_action): if data_missing.dtype.kind in "mM": - result = pd.Series(data_missing.map(lambda x: x, na_action=na_action)) - expected = pd.Series(data_missing.to_numpy()).astype(result.dtype) + result = pd.Series( + np.asarray( + data_missing.map(lambda x: x, na_action=na_action), dtype="int64" + ) + ) + expected = pd.Series( + data_missing.to_numpy().astype(result.dtype).view("int64") + ) tm.assert_series_equal(result, expected, check_dtype=False) else: result = data_missing.map(lambda x: x, na_action=na_action) From 4526bb133fc427f8cb609c08e0e3981bafeda6bb Mon Sep 17 00:00:00 2001 From: arthurlw Date: Thu, 17 Apr 2025 10:49:42 -0700 Subject: [PATCH 18/22] updated testing logic --- pandas/tests/extension/test_arrow.py | 37 +++++++++++++++++++++------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 1e46ff719715c..37eec9a108b43 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -64,6 +64,7 @@ is_string_dtype, is_unsigned_integer_dtype, ) +from pandas.core.dtypes.common import is_timedelta64_dtype from pandas.tests.extension import base pa = pytest.importorskip("pyarrow") @@ -281,15 +282,33 @@ def test_compare_scalar(self, data, comparison_op): @pytest.mark.parametrize("na_action", [None, "ignore"]) def test_map(self, data_missing, na_action): if data_missing.dtype.kind in "mM": - result = pd.Series( - np.asarray( - data_missing.map(lambda x: x, na_action=na_action), dtype="int64" - ) - ) - expected = pd.Series( - data_missing.to_numpy().astype(result.dtype).view("int64") - ) - tm.assert_series_equal(result, expected, check_dtype=False) + mapped = data_missing.map(lambda x: x, na_action=na_action) + result = pd.Series(mapped) + expected = pd.Series(data_missing.to_numpy()) + + orig_dtype = expected.dtype + + if result.dtype == "float64" and ( + is_datetime64_any_dtype(orig_dtype) + or is_timedelta64_dtype(orig_dtype) + or isinstance(orig_dtype, pd.DatetimeTZDtype) + ): + result = result.astype(orig_dtype) + + if isinstance(orig_dtype, pd.DatetimeTZDtype): + pass + elif is_datetime64_any_dtype(orig_dtype): + result = result.astype("datetime64[ns]").astype("int64") + expected = expected.astype("datetime64[ns]").astype("int64") + result = pd.Series(result) + expected = pd.Series(expected) + elif is_timedelta64_dtype(orig_dtype): + result = result.astype("timedelta64[ns]") + expected = expected.astype("timedelta64[ns]") + + + tm.assert_series_equal(result, expected, check_dtype=False, check_exact=False) + else: result = data_missing.map(lambda x: x, na_action=na_action) if data_missing.dtype == "float32[pyarrow]": From 52cd37f246b8c8ce7b457d06bc64679f13ab3485 Mon Sep 17 00:00:00 2001 From: arthurlw Date: Thu, 17 Apr 2025 11:17:28 -0700 Subject: [PATCH 19/22] precommit --- pandas/tests/extension/test_arrow.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 37eec9a108b43..388a065d2268d 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -46,6 +46,7 @@ pa_version_under20p0, ) +from pandas.core.dtypes.common import is_timedelta64_dtype from pandas.core.dtypes.dtypes import ( ArrowDtype, CategoricalDtypeType, @@ -64,7 +65,6 @@ is_string_dtype, is_unsigned_integer_dtype, ) -from pandas.core.dtypes.common import is_timedelta64_dtype from pandas.tests.extension import base pa = pytest.importorskip("pyarrow") @@ -306,8 +306,9 @@ def test_map(self, data_missing, na_action): result = result.astype("timedelta64[ns]") expected = expected.astype("timedelta64[ns]") - - tm.assert_series_equal(result, expected, check_dtype=False, check_exact=False) + tm.assert_series_equal( + result, expected, check_dtype=False, check_exact=False + ) else: result = data_missing.map(lambda x: x, na_action=na_action) From 25e57c34158158de2cd5d2c0843f3e5babbeb3e5 Mon Sep 17 00:00:00 2001 From: arthurlw Date: Thu, 17 Apr 2025 14:36:17 -0700 Subject: [PATCH 20/22] improved code readability --- pandas/tests/extension/test_arrow.py | 29 ++++++++++------------------ 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 388a065d2268d..df6d79ba166c1 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -281,42 +281,33 @@ def test_compare_scalar(self, data, comparison_op): @pytest.mark.parametrize("na_action", [None, "ignore"]) def test_map(self, data_missing, na_action): - if data_missing.dtype.kind in "mM": - mapped = data_missing.map(lambda x: x, na_action=na_action) - result = pd.Series(mapped) - expected = pd.Series(data_missing.to_numpy()) + result = data_missing.map(lambda x: x, na_action=na_action) - orig_dtype = expected.dtype + if data_missing.dtype == "float32[pyarrow]": + expected = data_missing.to_numpy(dtype="float64", na_value=np.nan) + tm.assert_numpy_array_equal(result, expected) - if result.dtype == "float64" and ( - is_datetime64_any_dtype(orig_dtype) - or is_timedelta64_dtype(orig_dtype) - or isinstance(orig_dtype, pd.DatetimeTZDtype) - ): - result = result.astype(orig_dtype) + elif data_missing.dtype.kind in "mM": + expected = pd.Series(data_missing.to_numpy()) + orig_dtype = expected.dtype if isinstance(orig_dtype, pd.DatetimeTZDtype): pass elif is_datetime64_any_dtype(orig_dtype): result = result.astype("datetime64[ns]").astype("int64") expected = expected.astype("datetime64[ns]").astype("int64") - result = pd.Series(result) - expected = pd.Series(expected) elif is_timedelta64_dtype(orig_dtype): result = result.astype("timedelta64[ns]") expected = expected.astype("timedelta64[ns]") + result = pd.Series(result) + expected = pd.Series(expected) tm.assert_series_equal( result, expected, check_dtype=False, check_exact=False ) else: - result = data_missing.map(lambda x: x, na_action=na_action) - if data_missing.dtype == "float32[pyarrow]": - # map roundtrips through objects, which converts to float64 - expected = data_missing.to_numpy(dtype="float64", na_value=np.nan) - else: - expected = data_missing.to_numpy() + expected = data_missing.to_numpy() tm.assert_numpy_array_equal(result, expected) def test_astype_str(self, data, request, using_infer_string): From c9f47ff71db4142f48a64ed018418b09918486d0 Mon Sep 17 00:00:00 2001 From: arthurlw Date: Thu, 17 Apr 2025 17:40:09 -0700 Subject: [PATCH 21/22] return with proper typing --- pandas/core/arrays/arrow/array.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 2705a0ac42781..1a33da8250683 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1481,10 +1481,20 @@ def to_numpy( return result def map(self, mapper, na_action: Literal["ignore"] | None = None): - if is_numeric_dtype(self.dtype) or self.dtype.kind in "mM": + from pandas import Series + + pa_type = self._pa_array.type + + if pa.types.is_timestamp(pa_type) or pa.types.is_duration(pa_type): + datelike = self._maybe_convert_datelike_array() + temp = Series(datelike, dtype=datelike.dtype) + mapped = temp.map(mapper, na_action=na_action) + return mapped._values + + if is_numeric_dtype(self.dtype): return map_array(self.to_numpy(), mapper, na_action=na_action) - else: - return super().map(mapper, na_action) + + return super().map(mapper, na_action=na_action) @doc(ExtensionArray.duplicated) def duplicated( From 105d92b841c85610a0421d758e10b6b920c2d40d Mon Sep 17 00:00:00 2001 From: arthurlw Date: Fri, 18 Apr 2025 11:14:11 -0700 Subject: [PATCH 22/22] added to_numpy() call --- pandas/core/arrays/arrow/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 1a33da8250683..cf55bf736835a 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1489,7 +1489,7 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None): datelike = self._maybe_convert_datelike_array() temp = Series(datelike, dtype=datelike.dtype) mapped = temp.map(mapper, na_action=na_action) - return mapped._values + return mapped.to_numpy() if is_numeric_dtype(self.dtype): return map_array(self.to_numpy(), mapper, na_action=na_action)