From 6c8ae4da5c6cd82f9402b3f6e4d2233ffa6dd00b Mon Sep 17 00:00:00 2001 From: Goran Date: Sat, 15 Feb 2025 13:26:19 -0500 Subject: [PATCH 1/6] added dtype check for series apply and map --- pandas/core/algorithms.py | 9 ++++++++- pandas/core/apply.py | 4 +++- pandas/core/series.py | 32 ++++++++++++++++++++++++++------ 3 files changed, 37 insertions(+), 8 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index aafd802b827a5..1b8a63467f48b 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -17,6 +17,7 @@ import numpy as np +import pandas as pd from pandas._libs import ( algos, hashtable as htable, @@ -1648,9 +1649,15 @@ def map_array( a MultiIndex will be returned. """ if na_action not in (None, "ignore"): - msg = f"na_action must either be 'ignore' or None, {na_action} was passed" + msg = f"na_acti(on must either be 'ignore' or None, {na_action} was passed" raise ValueError(msg) + check = pd.isna(arr) + + def apply_map(x): + if na_action == "ignore" and pd.isna(x): + return x + # we can fastpath dict/Series to an efficient map # as we know that we are not going to have to yield # python types diff --git a/pandas/core/apply.py b/pandas/core/apply.py index f36fc82fb1a11..90fdf4f0b4d30 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -14,6 +14,7 @@ ) import numpy as np +from pandas.core.dtypes.missing import isna from pandas._libs.internals import BlockValuesRefs from pandas._typing import ( @@ -1389,7 +1390,8 @@ def __init__( def apply(self) -> DataFrame | Series: obj = self.obj - + + if len(obj) == 0: return self.apply_empty_result() diff --git a/pandas/core/series.py b/pandas/core/series.py index 351622135b31f..293727507aa2a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4404,12 +4404,22 @@ def map( 3 I am a rabbit dtype: object """ - if callable(arg): - arg = functools.partial(arg, **kwargs) - new_values = self._map_values(arg, na_action=na_action) - return self._constructor(new_values, index=self.index, copy=False).__finalize__( - self, method="map" - ) + #Check if the dtype is an integer + if pd.api.types.is_integer_dtype(self) and pd.api.types.is_nullable(self.dtype): + #if dtype is nullable int type, ensure NaN values replaced with pd.NA + def map_check(val): + if val is None: + return pd.NA + return val + arg = map_check(arg) + + else: + if callable(arg): + arg = functools.partial(arg, **kwargs) + new_values = self._map_values(arg, na_action=na_action) + return self._constructor(new_values, index=self.index, copy=False).__finalize__( + self, method="map" + ) def _gotitem(self, key, ndim, subset=None) -> Self: """ @@ -4609,6 +4619,16 @@ def apply( Helsinki 2.484907 dtype: float64 """ + # check if dtype is nullable integer + if pd.api.types.is_integer_dtype(self) and pd.api.types.is_nullable(self.dtype): + # def functon to handle NaN as pd.NA + def apply_check(val): + if val is None: + return pd.NA + return val + func = functools.partial(apply_check,func) + + #proceed with usual apply method return SeriesApply( self, func, From 780d48cf0eed5e2bc200a130b425b052b09e919a Mon Sep 17 00:00:00 2001 From: Goran Date: Sat, 15 Feb 2025 21:32:43 -0500 Subject: [PATCH 2/6] additional fixes plus tests --- pandas/core/algorithms.py | 25 +++++++++++++++---------- pandas/core/apply.py | 14 +++++++++----- pandas/tests/apply/test_series_apply.py | 1 + 3 files changed, 25 insertions(+), 15 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 1b8a63467f48b..0465c3be64bac 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1651,12 +1651,7 @@ def map_array( if na_action not in (None, "ignore"): msg = f"na_acti(on must either be 'ignore' or None, {na_action} was passed" raise ValueError(msg) - - check = pd.isna(arr) - def apply_map(x): - if na_action == "ignore" and pd.isna(x): - return x # we can fastpath dict/Series to an efficient map # as we know that we are not going to have to yield @@ -1701,8 +1696,18 @@ def apply_map(x): return arr.copy() # we must convert to python types - values = arr.astype(object, copy=False) - if na_action is None: - return lib.map_infer(values, mapper) - else: - return lib.map_infer_mask(values, mapper, mask=isna(values).view(np.uint8)) + #values = arr.astype(object, copy=False) + + if is_integer_dtype(arr) and is_nullable_dtype(arr.dtype): + def mapper_check(x): + if x is None: + return pd.NA + else: + mapper(x) + values = arr.copy() + + if na_action is None: + #return lib.map_infer(values, mapper) + return np.array([mapper_check(x) for x in values], dtype = arr.dtype) + else: + return lib.map_infer_mask(values, mapper, mask=isna(values).view(np.uint8)) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 90fdf4f0b4d30..74e3a8bae114c 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1457,14 +1457,18 @@ def apply_standard(self) -> DataFrame | Series: return func(obj, *self.args, **self.kwargs) elif not self.by_row: return func(obj, *self.args, **self.kwargs) - - if self.args or self.kwargs: - # _map_values does not support args/kwargs - def curried(x): - return func(x, *self.args, **self.kwargs) + + #Check if type is integer and nullable, return pd.NA for None values and + #normal func for other values + if pd.api.types.is_integer_dtype(obj) and pd.api.types.is_nullable_dtype(obj.dtype): + def wrapped_func(x): + if x is None: + return pd.NA + return func(x,*self.args, **self.kwargs) else: curried = func + mapped = obj._map_values(mapper=curried) if len(mapped) and isinstance(mapped[0], ABCSeries): diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 9541b0b7495c7..fd74377365f00 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -665,3 +665,4 @@ def test_series_apply_unpack_nested_data(): result = ser.apply(lambda x: Series(x)) expected = DataFrame({0: [1.0, 4.0], 1: [2.0, 5.0], 2: [3.0, 6.0], 3: [np.nan, 7]}) tm.assert_frame_equal(result, expected) + From 61452ac856409968742427971567657487a58e4e Mon Sep 17 00:00:00 2001 From: Goran Date: Mon, 17 Feb 2025 12:24:27 -0500 Subject: [PATCH 3/6] Added changes and new test file --- pandas/core/algorithms.py | 4 +- pandas/core/series.py | 2 +- .../tests/apply/test_series_apply_bugFix.py | 45 +++++++++++++++++++ 3 files changed, 48 insertions(+), 3 deletions(-) create mode 100644 pandas/tests/apply/test_series_apply_bugFix.py diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 0465c3be64bac..4a5e944c8a100 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1703,11 +1703,11 @@ def mapper_check(x): if x is None: return pd.NA else: - mapper(x) + return mapper(x) values = arr.copy() if na_action is None: #return lib.map_infer(values, mapper) - return np.array([mapper_check(x) for x in values], dtype = arr.dtype) + return pd.array([mapper_check(x) for x in values], dtype = arr.dtype) else: return lib.map_infer_mask(values, mapper, mask=isna(values).view(np.uint8)) diff --git a/pandas/core/series.py b/pandas/core/series.py index 293727507aa2a..dc08d97c97ee8 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4626,7 +4626,7 @@ def apply_check(val): if val is None: return pd.NA return val - func = functools.partial(apply_check,func) + self = [apply_check(x) for x in self] #proceed with usual apply method return SeriesApply( diff --git a/pandas/tests/apply/test_series_apply_bugFix.py b/pandas/tests/apply/test_series_apply_bugFix.py new file mode 100644 index 0000000000000..36135673fea57 --- /dev/null +++ b/pandas/tests/apply/test_series_apply_bugFix.py @@ -0,0 +1,45 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + concat, + date_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.tests.apply.common import series_transform_kernels + +def test_series_map_NAinteger(): + s = pd.Series([1,2,None],dtype="Int32") + + def increment(x): + if x is None: + return pd.NA + return x+1 + + + result = s.map(increment) + + expectedResult = pd.Series([2,3,pd.NA],dtype = "Int32") + + pd.testing.assert_series_equal(result,expectedResult) + +def test_series_apply_NAinteger(): + s = pd.Series([1,2,None],dtype="Int32") + + def increment(x): + if x is None: + return pd.NA + return x+1 + + + result = s.apply(increment) + + expectedResult = pd.Series([2,3,pd.NA],dtype = "Int32") + + pd.testing.assert_series_equal(result,expectedResult) \ No newline at end of file From 59d3268341add888542edd821a980e7fbfcf65a3 Mon Sep 17 00:00:00 2001 From: Goran Date: Mon, 17 Feb 2025 14:46:13 -0500 Subject: [PATCH 4/6] pull request demo --- pandas/tests/apply/test_series_apply_bugFix.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/apply/test_series_apply_bugFix.py b/pandas/tests/apply/test_series_apply_bugFix.py index 36135673fea57..becab57a4f719 100644 --- a/pandas/tests/apply/test_series_apply_bugFix.py +++ b/pandas/tests/apply/test_series_apply_bugFix.py @@ -29,6 +29,8 @@ def increment(x): pd.testing.assert_series_equal(result,expectedResult) + + def test_series_apply_NAinteger(): s = pd.Series([1,2,None],dtype="Int32") From d504d1b1b91d1df10d5fd1d17a18af36fea43412 Mon Sep 17 00:00:00 2001 From: Goran Date: Mon, 17 Feb 2025 15:25:51 -0500 Subject: [PATCH 5/6] pull req test --- pandas/core/apply.py | 2 +- pandas/tests/apply/test_series_apply_bugFix.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 74e3a8bae114c..3d809afd21361 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1465,7 +1465,7 @@ def wrapped_func(x): if x is None: return pd.NA return func(x,*self.args, **self.kwargs) - + #testing123 else: curried = func diff --git a/pandas/tests/apply/test_series_apply_bugFix.py b/pandas/tests/apply/test_series_apply_bugFix.py index becab57a4f719..b34105f625122 100644 --- a/pandas/tests/apply/test_series_apply_bugFix.py +++ b/pandas/tests/apply/test_series_apply_bugFix.py @@ -29,7 +29,7 @@ def increment(x): pd.testing.assert_series_equal(result,expectedResult) - + def test_series_apply_NAinteger(): s = pd.Series([1,2,None],dtype="Int32") From 5c33fe56ae1b64ebfc1fe0e23d362bf3405a7003 Mon Sep 17 00:00:00 2001 From: Goran Date: Mon, 17 Feb 2025 16:07:57 -0500 Subject: [PATCH 6/6] Fixed errors from PR --- pandas/core/algorithms.py | 4 ++-- pandas/core/apply.py | 5 +++-- pandas/core/series.py | 15 ++++++++------- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 4a5e944c8a100..47ca45e71af98 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1624,7 +1624,7 @@ def union_with_duplicates( repeats = final_count.reindex(unique_vals).values return np.repeat(unique_vals, repeats) - +import pandas as pd def map_array( arr: ArrayLike, mapper, @@ -1698,7 +1698,7 @@ def map_array( # we must convert to python types #values = arr.astype(object, copy=False) - if is_integer_dtype(arr) and is_nullable_dtype(arr.dtype): + if is_integer_dtype(arr) and is_nullable(arr.dtype): def mapper_check(x): if x is None: return pd.NA diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 3d809afd21361..1a178d6c70a97 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1446,7 +1446,7 @@ def apply_compat(self): except (ValueError, AttributeError, TypeError): result = obj.apply(func, by_row=False) return result - +import pandas as pd def apply_standard(self) -> DataFrame | Series: # caller is responsible for ensuring that f is Callable func = cast(Callable, self.func) @@ -1460,7 +1460,8 @@ def apply_standard(self) -> DataFrame | Series: #Check if type is integer and nullable, return pd.NA for None values and #normal func for other values - if pd.api.types.is_integer_dtype(obj) and pd.api.types.is_nullable_dtype(obj.dtype): + if pd.api.types.is_integer_dtype(obj) and + pd.api.types.is_nullable(obj.dtype): def wrapped_func(x): if x is None: return pd.NA diff --git a/pandas/core/series.py b/pandas/core/series.py index dc08d97c97ee8..9a8902c901540 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4417,7 +4417,8 @@ def map_check(val): if callable(arg): arg = functools.partial(arg, **kwargs) new_values = self._map_values(arg, na_action=na_action) - return self._constructor(new_values, index=self.index, copy=False).__finalize__( + return self._constructor(new_values, index=self.index, copy=False) + .__finalize__( self, method="map" ) @@ -4619,16 +4620,16 @@ def apply( Helsinki 2.484907 dtype: float64 """ - # check if dtype is nullable integer + # check if dtype is nullable integer if pd.api.types.is_integer_dtype(self) and pd.api.types.is_nullable(self.dtype): # def functon to handle NaN as pd.NA - def apply_check(val): - if val is None: - return pd.NA - return val + def apply_check(val): + if val is None: + return pd.NA + return val self = [apply_check(x) for x in self] - #proceed with usual apply method + #proceed with usual apply method return SeriesApply( self, func,