diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index aafd802b827a5..47ca45e71af98 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -17,6 +17,7 @@ import numpy as np +import pandas as pd from pandas._libs import ( algos, hashtable as htable, @@ -1623,7 +1624,7 @@ def union_with_duplicates( repeats = final_count.reindex(unique_vals).values return np.repeat(unique_vals, repeats) - +import pandas as pd def map_array( arr: ArrayLike, mapper, @@ -1648,9 +1649,10 @@ def map_array( a MultiIndex will be returned. """ if na_action not in (None, "ignore"): - msg = f"na_action must either be 'ignore' or None, {na_action} was passed" + msg = f"na_acti(on must either be 'ignore' or None, {na_action} was passed" raise ValueError(msg) - + + # we can fastpath dict/Series to an efficient map # as we know that we are not going to have to yield # python types @@ -1694,8 +1696,18 @@ def map_array( return arr.copy() # we must convert to python types - values = arr.astype(object, copy=False) - if na_action is None: - return lib.map_infer(values, mapper) - else: - return lib.map_infer_mask(values, mapper, mask=isna(values).view(np.uint8)) + #values = arr.astype(object, copy=False) + + if is_integer_dtype(arr) and is_nullable(arr.dtype): + def mapper_check(x): + if x is None: + return pd.NA + else: + return mapper(x) + values = arr.copy() + + if na_action is None: + #return lib.map_infer(values, mapper) + return pd.array([mapper_check(x) for x in values], dtype = arr.dtype) + else: + return lib.map_infer_mask(values, mapper, mask=isna(values).view(np.uint8)) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index f36fc82fb1a11..1a178d6c70a97 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -14,6 +14,7 @@ ) import numpy as np +from pandas.core.dtypes.missing import isna from pandas._libs.internals import BlockValuesRefs from pandas._typing import ( @@ -1389,7 +1390,8 @@ def __init__( def apply(self) -> DataFrame | Series: 
obj = self.obj - + + if len(obj) == 0: return self.apply_empty_result() @@ -1444,7 +1446,7 @@ def apply_compat(self): except (ValueError, AttributeError, TypeError): result = obj.apply(func, by_row=False) return result - +import pandas as pd def apply_standard(self) -> DataFrame | Series: # caller is responsible for ensuring that f is Callable func = cast(Callable, self.func) @@ -1455,14 +1457,19 @@ def apply_standard(self) -> DataFrame | Series: return func(obj, *self.args, **self.kwargs) elif not self.by_row: return func(obj, *self.args, **self.kwargs) - - if self.args or self.kwargs: - # _map_values does not support args/kwargs - def curried(x): - return func(x, *self.args, **self.kwargs) - + + #Check if type is integer and nullable, return pd.NA for None values and + #normal func for other values + if pd.api.types.is_integer_dtype(obj) and + pd.api.types.is_nullable(obj.dtype): + def wrapped_func(x): + if x is None: + return pd.NA + return func(x,*self.args, **self.kwargs) + #testing123 else: curried = func + mapped = obj._map_values(mapper=curried) if len(mapped) and isinstance(mapped[0], ABCSeries): diff --git a/pandas/core/series.py b/pandas/core/series.py index 351622135b31f..9a8902c901540 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4404,12 +4404,23 @@ def map( 3 I am a rabbit dtype: object """ - if callable(arg): - arg = functools.partial(arg, **kwargs) - new_values = self._map_values(arg, na_action=na_action) - return self._constructor(new_values, index=self.index, copy=False).__finalize__( - self, method="map" - ) + #Check if the dtype is an integer + if pd.api.types.is_integer_dtype(self) and pd.api.types.is_nullable(self.dtype): + #if dtype is nullable int type, ensure NaN values replaced with pd.NA + def map_check(val): + if val is None: + return pd.NA + return val + arg = map_check(arg) + + else: + if callable(arg): + arg = functools.partial(arg, **kwargs) + new_values = self._map_values(arg, na_action=na_action) + return 
self._constructor(new_values, index=self.index, copy=False) + .__finalize__( + self, method="map" + ) def _gotitem(self, key, ndim, subset=None) -> Self: """ @@ -4609,6 +4620,16 @@ def apply( Helsinki 2.484907 dtype: float64 """ + # check if dtype is nullable integer + if pd.api.types.is_integer_dtype(self) and pd.api.types.is_nullable(self.dtype): + # def function to handle NaN as pd.NA + def apply_check(val): + if val is None: + return pd.NA + return val + self = [apply_check(x) for x in self] + + #proceed with usual apply method return SeriesApply( self, func, diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 9541b0b7495c7..fd74377365f00 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -665,3 +665,4 @@ def test_series_apply_unpack_nested_data(): result = ser.apply(lambda x: Series(x)) expected = DataFrame({0: [1.0, 4.0], 1: [2.0, 5.0], 2: [3.0, 6.0], 3: [np.nan, 7]}) tm.assert_frame_equal(result, expected) + diff --git a/pandas/tests/apply/test_series_apply_bugFix.py b/pandas/tests/apply/test_series_apply_bugFix.py new file mode 100644 index 0000000000000..b34105f625122 --- /dev/null +++ b/pandas/tests/apply/test_series_apply_bugFix.py @@ -0,0 +1,47 @@ +import numpy as np +import pytest + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + concat, + date_range, + timedelta_range, +) +import pandas._testing as tm +from pandas.tests.apply.common import series_transform_kernels + +def test_series_map_NAinteger(): + s = pd.Series([1,2,None],dtype="Int32") + + def increment(x): + if x is None: + return pd.NA + return x+1 + + + result = s.map(increment) + + expectedResult = pd.Series([2,3,pd.NA],dtype = "Int32") + + pd.testing.assert_series_equal(result,expectedResult) + + + +def test_series_apply_NAinteger(): + s = pd.Series([1,2,None],dtype="Int32") + + def increment(x): + if x is None: + return pd.NA + return x+1 + + + result 
= s.apply(increment) + + expectedResult = pd.Series([2,3,pd.NA],dtype = "Int32") + + pd.testing.assert_series_equal(result,expectedResult) \ No newline at end of file