diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 92124a536fe26..9f2171a6aba9a 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -101,6 +101,7 @@ Other enhancements - :meth:`DataFrame.unstack` gained the ``sort`` keyword to dictate whether the resulting :class:`MultiIndex` levels are sorted (:issue:`15105`) - :meth:`SeriesGroupby.agg` and :meth:`DataFrameGroupby.agg` now support passing in multiple functions for ``engine="numba"`` (:issue:`53486`) - Added ``engine_kwargs`` parameter to :meth:`DataFrame.to_excel` (:issue:`53220`) +- Added a new parameter ``by_row`` to :meth:`Series.apply`. When set to ``False`` the supplied callables will always operate on the whole Series (:issue:`53400`). - Many read/to_* functions, such as :meth:`DataFrame.to_pickle` and :func:`read_csv`, support forwarding compression arguments to lzma.LZMAFile (:issue:`52979`) - Performance improvement in :func:`concat` with homogeneous ``np.float64`` or ``np.float32`` dtypes (:issue:`52685`) - Performance improvement in :meth:`DataFrame.filter` when ``items`` is given (:issue:`52941`) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 78c078da3df21..007dd2bb2a89d 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -16,6 +16,7 @@ Iterable, Iterator, List, + Literal, Sequence, cast, ) @@ -288,6 +289,11 @@ def agg_list_like(self) -> DataFrame | Series: ------- Result of aggregation. 
""" + return self.agg_or_apply_list_like(op_name="agg") + + def agg_or_apply_list_like( + self, op_name: Literal["agg", "apply"] + ) -> DataFrame | Series: from pandas.core.groupby.generic import ( DataFrameGroupBy, SeriesGroupBy, @@ -296,6 +302,9 @@ def agg_list_like(self) -> DataFrame | Series: obj = self.obj func = cast(List[AggFuncTypeBase], self.func) + kwargs = self.kwargs + if op_name == "apply": + kwargs = {**kwargs, "by_row": False} if getattr(obj, "axis", 0) == 1: raise NotImplementedError("axis other than 0 is not supported") @@ -313,8 +322,6 @@ def agg_list_like(self) -> DataFrame | Series: keys = [] is_groupby = isinstance(obj, (DataFrameGroupBy, SeriesGroupBy)) - is_ser_or_df = isinstance(obj, (ABCDataFrame, ABCSeries)) - this_args = [self.axis, *self.args] if is_ser_or_df else self.args context_manager: ContextManager if is_groupby: @@ -323,12 +330,19 @@ def agg_list_like(self) -> DataFrame | Series: context_manager = com.temp_setattr(obj, "as_index", True) else: context_manager = nullcontext() + + def include_axis(colg) -> bool: + return isinstance(colg, ABCDataFrame) or ( + isinstance(colg, ABCSeries) and op_name == "agg" + ) + with context_manager: # degenerate case if selected_obj.ndim == 1: for a in func: colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj) - new_res = colg.aggregate(a, *this_args, **self.kwargs) + args = [self.axis, *self.args] if include_axis(colg) else self.args + new_res = getattr(colg, op_name)(a, *args, **kwargs) results.append(new_res) # make sure we find a good name @@ -339,7 +353,8 @@ def agg_list_like(self) -> DataFrame | Series: indices = [] for index, col in enumerate(selected_obj): colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index]) - new_res = colg.aggregate(func, *this_args, **self.kwargs) + args = [self.axis, *self.args] if include_axis(colg) else self.args + new_res = getattr(colg, op_name)(func, *args, **kwargs) results.append(new_res) indices.append(index) keys = 
selected_obj.columns.take(indices) @@ -366,6 +381,11 @@ def agg_dict_like(self) -> DataFrame | Series: ------- Result of aggregation. """ + return self.agg_or_apply_dict_like(op_name="agg") + + def agg_or_apply_dict_like( + self, op_name: Literal["agg", "apply"] + ) -> DataFrame | Series: from pandas import Index from pandas.core.groupby.generic import ( DataFrameGroupBy, @@ -373,8 +393,11 @@ def agg_dict_like(self) -> DataFrame | Series: ) from pandas.core.reshape.concat import concat + assert op_name in ["agg", "apply"] + obj = self.obj func = cast(AggFuncTypeDict, self.func) + kwargs = {"by_row": False} if op_name == "apply" else {} if getattr(obj, "axis", 0) == 1: raise NotImplementedError("axis other than 0 is not supported") @@ -387,7 +410,7 @@ def agg_dict_like(self) -> DataFrame | Series: selected_obj = obj._selected_obj selection = obj._selection - func = self.normalize_dictlike_arg("agg", selected_obj, func) + func = self.normalize_dictlike_arg(op_name, selected_obj, func) is_groupby = isinstance(obj, (DataFrameGroupBy, SeriesGroupBy)) context_manager: ContextManager @@ -404,17 +427,18 @@ def agg_dict_like(self) -> DataFrame | Series: ) # Numba Groupby engine/engine-kwargs passthrough - kwargs = {} if is_groupby: engine = self.kwargs.get("engine", None) engine_kwargs = self.kwargs.get("engine_kwargs", None) - kwargs = {"engine": engine, "engine_kwargs": engine_kwargs} + kwargs.update({"engine": engine, "engine_kwargs": engine_kwargs}) with context_manager: if selected_obj.ndim == 1: # key only used for output colg = obj._gotitem(selection, ndim=1) - result_data = [colg.agg(how, **kwargs) for _, how in func.items()] + result_data = [ + getattr(colg, op_name)(how, **kwargs) for _, how in func.items() + ] result_index = list(func.keys()) elif is_non_unique_col: # key used for column selection and output @@ -429,7 +453,9 @@ def agg_dict_like(self) -> DataFrame | Series: label_to_indices[label].append(index) key_data = [ - selected_obj._ixs(indice, 
axis=1).agg(how, **kwargs) + getattr(selected_obj._ixs(indice, axis=1), op_name)( + how, **kwargs + ) for label, indices in label_to_indices.items() for indice in indices ] @@ -439,7 +465,7 @@ def agg_dict_like(self) -> DataFrame | Series: else: # key used for column selection and output result_data = [ - obj._gotitem(key, ndim=1).agg(how, **kwargs) + getattr(obj._gotitem(key, ndim=1), op_name)(how, **kwargs) for key, how in func.items() ] result_index = list(func.keys()) @@ -535,7 +561,7 @@ def apply_str(self) -> DataFrame | Series: self.kwargs["axis"] = self.axis return self._apply_str(obj, func, *self.args, **self.kwargs) - def apply_multiple(self) -> DataFrame | Series: + def apply_list_or_dict_like(self) -> DataFrame | Series: """ Compute apply in case of a list-like or dict-like. @@ -551,9 +577,9 @@ def apply_multiple(self) -> DataFrame | Series: kwargs = self.kwargs if is_dict_like(func): - result = self.agg_dict_like() + result = self.agg_or_apply_dict_like(op_name="apply") else: - result = self.agg_list_like() + result = self.agg_or_apply_list_like(op_name="apply") result = reconstruct_and_relabel_result(result, func, **kwargs) @@ -692,9 +718,9 @@ def values(self): def apply(self) -> DataFrame | Series: """compute the results""" - # dispatch to agg + # dispatch to handle list-like or dict-like if is_list_like(self.func): - return self.apply_multiple() + return self.apply_list_or_dict_like() # all empty if len(self.columns) == 0 and len(self.index) == 0: @@ -1041,6 +1067,7 @@ def infer_to_same_shape(self, results: ResType, res_index: Index) -> DataFrame: class SeriesApply(NDFrameApply): obj: Series axis: AxisInt = 0 + by_row: bool # only relevant for apply() def __init__( self, @@ -1048,6 +1075,7 @@ def __init__( func: AggFuncType, *, convert_dtype: bool | lib.NoDefault = lib.no_default, + by_row: bool = True, args, kwargs, ) -> None: @@ -1062,6 +1090,7 @@ def __init__( stacklevel=find_stack_level(), ) self.convert_dtype = convert_dtype + self.by_row = 
by_row super().__init__( obj, @@ -1078,9 +1107,9 @@ def apply(self) -> DataFrame | Series: if len(obj) == 0: return self.apply_empty_result() - # dispatch to agg + # dispatch to handle list-like or dict-like if is_list_like(self.func): - return self.apply_multiple() + return self.apply_list_or_dict_like() if isinstance(self.func, str): # if we are a string, try to dispatch @@ -1126,6 +1155,8 @@ def apply_standard(self) -> DataFrame | Series: if isinstance(func, np.ufunc): with np.errstate(all="ignore"): return func(obj, *self.args, **self.kwargs) + elif not self.by_row: + return func(obj, *self.args, **self.kwargs) if self.args or self.kwargs: # _map_values does not support args/kwargs diff --git a/pandas/core/series.py b/pandas/core/series.py index cb3edbfd6cfc3..41a32cb60c39f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4496,6 +4496,8 @@ def apply( func: AggFuncType, convert_dtype: bool | lib.NoDefault = lib.no_default, args: tuple[Any, ...] = (), + *, + by_row: bool = True, **kwargs, ) -> DataFrame | Series: """ @@ -4523,6 +4525,12 @@ def apply( instead if you want ``convert_dtype=False``. args : tuple Positional arguments passed to func after the series value. + by_row : bool, default True + If False, the func will be passed the whole Series at once. + If True, the func will be passed each element of the Series, like + Series.map (backward compatible). + + .. versionadded:: 2.1.0 **kwargs Additional keyword arguments passed to func. 
@@ -4611,7 +4619,12 @@ def apply( dtype: float64 """ return SeriesApply( - self, func, convert_dtype=convert_dtype, args=args, kwargs=kwargs + self, + func, + convert_dtype=convert_dtype, + by_row=by_row, + args=args, + kwargs=kwargs, ).apply() def _reindex_indexer( diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index e37006eb0a5f6..985cb5aa5b09c 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -1,5 +1,3 @@ -import math - import numpy as np import pytest @@ -7,6 +5,7 @@ from pandas import ( DataFrame, Index, + MultiIndex, Series, concat, timedelta_range, @@ -15,31 +14,41 @@ from pandas.tests.apply.common import series_transform_kernels -def test_series_map_box_timedelta(): +@pytest.fixture(params=[True, False]) +def by_row(request): + return request.param + + +def test_series_map_box_timedelta(by_row): # GH#11349 ser = Series(timedelta_range("1 day 1 s", periods=3, freq="h")) def f(x): - return x.total_seconds() + return x.total_seconds() if by_row else x.dt.total_seconds() - result = ser.apply(f) + result = ser.apply(f, by_row=by_row) - tm.assert_series_equal(result, ser.map(f)) + expected = ser.map(lambda x: x.total_seconds()) + tm.assert_series_equal(result, expected) expected = Series([86401.0, 90001.0, 93601.0]) tm.assert_series_equal(result, expected) -def test_apply(datetime_series): +def test_apply(datetime_series, by_row): + result = datetime_series.apply(np.sqrt, by_row=by_row) with np.errstate(all="ignore"): - tm.assert_series_equal(datetime_series.apply(np.sqrt), np.sqrt(datetime_series)) + expected = np.sqrt(datetime_series) + tm.assert_series_equal(result, expected) - # element-wise apply - tm.assert_series_equal(datetime_series.apply(math.exp), np.exp(datetime_series)) + # element-wise apply (ufunc) + result = datetime_series.apply(np.exp, by_row=by_row) + expected = np.exp(datetime_series) + tm.assert_series_equal(result, expected) # empty series s = 
Series(dtype=object, name="foo", index=Index([], name="bar")) - rs = s.apply(lambda x: x) + rs = s.apply(lambda x: x, by_row=by_row) tm.assert_series_equal(s, rs) # check all metadata (GH 9322) @@ -50,7 +59,7 @@ def test_apply(datetime_series): # index but no data s = Series(index=[1, 2, 3], dtype=np.float64) - rs = s.apply(lambda x: x) + rs = s.apply(lambda x: x, by_row=by_row) tm.assert_series_equal(s, rs) @@ -60,12 +69,12 @@ def test_apply_map_same_length_inference_bug(): def f(x): return (x, x + 1) - result = s.apply(f) + result = s.apply(f, by_row=True) expected = s.map(f) tm.assert_series_equal(result, expected) s = Series([1, 2, 3]) - result = s.apply(f) + result = s.apply(f, by_row=True) expected = s.map(f) tm.assert_series_equal(result, expected) @@ -78,7 +87,7 @@ def func(x): return x if x > 0 else np.nan with tm.assert_produces_warning(FutureWarning): - ser.apply(func, convert_dtype=convert_dtype) + ser.apply(func, convert_dtype=convert_dtype, by_row=True) def test_apply_args(): @@ -124,14 +133,20 @@ def foo2(x, b=2, c=0): tm.assert_frame_equal(result, expected) -def test_series_apply_map_box_timestamps(): +def test_series_apply_map_box_timestamps(by_row): # GH#2689, GH#2627 ser = Series(pd.date_range("1/1/2000", periods=10)) def func(x): return (x.hour, x.day, x.month) - result = ser.apply(func) + if not by_row: + msg = "Series' object has no attribute 'hour'" + with pytest.raises(AttributeError, match=msg): + ser.apply(func, by_row=by_row) + return + + result = ser.apply(func, by_row=by_row) expected = ser.map(func) tm.assert_series_equal(result, expected) @@ -142,7 +157,7 @@ def test_apply_box(): s = Series(vals) assert s.dtype == "datetime64[ns]" # boxed value must be Timestamp instance - res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}") + res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}", by_row=True) exp = Series(["Timestamp_1_None", "Timestamp_2_None"]) tm.assert_series_equal(res, exp) @@ -152,7 +167,7 @@ def 
test_apply_box(): ] s = Series(vals) assert s.dtype == "datetime64[ns, US/Eastern]" - res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}") + res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}", by_row=True) exp = Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"]) tm.assert_series_equal(res, exp) @@ -160,7 +175,7 @@ def test_apply_box(): vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")] s = Series(vals) assert s.dtype == "timedelta64[ns]" - res = s.apply(lambda x: f"{type(x).__name__}_{x.days}") + res = s.apply(lambda x: f"{type(x).__name__}_{x.days}", by_row=True) exp = Series(["Timedelta_1", "Timedelta_2"]) tm.assert_series_equal(res, exp) @@ -168,41 +183,52 @@ def test_apply_box(): vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")] s = Series(vals) assert s.dtype == "Period[M]" - res = s.apply(lambda x: f"{type(x).__name__}_{x.freqstr}") + res = s.apply(lambda x: f"{type(x).__name__}_{x.freqstr}", by_row=True) exp = Series(["Period_M", "Period_M"]) tm.assert_series_equal(res, exp) -def test_apply_datetimetz(): +def test_apply_datetimetz(by_row): values = pd.date_range("2011-01-01", "2011-01-02", freq="H").tz_localize( "Asia/Tokyo" ) s = Series(values, name="XX") - result = s.apply(lambda x: x + pd.offsets.Day()) + result = s.apply(lambda x: x + pd.offsets.Day(), by_row=by_row) exp_values = pd.date_range("2011-01-02", "2011-01-03", freq="H").tz_localize( "Asia/Tokyo" ) exp = Series(exp_values, name="XX") tm.assert_series_equal(result, exp) - result = s.apply(lambda x: x.hour) - exp = Series(list(range(24)) + [0], name="XX", dtype=np.int64) + result = s.apply(lambda x: x.hour if by_row else x.dt.hour, by_row=by_row) + exp = Series(list(range(24)) + [0], name="XX", dtype="int64" if by_row else "int32") tm.assert_series_equal(result, exp) # not vectorized def f(x): - return str(x.tz) + return str(x.tz) if by_row else str(x.dt.tz) - result = s.apply(f) - exp = Series(["Asia/Tokyo"] * 25, name="XX") - 
tm.assert_series_equal(result, exp) + result = s.apply(f, by_row=by_row) + if by_row: + exp = Series(["Asia/Tokyo"] * 25, name="XX") + tm.assert_series_equal(result, exp) + else: + assert result == "Asia/Tokyo" -def test_apply_categorical(): +def test_apply_categorical(by_row): values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True) ser = Series(values, name="XX", index=list("abcdefg")) - result = ser.apply(lambda x: x.lower()) + + if not by_row: + msg = "Series' object has no attribute 'lower" + with pytest.raises(AttributeError, match=msg): + ser.apply(lambda x: x.lower(), by_row=by_row) + assert ser.apply(lambda x: "A", by_row=by_row) == "A" + return + + result = ser.apply(lambda x: x.lower(), by_row=by_row) # should be categorical dtype when the number of categories are # the same @@ -218,24 +244,30 @@ def test_apply_categorical(): @pytest.mark.parametrize("series", [["1-1", "1-1", np.NaN], ["1-1", "1-2", np.NaN]]) -def test_apply_categorical_with_nan_values(series): +def test_apply_categorical_with_nan_values(series, by_row): # GH 20714 bug fixed in: GH 24275 s = Series(series, dtype="category") - result = s.apply(lambda x: x.split("-")[0]) + if not by_row: + msg = "'Series' object has no attribute 'split'" + with pytest.raises(AttributeError, match=msg): + s.apply(lambda x: x.split("-")[0], by_row=by_row) + return + + result = s.apply(lambda x: x.split("-")[0], by_row=by_row) result = result.astype(object) expected = Series(["1", "1", np.NaN], dtype="category") expected = expected.astype(object) tm.assert_series_equal(result, expected) -def test_apply_empty_integer_series_with_datetime_index(): +def test_apply_empty_integer_series_with_datetime_index(by_row): # GH 21245 s = Series([], index=pd.date_range(start="2018-01-01", periods=0), dtype=int) - result = s.apply(lambda x: x) + result = s.apply(lambda x: x, by_row=by_row) tm.assert_series_equal(result, s) -def test_transform(string_series): +def test_transform(string_series, by_row): # 
transforming functions with np.errstate(all="ignore"): @@ -243,17 +275,17 @@ def test_transform(string_series): f_abs = np.abs(string_series) # ufunc - result = string_series.apply(np.sqrt) + result = string_series.apply(np.sqrt, by_row=by_row) expected = f_sqrt.copy() tm.assert_series_equal(result, expected) # list-like - result = string_series.apply([np.sqrt]) + result = string_series.apply([np.sqrt], by_row=by_row) expected = f_sqrt.to_frame().copy() expected.columns = ["sqrt"] tm.assert_frame_equal(result, expected) - result = string_series.apply(["sqrt"]) + result = string_series.apply(["sqrt"], by_row=by_row) tm.assert_frame_equal(result, expected) # multiple items in list @@ -261,7 +293,7 @@ def test_transform(string_series): # series and then concatting expected = concat([f_sqrt, f_abs], axis=1) expected.columns = ["sqrt", "absolute"] - result = string_series.apply([np.sqrt, np.abs]) + result = string_series.apply([np.sqrt, np.abs], by_row=by_row) tm.assert_frame_equal(result, expected) # dict, provide renaming @@ -269,7 +301,7 @@ def test_transform(string_series): expected.columns = ["foo", "bar"] expected = expected.unstack().rename("series") - result = string_series.apply({"foo": np.sqrt, "bar": np.abs}) + result = string_series.apply({"foo": np.sqrt, "bar": np.abs}, by_row=by_row) tm.assert_series_equal(result.reindex_like(expected), expected) @@ -346,16 +378,17 @@ def test_demo(): tm.assert_series_equal(result, expected) -def test_agg_apply_evaluate_lambdas_the_same(string_series): - # test that we are evaluating row-by-row first - # before vectorized evaluation - result = string_series.apply(lambda x: str(x)) - expected = string_series.agg(lambda x: str(x)) - tm.assert_series_equal(result, expected) +@pytest.mark.parametrize("func", [str, lambda x: str(x)]) +def test_apply_map_evaluate_lambdas_the_same(string_series, func, by_row): + # test that we are evaluating row-by-row first if by_row=True + # else vectorized evaluation + result = 
string_series.apply(func, by_row=by_row) - result = string_series.apply(str) - expected = string_series.agg(str) - tm.assert_series_equal(result, expected) + if by_row: + expected = string_series.map(func) + tm.assert_series_equal(result, expected) + else: + assert result == str(string_series) def test_with_nested_series(datetime_series): @@ -376,7 +409,7 @@ def test_with_nested_series(datetime_series): tm.assert_frame_equal(result, expected) -def test_replicate_describe(string_series): +def test_replicate_describe(string_series, by_row): # this also tests a result set that is all scalars expected = string_series.describe() result = string_series.apply( @@ -389,7 +422,8 @@ def test_replicate_describe(string_series): "50%": "median", "75%": lambda x: x.quantile(0.75), "max": "max", - } + }, + by_row=by_row, ) tm.assert_series_equal(result, expected) @@ -405,27 +439,33 @@ def test_reduce(string_series): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("how", ["agg", "apply"]) -def test_non_callable_aggregates(how): +@pytest.mark.parametrize( + "how, kwds", + [("agg", {}), ("apply", {"by_row": True}), ("apply", {"by_row": False})], +) +def test_non_callable_aggregates(how, kwds): # test agg using non-callable series attributes # GH 39116 - expand to apply s = Series([1, 2, None]) # Calling agg w/ just a string arg same as calling s.arg - result = getattr(s, how)("size") + result = getattr(s, how)("size", **kwds) expected = s.size assert result == expected # test when mixed w/ callable reducers - result = getattr(s, how)(["size", "count", "mean"]) + result = getattr(s, how)(["size", "count", "mean"], **kwds) expected = Series({"size": 3.0, "count": 2.0, "mean": 1.5}) tm.assert_series_equal(result, expected) + result = getattr(s, how)({"size": "size", "count": "count", "mean": "mean"}, **kwds) + tm.assert_series_equal(result, expected) -def test_series_apply_no_suffix_index(): + +def test_series_apply_no_suffix_index(by_row): # GH36189 s = Series([4] * 
3) - result = s.apply(["sum", lambda x: x.sum(), lambda x: x.sum()]) + result = s.apply(["sum", lambda x: x.sum(), lambda x: x.sum()], by_row=by_row) expected = Series([12, 12, 12], index=["sum", "", ""]) tm.assert_series_equal(result, expected) @@ -459,25 +499,28 @@ def test_apply_series_on_date_time_index_aware_series(dti, exp, aware): tm.assert_frame_equal(result, exp) -def test_apply_scalar_on_date_time_index_aware_series(): +@pytest.mark.parametrize( + "by_row, expected", [(True, Series(np.ones(30), dtype="int64")), (False, 1)] +) +def test_apply_scalar_on_date_time_index_aware_series(by_row, expected): # GH 25959 # Calling apply on a localized time series should not cause an error series = tm.makeTimeSeries(nper=30).tz_localize("UTC") - result = Series(series.index).apply(lambda x: 1) - tm.assert_series_equal(result, Series(np.ones(30), dtype="int64")) + result = Series(series.index).apply(lambda x: 1, by_row=by_row) + tm.assert_equal(result, expected) -def test_apply_to_timedelta(): +def test_apply_to_timedelta(by_row): list_of_valid_strings = ["00:00:01", "00:00:02"] a = pd.to_timedelta(list_of_valid_strings) - b = Series(list_of_valid_strings).apply(pd.to_timedelta) + b = Series(list_of_valid_strings).apply(pd.to_timedelta, by_row=by_row) tm.assert_series_equal(Series(a), b) list_of_strings = ["00:00:01", np.nan, pd.NaT, pd.NaT] a = pd.to_timedelta(list_of_strings) ser = Series(list_of_strings) - b = ser.apply(pd.to_timedelta) + b = ser.apply(pd.to_timedelta, by_row=by_row) tm.assert_series_equal(Series(a), b) @@ -490,12 +533,15 @@ def test_apply_to_timedelta(): (np.array([np.sum, np.mean]), ["sum", "mean"]), ], ) -@pytest.mark.parametrize("how", ["agg", "apply"]) -def test_apply_listlike_reducer(string_series, ops, names, how): +@pytest.mark.parametrize( + "how, kwargs", + [["agg", {}], ["apply", {"by_row": True}], ["apply", {"by_row": False}]], +) +def test_apply_listlike_reducer(string_series, ops, names, how, kwargs): # GH 39140 expected = 
Series({name: op(string_series) for name, op in zip(names, ops)}) expected.name = "series" - result = getattr(string_series, how)(ops) + result = getattr(string_series, how)(ops, **kwargs) tm.assert_series_equal(result, expected) @@ -508,12 +554,15 @@ def test_apply_listlike_reducer(string_series, ops, names, how): Series({"A": np.sum, "B": np.mean}), ], ) -@pytest.mark.parametrize("how", ["agg", "apply"]) -def test_apply_dictlike_reducer(string_series, ops, how): +@pytest.mark.parametrize( + "how, kwargs", + [["agg", {}], ["apply", {"by_row": True}], ["apply", {"by_row": False}]], +) +def test_apply_dictlike_reducer(string_series, ops, how, kwargs): # GH 39140 expected = Series({name: op(string_series) for name, op in ops.items()}) expected.name = string_series.name - result = getattr(string_series, how)(ops) + result = getattr(string_series, how)(ops, **kwargs) tm.assert_series_equal(result, expected) @@ -526,15 +575,29 @@ def test_apply_dictlike_reducer(string_series, ops, how): (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]), ], ) -def test_apply_listlike_transformer(string_series, ops, names): +def test_apply_listlike_transformer(string_series, ops, names, by_row): # GH 39140 with np.errstate(all="ignore"): expected = concat([op(string_series) for op in ops], axis=1) expected.columns = names - result = string_series.apply(ops) + result = string_series.apply(ops, by_row=by_row) tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize( + "ops, expected", + [ + ([lambda x: x], DataFrame({"": [1, 2, 3]})), + ([lambda x: x.sum()], Series([6], index=[""])), + ], +) +def test_apply_listlike_lambda(ops, expected, by_row): + # GH53400 + ser = Series([1, 2, 3]) + result = ser.apply(ops, by_row=by_row) + tm.assert_equal(result, expected) + + @pytest.mark.parametrize( "ops", [ @@ -544,23 +607,48 @@ def test_apply_listlike_transformer(string_series, ops, names): Series({"A": np.sqrt, "B": np.exp}), ], ) -def 
test_apply_dictlike_transformer(string_series, ops): +def test_apply_dictlike_transformer(string_series, ops, by_row): # GH 39140 with np.errstate(all="ignore"): expected = concat({name: op(string_series) for name, op in ops.items()}) expected.name = string_series.name - result = string_series.apply(ops) + result = string_series.apply(ops, by_row=by_row) tm.assert_series_equal(result, expected) -def test_apply_retains_column_name(): +@pytest.mark.parametrize( + "ops, expected", + [ + ( + {"a": lambda x: x}, + Series([1, 2, 3], index=MultiIndex.from_arrays([["a"] * 3, range(3)])), + ), + ({"a": lambda x: x.sum()}, Series([6], index=["a"])), + ], +) +def test_apply_dictlike_lambda(ops, by_row, expected): + # GH53400 + ser = Series([1, 2, 3]) + result = ser.apply(ops, by_row=by_row) + tm.assert_equal(result, expected) + + +def test_apply_retains_column_name(by_row): # GH 16380 df = DataFrame({"x": range(3)}, Index(range(3), name="x")) func = lambda x: Series(range(x + 1), Index(range(x + 1), name="y")) + + if not by_row: + # GH53400 + msg = "'Series' object cannot be interpreted as an integer" + with pytest.raises(TypeError, match=msg): + df.x.apply(func, by_row=by_row) + return + msg = "Returning a DataFrame from Series.apply when the supplied function" with tm.assert_produces_warning(FutureWarning, match=msg): # GH52123 - result = df.x.apply(func) + result = df.x.apply(func, by_row=by_row) expected = DataFrame( [[0.0, np.nan, np.nan], [0.0, 1.0, np.nan], [0.0, 1.0, 2.0]], columns=Index(range(3), name="y"),