diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index daf5a0e481b8e..3a0aad41933bc 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -794,12 +794,12 @@ Apply index=["I", "II", "III"], ) - def SeriesFromSubList(aList): - return pd.Series(aList) + def make_df(ser): + new_vals = [pd.Series(value, name=name) for name, value in ser.items()] + return pd.DataFrame(new_vals) + + df_orgz = pd.concat({ind: row.pipe(make_df) for ind, row in df.iterrows()}) - df_orgz = pd.concat( - {ind: row.apply(SeriesFromSubList) for ind, row in df.iterrows()} - ) df_orgz `Rolling apply with a DataFrame returning a Series diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index ed1689f0c9f79..d301373bb6eea 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -1221,19 +1221,6 @@ The dimension of the returned result can also change: grouped.apply(f) -``apply`` on a Series can operate on a returned value from the applied function -that is itself a series, and possibly upcast the result to a DataFrame: - -.. ipython:: python - - def f(x): - return pd.Series([x, x ** 2], index=["x", "x^2"]) - - - s = pd.Series(np.random.rand(5)) - s - s.apply(f) - Similar to :ref:`groupby.aggregate.agg`, the resulting dtype will reflect that of the apply function. If the results from different groups have different dtypes, then a common dtype will be determined in the same way as ``DataFrame`` construction. diff --git a/doc/source/whatsnew/v0.10.0.rst b/doc/source/whatsnew/v0.10.0.rst index bd47e6e4bc025..388e716d122a5 100644 --- a/doc/source/whatsnew/v0.10.0.rst +++ b/doc/source/whatsnew/v0.10.0.rst @@ -243,15 +243,26 @@ Convenience methods ``ffill`` and ``bfill`` have been added: function, that is itself a series, and possibly upcast the result to a DataFrame - .. 
ipython:: python - - def f(x): - return pd.Series([x, x ** 2], index=["x", "x^2"]) - - - s = pd.Series(np.random.rand(5)) - s - s.apply(f) + .. code-block:: python + + >>> def f(x): + ... return pd.Series([x, x ** 2], index=["x", "x^2"]) + >>> + >>> s = pd.Series(np.random.rand(5)) + >>> s + 0 0.340445 + 1 0.984729 + 2 0.919540 + 3 0.037772 + 4 0.861549 + dtype: float64 + >>> s.apply(f) + x x^2 + 0 0.340445 0.115903 + 1 0.984729 0.969691 + 2 0.919540 0.845555 + 3 0.037772 0.001427 + 4 0.861549 0.742267 - New API functions for working with pandas options (:issue:`2097`): diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 743bb78c70c36..50d4a8250b005 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -171,6 +171,7 @@ Deprecations - Deprecated logical operations (``|``, ``&``, ``^``) between pandas objects and dtype-less sequences (e.g. ``list``, ``tuple``), wrap a sequence in a :class:`Series` or numpy array before operating instead (:issue:`51521`) - Deprecated the methods :meth:`Series.bool` and :meth:`DataFrame.bool` (:issue:`51749`) - Deprecated :meth:`DataFrame.swapaxes` and :meth:`Series.swapaxes`, use :meth:`DataFrame.transpose` or :meth:`Series.transpose` instead (:issue:`51946`) +- Deprecated making :meth:`Series.apply` return a :class:`DataFrame` when the passed-in callable returns a :class:`Series` object. In the future this will return a :class:`Series` whose values are themselves :class:`Series`. 
This pattern was very slow and it's recommended to use alternative methods to achieve the same goal (:issue:`52116`) - Deprecated parameter ``convert_type`` in :meth:`Series.apply` (:issue:`52140`) - diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 2ffd49f674cfb..53c63132b2602 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -19,6 +19,7 @@ Sequence, cast, ) +import warnings import numpy as np @@ -36,6 +37,7 @@ ) from pandas.errors import SpecificationError from pandas.util._decorators import cache_readonly +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.cast import is_nested_object from pandas.core.dtypes.common import ( @@ -1091,6 +1093,14 @@ def apply_standard(self) -> DataFrame | Series: mapped = obj._map_values(mapper=f, na_action=action, convert=self.convert_dtype) if len(mapped) and isinstance(mapped[0], ABCSeries): + warnings.warn( + "Returning a DataFrame from Series.apply when the supplied function" + "returns a Series is deprecated and will be removed in a future " + "version.", + FutureWarning, + stacklevel=find_stack_level(), + ) # GH52116 + # GH#43986 Need to do list(mapped) in order to get treated as nested # See also GH#25959 regarding EA support return obj._constructor_expanddim(list(mapped), index=obj.index) diff --git a/pandas/core/series.py b/pandas/core/series.py index 9c91badc57ce3..a79d35980f9a2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4413,6 +4413,11 @@ def apply( """ Invoke function on values of Series. + .. deprecated:: 2.1.0 + + If the result from ``func`` is a ``Series``, wrapping the output in a + ``DataFrame`` instead of a ``Series`` has been deprecated. + Can be ufunc (a NumPy function that applies to the entire Series) or a Python function that only works on single values. 
diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index abd59d530e5d8..e37006eb0a5f6 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -361,11 +361,18 @@ def test_agg_apply_evaluate_lambdas_the_same(string_series): def test_with_nested_series(datetime_series): # GH 2316 # .agg with a reducer and a transform, what to do - result = datetime_series.apply(lambda x: Series([x, x**2], index=["x", "x^2"])) + msg = "Returning a DataFrame from Series.apply when the supplied function" + with tm.assert_produces_warning(FutureWarning, match=msg): + # GH52123 + result = datetime_series.apply( + lambda x: Series([x, x**2], index=["x", "x^2"]) + ) expected = DataFrame({"x": datetime_series, "x^2": datetime_series**2}) tm.assert_frame_equal(result, expected) - result = datetime_series.agg(lambda x: Series([x, x**2], index=["x", "x^2"])) + with tm.assert_produces_warning(FutureWarning, match=msg): + # GH52123 + result = datetime_series.agg(lambda x: Series([x, x**2], index=["x", "x^2"])) tm.assert_frame_equal(result, expected) @@ -445,7 +452,10 @@ def test_apply_series_on_date_time_index_aware_series(dti, exp, aware): index = dti.tz_localize("UTC").index else: index = dti.index - result = Series(index).apply(lambda x: Series([1, 2])) + msg = "Returning a DataFrame from Series.apply when the supplied function" + with tm.assert_produces_warning(FutureWarning, match=msg): + # GH52123 + result = Series(index).apply(lambda x: Series([1, 2])) tm.assert_frame_equal(result, exp) @@ -546,7 +556,11 @@ def test_apply_dictlike_transformer(string_series, ops): def test_apply_retains_column_name(): # GH 16380 df = DataFrame({"x": range(3)}, Index(range(3), name="x")) - result = df.x.apply(lambda x: Series(range(x + 1), Index(range(x + 1), name="y"))) + func = lambda x: Series(range(x + 1), Index(range(x + 1), name="y")) + msg = "Returning a DataFrame from Series.apply when the supplied function" + 
with tm.assert_produces_warning(FutureWarning, match=msg): + # GH52123 + result = df.x.apply(func) expected = DataFrame( [[0.0, np.nan, np.nan], [0.0, 1.0, np.nan], [0.0, 1.0, 2.0]], columns=Index(range(3), name="y"),