From fa0772950c3762b5478956e940c9b61c92f7b471 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 18 Sep 2023 22:56:22 +0200 Subject: [PATCH 1/5] Revert "DEPR: Deprecate returning a DataFrame in SeriesApply.apply_standard (#52123)" This reverts commit fe415f55 --- doc/source/user_guide/cookbook.rst | 10 ++++---- doc/source/user_guide/groupby.rst | 13 +++++++++++ doc/source/whatsnew/v0.10.0.rst | 29 +++++++---------------- doc/source/whatsnew/v2.1.0.rst | 8 ------- pandas/core/apply.py | 8 ------- pandas/core/series.py | 5 ---- pandas/tests/apply/test_series_apply.py | 31 +++++++------------------ 7 files changed, 35 insertions(+), 69 deletions(-) diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index 002e88533ab93..2d2c0a4db4df6 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -794,12 +794,12 @@ Apply index=["I", "II", "III"], ) - def make_df(ser): - new_vals = [pd.Series(value, name=name) for name, value in ser.items()] - return pd.DataFrame(new_vals) - - df_orgz = pd.concat({ind: row.pipe(make_df) for ind, row in df.iterrows()}) + def SeriesFromSubList(aList): + return pd.Series(aList) + df_orgz = pd.concat( + {ind: row.apply(SeriesFromSubList) for ind, row in df.iterrows()} + ) df_orgz `Rolling apply with a DataFrame returning a Series diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index 5dd14e243fbb3..4be62090ec645 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -1213,6 +1213,19 @@ The dimension of the returned result can also change: grouped.apply(f) +``apply`` on a Series can operate on a returned value from the applied function +that is itself a series, and possibly upcast the result to a DataFrame: + +.. 
ipython:: python + + def f(x): + return pd.Series([x, x ** 2], index=["x", "x^2"]) + + + s = pd.Series(np.random.rand(5)) + s + s.apply(f) + Similar to :ref:`groupby.aggregate.agg`, the resulting dtype will reflect that of the apply function. If the results from different groups have different dtypes, then a common dtype will be determined in the same way as ``DataFrame`` construction. diff --git a/doc/source/whatsnew/v0.10.0.rst b/doc/source/whatsnew/v0.10.0.rst index 422efc1b36946..be50c34d7d14c 100644 --- a/doc/source/whatsnew/v0.10.0.rst +++ b/doc/source/whatsnew/v0.10.0.rst @@ -261,26 +261,15 @@ Convenience methods ``ffill`` and ``bfill`` have been added: function, that is itself a series, and possibly upcast the result to a DataFrame - .. code-block:: python - - >>> def f(x): - ... return pd.Series([x, x ** 2], index=["x", "x^2"]) - >>> - >>> s = pd.Series(np.random.rand(5)) - >>> s - 0 0.340445 - 1 0.984729 - 2 0.919540 - 3 0.037772 - 4 0.861549 - dtype: float64 - >>> s.apply(f) - x x^2 - 0 0.340445 0.115903 - 1 0.984729 0.969691 - 2 0.919540 0.845555 - 3 0.037772 0.001427 - 4 0.861549 0.742267 + .. ipython:: python + + def f(x): + return pd.Series([x, x ** 2], index=["x", "x^2"]) + + + s = pd.Series(np.random.rand(5)) + s + s.apply(f) - New API functions for working with pandas options (:issue:`2097`): diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 040ca048d1224..e24ff67974cd8 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -548,14 +548,6 @@ Other Deprecations - Deprecated :meth:`.Styler.applymap`. Use the new :meth:`.Styler.map` method instead (:issue:`52708`) - Deprecated :meth:`DataFrame.applymap`. 
Use the new :meth:`DataFrame.map` method instead (:issue:`52353`) - Deprecated :meth:`DataFrame.swapaxes` and :meth:`Series.swapaxes`, use :meth:`DataFrame.transpose` or :meth:`Series.transpose` instead (:issue:`51946`) -- Deprecated ``freq`` parameter in :class:`.PeriodArray` constructor, pass ``dtype`` instead (:issue:`52462`) -- Deprecated allowing non-standard inputs in :func:`take`, pass either a ``numpy.ndarray``, :class:`.ExtensionArray`, :class:`Index`, or :class:`Series` (:issue:`52981`) -- Deprecated allowing non-standard sequences for :func:`isin`, :func:`value_counts`, :func:`unique`, :func:`factorize`, case to one of ``numpy.ndarray``, :class:`Index`, :class:`.ExtensionArray`, or :class:`Series` before calling (:issue:`52986`) -- Deprecated behavior of :class:`DataFrame` reductions ``sum``, ``prod``, ``std``, ``var``, ``sem`` with ``axis=None``, in a future version this will operate over both axes returning a scalar instead of behaving like ``axis=0``; note this also affects numpy functions e.g. ``np.sum(df)`` (:issue:`21597`) -- Deprecated behavior of :func:`concat` when :class:`DataFrame` has columns that are all-NA, in a future version these will not be discarded when determining the resulting dtype (:issue:`40893`) -- Deprecated behavior of :meth:`Series.dt.to_pydatetime`, in a future version this will return a :class:`Series` containing python ``datetime`` objects instead of an ``ndarray`` of datetimes; this matches the behavior of other :attr:`Series.dt` properties (:issue:`20306`) -- Deprecated logical operations (``|``, ``&``, ``^``) between pandas objects and dtype-less sequences (e.g. ``list``, ``tuple``), wrap a sequence in a :class:`Series` or NumPy array before operating instead (:issue:`51521`) -- Deprecated making :meth:`Series.apply` return a :class:`DataFrame` when the passed-in callable returns a :class:`Series` object. In the future this will return a :class:`Series` whose values are themselves :class:`Series`. 
This pattern was very slow and it's recommended to use alternative methods to archive the same goal (:issue:`52116`) - Deprecated parameter ``convert_type`` in :meth:`Series.apply` (:issue:`52140`) - Deprecated passing a dictionary to :meth:`.SeriesGroupBy.agg`; pass a list of aggregations instead (:issue:`50684`) - Deprecated the ``fastpath`` keyword in :class:`Categorical` constructor, use :meth:`Categorical.from_codes` instead (:issue:`20110`) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 9748d4fe66739..1525e316f345f 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1326,14 +1326,6 @@ def curried(x): ) if len(mapped) and isinstance(mapped[0], ABCSeries): - warnings.warn( - "Returning a DataFrame from Series.apply when the supplied function " - "returns a Series is deprecated and will be removed in a future " - "version.", - FutureWarning, - stacklevel=find_stack_level(), - ) # GH52116 - # GH#43986 Need to do list(mapped) in order to get treated as nested # See also GH#25959 regarding EA support return obj._constructor_expanddim(list(mapped), index=obj.index) diff --git a/pandas/core/series.py b/pandas/core/series.py index e0e27581ef7e2..b3ead03cd5e3f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4638,11 +4638,6 @@ def apply( """ Invoke function on values of Series. - .. deprecated:: 2.1.0 - - If the result from ``func`` is a ``Series``, wrapping the output in a - ``DataFrame`` instead of a ``Series`` has been deprecated. - Can be ufunc (a NumPy function that applies to the entire Series) or a Python function that only works on single values. 
diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index d3e5ac1b4ca7a..0c28f4d445efc 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -420,15 +420,15 @@ def test_agg_evaluate_lambdas(string_series): def test_with_nested_series(datetime_series, op_name): # GH 2316 # .agg with a reducer and a transform, what to do - msg = "Returning a DataFrame from Series.apply when the supplied function" - with tm.assert_produces_warning(FutureWarning, match=msg): - # GH52123 - result = getattr(datetime_series, op_name)( - lambda x: Series([x, x**2], index=["x", "x^2"]) - ) + result = getattr(datetime_series, op_name)( + lambda x: Series([x, x**2], index=["x", "x^2"]) + ) expected = DataFrame({"x": datetime_series, "x^2": datetime_series**2}) tm.assert_frame_equal(result, expected) + result = datetime_series.agg(lambda x: Series([x, x**2], index=["x", "x^2"])) + tm.assert_frame_equal(result, expected) + def test_replicate_describe(string_series): # this also tests a result set that is all scalars @@ -512,10 +512,7 @@ def test_apply_series_on_date_time_index_aware_series(dti, exp, aware): index = dti.tz_localize("UTC").index else: index = dti.index - msg = "Returning a DataFrame from Series.apply when the supplied function" - with tm.assert_produces_warning(FutureWarning, match=msg): - # GH52123 - result = Series(index).apply(lambda x: Series([1, 2])) + result = Series(index).apply(lambda x: Series([1, 2])) tm.assert_frame_equal(result, exp) @@ -662,19 +659,7 @@ def test_apply_dictlike_lambda(ops, by_row, expected): def test_apply_retains_column_name(by_row): # GH 16380 df = DataFrame({"x": range(3)}, Index(range(3), name="x")) - func = lambda x: Series(range(x + 1), Index(range(x + 1), name="y")) - - if not by_row: - # GH53400 - msg = "'Series' object cannot be interpreted as an integer" - with pytest.raises(TypeError, match=msg): - df.x.apply(func, by_row=by_row) - return - - msg = 
"Returning a DataFrame from Series.apply when the supplied function" - with tm.assert_produces_warning(FutureWarning, match=msg): - # GH52123 - result = df.x.apply(func, by_row=by_row) + result = df.x.apply(lambda x: Series(range(x + 1), Index(range(x + 1), name="y"))) expected = DataFrame( [[0.0, np.nan, np.nan], [0.0, 1.0, np.nan], [0.0, 1.0, 2.0]], columns=Index(range(3), name="y"), From ab921fd313f4f720e22658ce17ffd84157670a75 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 18 Sep 2023 23:00:53 +0200 Subject: [PATCH 2/5] Fix tests --- pandas/tests/apply/test_series_apply.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 0c28f4d445efc..5e19ed8caf819 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -420,13 +420,18 @@ def test_agg_evaluate_lambdas(string_series): def test_with_nested_series(datetime_series, op_name): # GH 2316 # .agg with a reducer and a transform, what to do - result = getattr(datetime_series, op_name)( - lambda x: Series([x, x**2], index=["x", "x^2"]) - ) + msg = "cannot aggregate" + warning = FutureWarning if op_name == "agg" else None + with tm.assert_produces_warning(warning, match=msg): + # GH52123 + result = getattr(datetime_series, op_name)( + lambda x: Series([x, x**2], index=["x", "x^2"]) + ) expected = DataFrame({"x": datetime_series, "x^2": datetime_series**2}) tm.assert_frame_equal(result, expected) - result = datetime_series.agg(lambda x: Series([x, x**2], index=["x", "x^2"])) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = datetime_series.agg(lambda x: Series([x, x**2], index=["x", "x^2"])) tm.assert_frame_equal(result, expected) From a5ae008d5e4c0c04bd2202b0bc9d54bde24bb946 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 18 Sep 2023 23:03:05 +0200 Subject: [PATCH 3/5] Add whatsnew --- doc/source/whatsnew/v2.1.1.rst | 
2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.1.rst b/doc/source/whatsnew/v2.1.1.rst index 6d5da7cdff3b3..c9ab496295d85 100644 --- a/doc/source/whatsnew/v2.1.1.rst +++ b/doc/source/whatsnew/v2.1.1.rst @@ -43,7 +43,7 @@ Bug fixes Other ~~~~~ -- +- Reverted the deprecation that disallowed :meth:`Series.apply` returning a :class:`DataFrame` when the passed-in callable returns a :class:`Series` object (:issue:`52116`) .. --------------------------------------------------------------------------- .. _whatsnew_211.contributors: From 7f8aeb844373bbc91e7477897663f4b5ef928bcf Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 18 Sep 2023 23:05:40 +0200 Subject: [PATCH 4/5] Add whatsnew --- doc/source/whatsnew/v2.1.0.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index e24ff67974cd8..18054e0b01191 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -548,6 +548,13 @@ Other Deprecations - Deprecated :meth:`.Styler.applymap`. Use the new :meth:`.Styler.map` method instead (:issue:`52708`) - Deprecated :meth:`DataFrame.applymap`. 
Use the new :meth:`DataFrame.map` method instead (:issue:`52353`) - Deprecated :meth:`DataFrame.swapaxes` and :meth:`Series.swapaxes`, use :meth:`DataFrame.transpose` or :meth:`Series.transpose` instead (:issue:`51946`) +- Deprecated ``freq`` parameter in :class:`.PeriodArray` constructor, pass ``dtype`` instead (:issue:`52462`) +- Deprecated allowing non-standard inputs in :func:`take`, pass either a ``numpy.ndarray``, :class:`.ExtensionArray`, :class:`Index`, or :class:`Series` (:issue:`52981`) +- Deprecated allowing non-standard sequences for :func:`isin`, :func:`value_counts`, :func:`unique`, :func:`factorize`, cast to one of ``numpy.ndarray``, :class:`Index`, :class:`.ExtensionArray`, or :class:`Series` before calling (:issue:`52986`) +- Deprecated behavior of :class:`DataFrame` reductions ``sum``, ``prod``, ``std``, ``var``, ``sem`` with ``axis=None``, in a future version this will operate over both axes returning a scalar instead of behaving like ``axis=0``; note this also affects numpy functions e.g. ``np.sum(df)`` (:issue:`21597`) +- Deprecated behavior of :func:`concat` when :class:`DataFrame` has columns that are all-NA, in a future version these will not be discarded when determining the resulting dtype (:issue:`40893`) +- Deprecated behavior of :meth:`Series.dt.to_pydatetime`, in a future version this will return a :class:`Series` containing python ``datetime`` objects instead of an ``ndarray`` of datetimes; this matches the behavior of other :attr:`Series.dt` properties (:issue:`20306`) +- Deprecated logical operations (``|``, ``&``, ``^``) between pandas objects and dtype-less sequences (e.g.
``list``, ``tuple``), wrap a sequence in a :class:`Series` or NumPy array before operating instead (:issue:`51521`) - Deprecated parameter ``convert_type`` in :meth:`Series.apply` (:issue:`52140`) - Deprecated passing a dictionary to :meth:`.SeriesGroupBy.agg`; pass a list of aggregations instead (:issue:`50684`) - Deprecated the ``fastpath`` keyword in :class:`Categorical` constructor, use :meth:`Categorical.from_codes` instead (:issue:`20110`) From 88b4f01b017c5ccff3b28fffa40595b2112524dd Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Tue, 19 Sep 2023 22:29:10 +0200 Subject: [PATCH 5/5] Add test --- pandas/tests/apply/test_series_apply.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index 5e19ed8caf819..aeb6a01eb587a 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -679,3 +679,11 @@ def test_apply_type(): result = s.apply(type) expected = Series([int, str, type], index=["a", "b", "c"]) tm.assert_series_equal(result, expected) + + +def test_series_apply_unpack_nested_data(): + # GH#55189 + ser = Series([[1, 2, 3], [4, 5, 6, 7]]) + result = ser.apply(lambda x: Series(x)) + expected = DataFrame({0: [1.0, 4.0], 1: [2.0, 5.0], 2: [3.0, 6.0], 3: [np.nan, 7]}) + tm.assert_frame_equal(result, expected)