From 0c6c0299e1da4273aa47ab1426cc4ffd7e054b5e Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Mon, 9 Jan 2023 16:29:05 +0800 Subject: [PATCH 01/11] add-FutureWarning-pandas.io.sql.execute --- doc/source/whatsnew/v2.0.0.rst | 2 +- pandas/io/sql.py | 6 ++++++ pandas/tests/io/test_sql.py | 9 +++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 3ea4447cdfa93..95c760ebaef9a 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -510,7 +510,7 @@ Other API changes Deprecations ~~~~~~~~~~~~ - Deprecated argument ``infer_datetime_format`` in :func:`to_datetime` and :func:`read_csv`, as a strict version of it is now the default (:issue:`48621`) - +- Deprecated :func:`pandas.io.sql.execute`(:issue:`50185`) .. --------------------------------------------------------------------------- .. _whatsnew_200.prior_deprecations: diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 2b845786b0366..eea97fc0d9760 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -201,6 +201,12 @@ def execute(sql, con, params=None): ------- Results Iterable """ + warnings.warn( + "`pandas.io.sql.execute` is deprecated and " + "will be removed in the future version.", + FutureWarning, + stacklevel=find_stack_level(), + ) # GH50185 sqlalchemy = import_optional_dependency("sqlalchemy", errors="ignore") if sqlalchemy is not None and isinstance(con, (str, sqlalchemy.engine.Engine)): diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 31ca060e36ad1..15c45821f959e 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -668,6 +668,15 @@ def test_execute_typeerror(sqlite_iris_engine): sql.execute("select * from iris", sqlite_iris_engine) +def test_execute_deprecated(sqlite_buildin_iris): + with tm.assert_produces_warning( + FutureWarning, + match="`pandas.io.sql.execute` is deprecated and " + "will be removed in the future version.", + ): + sql.execute("select * from iris", sqlite_buildin_iris) + + class MixInBase: def teardown_method(self): # if setup fails, there may not be a connection to close. From d055ee0027d8a0a762fee68d461e978d07afbbe4 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Mon, 9 Jan 2023 23:32:48 +0800 Subject: [PATCH 02/11] add GH and try fix doc build --- doc/source/whatsnew/v2.0.0.rst | 2 +- pandas/tests/io/test_sql.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 95c760ebaef9a..efbdd52aa30b3 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -510,7 +510,7 @@ Other API changes Deprecations ~~~~~~~~~~~~ - Deprecated argument ``infer_datetime_format`` in :func:`to_datetime` and :func:`read_csv`, as a strict version of it is now the default (:issue:`48621`) -- Deprecated :func:`pandas.io.sql.execute`(:issue:`50185`) +- Deprecated ``pandas.io.sql.execute``(:issue:`50185`) .. --------------------------------------------------------------------------- .. _whatsnew_200.prior_deprecations: diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 15c45821f959e..f83b6b0373a87 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -669,6 +669,7 @@ def test_execute_typeerror(sqlite_iris_engine): def test_execute_deprecated(sqlite_buildin_iris): + # GH50185 with tm.assert_produces_warning( FutureWarning, match="`pandas.io.sql.execute` is deprecated and " From eca7ca39467f56fba626159ff3aa36421af7e518 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Tue, 10 Jan 2023 11:38:16 +0800 Subject: [PATCH 03/11] try fix doc build --- doc/source/whatsnew/v2.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 992b51387eda8..27053a9a5df23 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -553,7 +553,7 @@ Other API changes Deprecations ~~~~~~~~~~~~ - Deprecated argument ``infer_datetime_format`` in :func:`to_datetime` and :func:`read_csv`, as a strict version of it is now the default (:issue:`48621`) -- Deprecated ``pandas.io.sql.execute``(:issue:`50185`) +- Deprecated :func:`pandas.io.sql.execute`(:issue:`50185`) .. --------------------------------------------------------------------------- .. _whatsnew_200.prior_deprecations: From 224ce2e96cc99d26125f38e5ebc4e351a8700b4c Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Tue, 10 Jan 2023 21:14:04 +0800 Subject: [PATCH 04/11] change comments in core/generic.py --- pandas/core/generic.py | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 561422c868e91..73ed223ec013b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2858,8 +2858,12 @@ def to_sql( >>> df.to_sql('users', con=engine) 3 - >>> engine.execute("SELECT * FROM users").fetchall() - [(0, 'User 1'), (1, 'User 2'), (2, 'User 3')] + >>> pd.read_sql_query("SELECT * FROM users", con=engine) + index name + 0 0 User 1 + 1 1 User 2 + 2 2 User 3 + An `sqlalchemy.engine.Connection` can also be passed to `con`: @@ -2874,18 +2878,25 @@ def to_sql( >>> df2 = pd.DataFrame({'name' : ['User 6', 'User 7']}) >>> df2.to_sql('users', con=engine, if_exists='append') 2 - >>> engine.execute("SELECT * FROM users").fetchall() - [(0, 'User 1'), (1, 'User 2'), (2, 'User 3'), - (0, 'User 4'), (1, 'User 5'), (0, 'User 6'), - (1, 'User 7')] + >>> pd.read_sql_query("SELECT * FROM users", con=engine) + index name + 0 0 User 1 + 1 1 User 2 + 2 2 User 3 + 3 0 User 4 + 4 1 User 5 + 5 0 User 6 + 6 1 User 7 Overwrite the table with just ``df2``. >>> df2.to_sql('users', con=engine, if_exists='replace', ... index_label='id') 2 - >>> engine.execute("SELECT * FROM users").fetchall() - [(0, 'User 6'), (1, 'User 7')] + >>> pd.read_sql_query("SELECT * FROM users", con=engine) + index name + 0 0 User 6 + 1 1 User 7 Specify the dtype (especially useful for integers with missing values). Notice that while pandas is forced to store the data as floating point, @@ -2904,8 +2915,9 @@ def to_sql( ... dtype={"A": Integer()}) 3 - >>> engine.execute("SELECT * FROM integers").fetchall() - [(1,), (None,), (2,)] + >>> pd.read_sql("SELECT * FROM integers", con=engine) + A + 0 1.0 """ # noqa:E501 from pandas.io import sql From ce00e61a440a195b72a8a803844ca0c433e83928 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Tue, 10 Jan 2023 21:23:52 +0800 Subject: [PATCH 05/11] fix an res error --- pandas/core/generic.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 73ed223ec013b..5c5fe866ea280 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2918,6 +2918,8 @@ def to_sql( >>> pd.read_sql("SELECT * FROM integers", con=engine) A 0 1.0 + 1 NaN + 2 2.0 """ # noqa:E501 from pandas.io import sql From 8138721407f3cbad0b24818b447bd19f41524488 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Tue, 10 Jan 2023 22:21:38 +0800 Subject: [PATCH 06/11] fix double break line --- pandas/core/generic.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5c5fe866ea280..b97efcb82a0c6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2864,7 +2864,6 @@ def to_sql( 1 1 User 2 2 2 User 3 - An `sqlalchemy.engine.Connection` can also be passed to `con`: >>> with engine.begin() as connection: From 8e15910d631174e1a271fa091ddd0929590a86ba Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Tue, 10 Jan 2023 23:11:06 +0800 Subject: [PATCH 07/11] fix example res error --- pandas/core/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b97efcb82a0c6..3e0972d5df6ae 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2893,9 +2893,9 @@ def to_sql( ... index_label='id') 2 >>> pd.read_sql_query("SELECT * FROM users", con=engine) - index name - 0 0 User 6 - 1 1 User 7 + id name + 0 0 User 6 + 1 1 User 7 Specify the dtype (especially useful for integers with missing values). Notice that while pandas is forced to store the data as floating point, From 5a198bc61f4597b3395c4475cfe215808b19d98d Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Wed, 11 Jan 2023 01:15:51 +0800 Subject: [PATCH 08/11] add blank line --- doc/source/whatsnew/v2.0.0.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 27053a9a5df23..3c914bb16ecde 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -554,8 +554,9 @@ Deprecations ~~~~~~~~~~~~ - Deprecated argument ``infer_datetime_format`` in :func:`to_datetime` and :func:`read_csv`, as a strict version of it is now the default (:issue:`48621`) - Deprecated :func:`pandas.io.sql.execute`(:issue:`50185`) -.. --------------------------------------------------------------------------- +- +.. --------------------------------------------------------------------------- .. _whatsnew_200.prior_deprecations: Removal of prior version deprecations/changes From bba740595dd6519a1e2ba1ddf8168bfe47d62436 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Wed, 11 Jan 2023 22:51:42 +0800 Subject: [PATCH 09/11] Revert "change comments in core/generic.py" --- pandas/core/generic.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3e0972d5df6ae..0c83ab78c940a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2858,11 +2858,8 @@ def to_sql( >>> df.to_sql('users', con=engine) 3 - >>> pd.read_sql_query("SELECT * FROM users", con=engine) - index name - 0 0 User 1 - 1 1 User 2 - 2 2 User 3 + >>> engine.execute("SELECT * FROM users").fetchall() + [(0, 'User 1'), (1, 'User 2'), (2, 'User 3')] An `sqlalchemy.engine.Connection` can also be passed to `con`: @@ -2892,10 +2889,8 @@ def to_sql( >>> df2.to_sql('users', con=engine, if_exists='replace', ... index_label='id') 2 - >>> pd.read_sql_query("SELECT * FROM users", con=engine) - id name - 0 0 User 6 - 1 1 User 7 + >>> engine.execute("SELECT * FROM users").fetchall() + [(0, 'User 6'), (1, 'User 7')] Specify the dtype (especially useful for integers with missing values). Notice that while pandas is forced to store the data as floating point, @@ -2914,11 +2909,8 @@ def to_sql( ... dtype={"A": Integer()}) 3 - >>> pd.read_sql("SELECT * FROM integers", con=engine) - A - 0 1.0 - 1 NaN - 2 2.0 + >>> engine.execute("SELECT * FROM integers").fetchall() + [(1,), (None,), (2,)] """ # noqa:E501 from pandas.io import sql From 364c98a3b14626a359b79703b50513f8bdadd019 Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Wed, 11 Jan 2023 22:52:40 +0800 Subject: [PATCH 10/11] Revert "DEPR: Enforce certain DataFrame reductions w/ axis=None to return scalars (#50593)" This reverts commit 47c9ee7b32d8bde2ff8cf50288a8787b11d512cb. --- doc/source/whatsnew/v2.0.0.rst | 1 - pandas/core/frame.py | 10 ++-- pandas/core/generic.py | 51 +++++++++++-------- pandas/tests/frame/test_reductions.py | 31 ++++++----- pandas/tests/groupby/test_categorical.py | 8 ++- pandas/tests/groupby/test_function.py | 16 ++++-- .../tests/groupby/transform/test_transform.py | 17 +++++-- 7 files changed, 80 insertions(+), 54 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 3c914bb16ecde..c5e51d27ba5f3 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -761,7 +761,6 @@ Removal of prior version deprecations/changes - Changed behavior of :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype and an incompatible ``fill_value``; this now casts to ``object`` dtype instead of raising, consistent with the behavior with other dtypes (:issue:`45746`) - Change the default argument of ``regex`` for :meth:`Series.str.replace` from ``True`` to ``False``. Additionally, a single character ``pat`` with ``regex=True`` is now treated as a regular expression instead of a string literal. (:issue:`36695`, :issue:`24804`) - Changed behavior of :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``; object-dtype columns with all-bool values will no longer be included, manually cast to ``bool`` dtype first (:issue:`46188`) -- Changed behavior of :meth:`DataFrame.max`, :class:`DataFrame.min`, :class:`DataFrame.mean`, :class:`DataFrame.median`, :class:`DataFrame.skew`, :class:`DataFrame.kurt` with ``axis=None`` to return a scalar applying the aggregation across both axes (:issue:`45072`) - Changed behavior of comparison of a :class:`Timestamp` with a ``datetime.date`` object; these now compare as un-equal and raise on inequality comparisons, matching the ``datetime.datetime`` behavior (:issue:`36131`) - Changed behavior of comparison of ``NaT`` with a ``datetime.date`` object; these now raise on inequality comparisons (:issue:`39196`) - Enforced deprecation of silently dropping columns that raised a ``TypeError`` in :class:`Series.transform` and :class:`DataFrame.transform` when used with a list or dictionary (:issue:`43740`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6491081c54592..fed92dc80a99b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -10357,8 +10357,9 @@ def _reduce( assert filter_type is None or filter_type == "bool", filter_type out_dtype = "bool" if filter_type == "bool" else None - if axis is not None: - axis = self._get_axis_number(axis) + # TODO: Make other agg func handle axis=None properly GH#21597 + axis = self._get_axis_number(axis) + assert axis in [0, 1] def func(values: np.ndarray): # We only use this in the case that operates on self.values @@ -10409,7 +10410,7 @@ def _get_data() -> DataFrame: return out - assert not numeric_only and axis in (1, None) + assert not numeric_only and axis == 1 data = self values = data.values @@ -10425,9 +10426,6 @@ def _get_data() -> DataFrame: # try to coerce to the original dtypes item by item if we can pass - if axis is None: - return result - labels = self._get_agg_axis(axis) result = self._constructor_sliced(result, index=labels) return result diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0c83ab78c940a..ded8c394edf83 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10949,7 +10949,7 @@ def _stat_function( self, name: str, func, - axis: Axis | None = 0, + axis: Axis | None | lib.NoDefault = None, skipna: bool_t = True, numeric_only: bool_t = False, **kwargs, @@ -10961,13 +10961,30 @@ def _stat_function( validate_bool_kwarg(skipna, "skipna", none_allowed=False) + if axis is None and self.ndim > 1: + # user must have explicitly passed axis=None + # GH#21597 + warnings.warn( + f"In a future version, DataFrame.{name}(axis=None) will return a " + f"scalar {name} over the entire DataFrame. To retain the old " + f"behavior, use 'frame.{name}(axis=0)' or just 'frame.{name}()'", + FutureWarning, + stacklevel=find_stack_level(), + ) + + if axis is lib.no_default: + axis = None + + if axis is None: + axis = self._stat_axis_number + return self._reduce( func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only ) def min( self, - axis: Axis | None = 0, + axis: Axis | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, numeric_only: bool_t = False, **kwargs, @@ -10983,7 +11000,7 @@ def min( def max( self, - axis: Axis | None = 0, + axis: Axis | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, numeric_only: bool_t = False, **kwargs, @@ -10999,7 +11016,7 @@ def max( def mean( self, - axis: Axis | None = 0, + axis: Axis | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, numeric_only: bool_t = False, **kwargs, @@ -11010,7 +11027,7 @@ def mean( def median( self, - axis: Axis | None = 0, + axis: Axis | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, numeric_only: bool_t = False, **kwargs, @@ -11021,7 +11038,7 @@ def median( def skew( self, - axis: Axis | None = 0, + axis: Axis | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, numeric_only: bool_t = False, **kwargs, @@ -11032,7 +11049,7 @@ def skew( def kurt( self, - axis: Axis | None = 0, + axis: Axis | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, numeric_only: bool_t = False, **kwargs, @@ -11354,7 +11371,7 @@ def prod( ) def mean( self, - axis: AxisInt | None = 0, + axis: AxisInt | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, numeric_only: bool_t = False, **kwargs, @@ -11375,7 +11392,7 @@ def mean( ) def skew( self, - axis: AxisInt | None = 0, + axis: AxisInt | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, numeric_only: bool_t = False, **kwargs, @@ -11399,7 +11416,7 @@ def skew( ) def kurt( self, - axis: Axis | None = 0, + axis: Axis | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, numeric_only: bool_t = False, **kwargs, @@ -11421,7 +11438,7 @@ def kurt( ) def median( self, - axis: AxisInt | None = 0, + axis: AxisInt | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, numeric_only: bool_t = False, **kwargs, @@ -11444,7 +11461,7 @@ def median( ) def max( self, - axis: AxisInt | None = 0, + axis: AxisInt | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, numeric_only: bool_t = False, **kwargs, @@ -11467,7 +11484,7 @@ def max( ) def min( self, - axis: AxisInt | None = 0, + axis: AxisInt | None | lib.NoDefault = lib.no_default, skipna: bool_t = True, numeric_only: bool_t = False, **kwargs, @@ -11696,12 +11713,6 @@ def _doc_params(cls): axis : {axis_descr} Axis for the function to be applied on. For `Series` this parameter is unused and defaults to 0. - - For DataFrames, specifying ``axis=None`` will apply the aggregation - across both axes. - - .. versionadded:: 2.0.0 - skipna : bool, default True Exclude NA/null values when computing the result. numeric_only : bool, default False @@ -11713,7 +11724,7 @@ def _doc_params(cls): Returns ------- -{name1} or scalar\ +{name1} or {name2} (if level specified)\ {see_also}\ {examples} """ diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index f1d176e59373f..2e0aa5fd0cf40 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1488,6 +1488,7 @@ def test_median_categorical_dtype_nuisance_column(self): # TODO: np.median(df, axis=0) gives np.array([2.0, 2.0]) instead # of expected.values + @pytest.mark.filterwarnings("ignore:.*will return a scalar.*:FutureWarning") @pytest.mark.parametrize("method", ["min", "max"]) def test_min_max_categorical_dtype_non_ordered_nuisance_column(self, method): # GH#28949 DataFrame.min should behave like Series.min @@ -1509,7 +1510,7 @@ def test_min_max_categorical_dtype_non_ordered_nuisance_column(self, method): getattr(df, method)() with pytest.raises(TypeError, match="is not ordered for operation"): - getattr(np, method)(df, axis=0) + getattr(np, method)(df) # same thing, but with an additional non-categorical column df["B"] = df["A"].astype(object) @@ -1517,7 +1518,7 @@ def test_min_max_categorical_dtype_non_ordered_nuisance_column(self, method): getattr(df, method)() with pytest.raises(TypeError, match="is not ordered for operation"): - getattr(np, method)(df, axis=0) + getattr(np, method)(df) def test_sum_timedelta64_skipna_false(using_array_manager, request): @@ -1599,22 +1600,20 @@ def test_prod_sum_min_count_mixed_object(): @pytest.mark.parametrize("method", ["min", "max", "mean", "median", "skew", "kurt"]) -def test_reduction_axis_none_returns_scalar(method): - # GH#21597 As of 2.0, axis=None reduces over all axes. +def test_reduction_axis_none_deprecation(method): + # GH#21597 deprecate axis=None defaulting to axis=0 so that we can change it + # to reducing over all axes. df = DataFrame(np.random.randn(4, 4)) - - result = getattr(df, method)(axis=None) - np_arr = df.to_numpy() - if method in {"skew", "kurt"}: - comp_mod = pytest.importorskip("scipy.stats") - if method == "kurt": - method = "kurtosis" - expected = getattr(comp_mod, method)(np_arr, bias=False, axis=None) - tm.assert_almost_equal(result, expected) - else: - expected = getattr(np, method)(np_arr, axis=None) - assert result == expected + meth = getattr(df, method) + + msg = f"scalar {method} over the entire DataFrame" + with tm.assert_produces_warning(FutureWarning, match=msg): + res = meth(axis=None) + with tm.assert_produces_warning(None): + expected = meth() + tm.assert_series_equal(res, expected) + tm.assert_series_equal(res, meth(axis=0)) @pytest.mark.parametrize( diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 0fabdf84e5e86..d9181830925f7 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -147,7 +147,11 @@ def f(x): tm.assert_frame_equal(df.groupby(c, observed=False).transform(sum), df[["a"]]) gbc = df.groupby(c, observed=False) - result = gbc.transform(lambda xs: np.max(xs, axis=0)) + with tm.assert_produces_warning( + FutureWarning, match="scalar max", check_stacklevel=False + ): + # stacklevel is thrown off (i think) bc the stack goes through numpy C code + result = gbc.transform(lambda xs: np.max(xs)) tm.assert_frame_equal(result, df[["a"]]) with tm.assert_produces_warning(None): @@ -291,7 +295,7 @@ def test_apply(ordered): idx = MultiIndex.from_arrays([missing, dense], names=["missing", "dense"]) expected = DataFrame([0, 1, 2.0], index=idx, columns=["values"]) - result = grouped.apply(lambda x: np.mean(x, axis=0)) + result = grouped.apply(lambda x: np.mean(x)) tm.assert_frame_equal(result, expected) result = grouped.mean() diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 875037b390883..bb15783f4607f 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -80,12 +80,20 @@ def test_builtins_apply(keys, f): assert_msg = f"invalid frame shape: {result.shape} (expected ({ngroups}, 3))" assert result.shape == (ngroups, 3), assert_msg - npfunc = lambda x: getattr(np, fname)(x, axis=0) # numpy's equivalent function - expected = gb.apply(npfunc) + npfunc = getattr(np, fname) # numpy's equivalent function + if f in [max, min]: + warn = FutureWarning + else: + warn = None + msg = "scalar (max|min) over the entire DataFrame" + with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): + # stacklevel can be thrown off because (i think) the stack + # goes through some of numpy's C code. + expected = gb.apply(npfunc) tm.assert_frame_equal(result, expected) with tm.assert_produces_warning(None): - expected2 = gb.apply(lambda x: npfunc(x)) + expected2 = gb.apply(lambda x: npfunc(x, axis=0)) tm.assert_frame_equal(result, expected2) if f != sum: @@ -93,7 +101,7 @@ def test_builtins_apply(keys, f): expected.set_index(keys, inplace=True, drop=False) tm.assert_frame_equal(result, expected, check_dtype=False) - tm.assert_series_equal(getattr(result, fname)(axis=0), getattr(df, fname)(axis=0)) + tm.assert_series_equal(getattr(result, fname)(), getattr(df, fname)()) class TestNumericOnly: diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index cb3b7def39c52..8f1d52c2ea03d 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -57,7 +57,7 @@ def test_transform(): tm.assert_frame_equal(result, expected) def demean(arr): - return arr - arr.mean(axis=0) + return arr - arr.mean() people = DataFrame( np.random.randn(5, 5), @@ -144,7 +144,7 @@ def test_transform_broadcast(tsframe, ts): result = grouped.transform(np.mean) tm.assert_index_equal(result.index, tsframe.index) for _, gp in grouped: - agged = gp.mean(axis=0) + agged = gp.mean() res = result.reindex(gp.index) for col in tsframe: assert_fp_equal(res[col], agged[col]) @@ -214,7 +214,7 @@ def test_transform_axis_ts(tsframe): ts = tso grouped = ts.groupby(lambda x: x.weekday(), group_keys=False) result = ts - grouped.transform("mean") - expected = grouped.apply(lambda x: x - x.mean(axis=0)) + expected = grouped.apply(lambda x: x - x.mean()) tm.assert_frame_equal(result, expected) ts = ts.T @@ -227,7 +227,7 @@ def test_transform_axis_ts(tsframe): ts = tso.iloc[[1, 0] + list(range(2, len(base)))] grouped = ts.groupby(lambda x: x.weekday(), group_keys=False) result = ts - grouped.transform("mean") - expected = grouped.apply(lambda x: x - x.mean(axis=0)) + expected = grouped.apply(lambda x: x - x.mean()) tm.assert_frame_equal(result, expected) ts = ts.T @@ -477,9 +477,16 @@ def test_transform_coercion(): expected = g.transform(np.mean) - result = g.transform(lambda x: np.mean(x, axis=0)) + # in 2.0 np.mean on a DataFrame is equivalent to frame.mean(axis=None) + # which not gives a scalar instead of Series + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = g.transform(lambda x: np.mean(x)) tm.assert_frame_equal(result, expected) + with tm.assert_produces_warning(None): + result2 = g.transform(lambda x: np.mean(x, axis=0)) + tm.assert_frame_equal(result2, expected) + def test_groupby_transform_with_int(): From ade630fa69ffee6d6b094e3982edfe835d0f35db Mon Sep 17 00:00:00 2001 From: luke <2736230899@qq.com> Date: Wed, 11 Jan 2023 22:57:46 +0800 Subject: [PATCH 11/11] Revert "change comments in core/generic.py" This reverts commit 224ce2e96cc99d26125f38e5ebc4e351a8700b4c. --- pandas/core/generic.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ded8c394edf83..3cade2568d921 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2874,15 +2874,10 @@ def to_sql( >>> df2 = pd.DataFrame({'name' : ['User 6', 'User 7']}) >>> df2.to_sql('users', con=engine, if_exists='append') 2 - >>> pd.read_sql_query("SELECT * FROM users", con=engine) - index name - 0 0 User 1 - 1 1 User 2 - 2 2 User 3 - 3 0 User 4 - 4 1 User 5 - 5 0 User 6 - 6 1 User 7 + >>> engine.execute("SELECT * FROM users").fetchall() + [(0, 'User 1'), (1, 'User 2'), (2, 'User 3'), + (0, 'User 4'), (1, 'User 5'), (0, 'User 6'), + (1, 'User 7')] Overwrite the table with just ``df2``.