From 7fed06dc2547a95af6f1beedd4ede0954175f559 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Fri, 28 Aug 2020 13:43:41 -0500 Subject: [PATCH 01/28] Deprecate default observed=False for groupby --- pandas/core/frame.py | 2 +- pandas/core/groupby/groupby.py | 4 ++-- pandas/core/groupby/grouper.py | 19 +++++++++++++++++-- pandas/core/shared_docs.py | 9 +++++++++ pandas/tests/groupby/test_grouping.py | 15 +++++++++++++++ 5 files changed, 44 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5f149f10b05d3..a79fe7eb7376e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6698,7 +6698,7 @@ def groupby( sort: bool = True, group_keys: bool = True, squeeze: bool = no_default, - observed: bool = False, + observed: Optional[bool] = None, dropna: bool = True, ) -> DataFrameGroupBy: from pandas.core.groupby.generic import DataFrameGroupBy diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 798c0742f03e5..98d26ccb34a00 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -526,7 +526,7 @@ def __init__( sort: bool = True, group_keys: bool = True, squeeze: bool = False, - observed: bool = False, + observed: Optional[bool] = None, mutated: bool = False, dropna: bool = True, ): @@ -3016,7 +3016,7 @@ def get_groupby( sort: bool = True, group_keys: bool = True, squeeze: bool = False, - observed: bool = False, + observed: Optional[bool] = None, mutated: bool = False, dropna: bool = True, ) -> GroupBy: diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index e8af9da30a298..526d64a4b0260 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -2,6 +2,7 @@ Provide user facing operators for doing the split part of the split-apply-combine paradigm. 
""" +import textwrap from typing import Dict, Hashable, List, Optional, Set, Tuple import warnings @@ -31,6 +32,17 @@ from pandas.io.formats.printing import pprint_thing +_observed_msg = textwrap.dedent( + """\ +Using 'observed=False', because grouping on a categorical. A future version +of pandas will change to 'observed=True'. + +To silence the warning and switch to the future behavior, pass 'observed=True'. + +To keep the current behavior and silence the warning, pass 'observed=False'. +""" +) + class Grouper: """ @@ -432,7 +444,7 @@ def __init__( name=None, level=None, sort: bool = True, - observed: bool = False, + observed: Optional[bool] = None, in_axis: bool = False, dropna: bool = True, ): @@ -495,6 +507,9 @@ def __init__( # a passed Categorical elif is_categorical_dtype(self.grouper): + if observed is None: + warnings.warn(_observed_msg, FutureWarning, stacklevel=5) + self.grouper, self.all_grouper = recode_for_groupby( self.grouper, self.sort, observed ) @@ -631,7 +646,7 @@ def get_grouper( axis: int = 0, level=None, sort: bool = True, - observed: bool = False, + observed: Optional[bool] = None, mutated: bool = False, validate: bool = True, dropna: bool = True, diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 3aeb3b664b27f..367adfe7d4a84 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -119,6 +119,15 @@ This only applies if any of the groupers are Categoricals. If True: only show observed values for categorical groupers. If False: show all values for categorical groupers. + + The current default of ``observed=False`` is deprecated and will + change to ``observed=True`` in a future version of pandas. + + Explicitly pass ``observed=True`` to silence the warning and only + show observed values. + Explicitly pass ``observed=False`` to silence the warning and + show groups for all values, including unobserved categories. 
+ dropna : bool, default True If True, and if group keys contain NA values, NA values together with row/column will be dropped. diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 1d2208592a06d..133b657f46485 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -702,6 +702,21 @@ def test_groupby_multiindex_level_empty(self): ) tm.assert_frame_equal(result, expected) + def test_default_observed_deprecated(self): + df = pd.DataFrame([ + ['A', 1, 1], ['A', 2, 1], ['B', 1, 1] + ], columns=['x', 'y', 'z']) + df.x = df.x.astype('category') + df.y = df.x.astype('category') + + with tm.assert_produces_warning(expected_warning=FutureWarning): + df.groupby(['x', 'y']) + + with pytest.warns(None) as any_warnings: + df.groupby(['x', 'y'], observed=True) + df.groupby(['x', 'y'], observed=False) + assert len(any_warnings) == 0 + # get_group # -------------------------------- From 32397cfdf7ce4ccf237eb2ba6358aadeed7854cf Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Fri, 28 Aug 2020 14:44:05 -0500 Subject: [PATCH 02/28] Deprecate default for series --- pandas/core/groupby/grouper.py | 1 + pandas/core/series.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 526d64a4b0260..ff3b18d07911e 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -509,6 +509,7 @@ def __init__( if observed is None: warnings.warn(_observed_msg, FutureWarning, stacklevel=5) + observed = False self.grouper, self.all_grouper = recode_for_groupby( self.grouper, self.sort, observed diff --git a/pandas/core/series.py b/pandas/core/series.py index b20cf8eed9a2e..b51e2a42293d0 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1674,7 +1674,7 @@ def groupby( sort: bool = True, group_keys: bool = True, squeeze: bool = no_default, - observed: bool = False, + observed: Optional[bool] = 
None, dropna: bool = True, ) -> "SeriesGroupBy": from pandas.core.groupby.generic import SeriesGroupBy From 5b7507c67b0f51f1c2da73a512ff9fce755c256d Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Fri, 28 Aug 2020 15:00:53 -0500 Subject: [PATCH 03/28] Test warns for series --- pandas/tests/groupby/test_grouping.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 133b657f46485..699a72813156e 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -717,6 +717,11 @@ def test_default_observed_deprecated(self): df.groupby(['x', 'y'], observed=False) assert len(any_warnings) == 0 + cat = pd.Categorical(['A', 'B', 'C'], categories=['A', 'B', 'C', 'D']) + s = Series(cat) + with tm.assert_produces_warning(expected_warning=FutureWarning): + s.groupby(cat) + # get_group # -------------------------------- From 6a45118afa775ab9e4351ecc026938758fb3cd8f Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Fri, 28 Aug 2020 16:19:11 -0500 Subject: [PATCH 04/28] Test that pivot warns --- pandas/tests/reshape/test_pivot.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index f9b2a02920841..746e1d7808fa1 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -84,8 +84,8 @@ def test_pivot_table(self, observed): index = ["A", "B"] columns = "C" table = pivot_table( - self.data, values="D", index=index, columns=columns, observed=observed - ) + self.data, values="D", index=index, columns=columns, + observed=observed) table2 = self.data.pivot_table( values="D", index=index, columns=columns, observed=observed @@ -2186,3 +2186,10 @@ def test_pivot_index_list_values_none_immutable_args(self): assert index == ["lev1", "lev2"] assert columns == ["lev3"] + + def test_pivot_table_observed_deprecated_default(self): + with 
tm.assert_produces_warning(FutureWarning): + # make sure we actually have a category to warn on + self.data.A = self.data.A.astype('category') + self.data.pivot_table(values="D", index=["A", "B"], + columns=["C"]) From dd02ced5ef45adecec08c2c931d0602a6113b3d6 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Fri, 28 Aug 2020 16:20:49 -0500 Subject: [PATCH 05/28] Change default observed for pivot table --- pandas/core/frame.py | 2 +- pandas/core/reshape/pivot.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a79fe7eb7376e..e4cc3653abb70 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7029,7 +7029,7 @@ def pivot_table( margins=False, dropna=True, margins_name="All", - observed=False, + observed=None, ) -> DataFrame: from pandas.core.reshape.pivot import pivot_table diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 40496a5b8671b..49500494e45e9 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -46,7 +46,7 @@ def pivot_table( margins=False, dropna=True, margins_name="All", - observed=False, + observed=None, ) -> "DataFrame": index = _convert_by(index) columns = _convert_by(columns) From b1e7b503e0ff7f7592228e1ededc098179ecfdcb Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Fri, 28 Aug 2020 16:21:40 -0500 Subject: [PATCH 06/28] Don't set stacklevel bc of multiple entrypoints --- pandas/core/groupby/grouper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index ff3b18d07911e..77c32e5eaab80 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -508,7 +508,7 @@ def __init__( elif is_categorical_dtype(self.grouper): if observed is None: - warnings.warn(_observed_msg, FutureWarning, stacklevel=5) + warnings.warn(_observed_msg, FutureWarning) observed = False self.grouper, self.all_grouper = recode_for_groupby( 
From 1e311d7107d4dcbcefe950bdc5a550860c8658c7 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Fri, 28 Aug 2020 16:38:47 -0500 Subject: [PATCH 07/28] Don't check stacklevel on tests. This code can be reached from multiple places. Keeping up with the stacklevel seems like overkill here. --- pandas/tests/groupby/test_grouping.py | 6 ++++-- pandas/tests/reshape/test_pivot.py | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 699a72813156e..34eaf91867228 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -709,7 +709,8 @@ def test_default_observed_deprecated(self): df.x = df.x.astype('category') df.y = df.x.astype('category') - with tm.assert_produces_warning(expected_warning=FutureWarning): + with tm.assert_produces_warning(expected_warning=FutureWarning, + check_stacklevel=False): df.groupby(['x', 'y']) with pytest.warns(None) as any_warnings: @@ -719,7 +720,8 @@ def test_default_observed_deprecated(self): cat = pd.Categorical(['A', 'B', 'C'], categories=['A', 'B', 'C', 'D']) s = Series(cat) - with tm.assert_produces_warning(expected_warning=FutureWarning): + with tm.assert_produces_warning(expected_warning=FutureWarning, + check_stacklevel=False): s.groupby(cat) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 746e1d7808fa1..947c83d1a6c40 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2188,7 +2188,8 @@ def test_pivot_index_list_values_none_immutable_args(self): assert columns == ["lev3"] def test_pivot_table_observed_deprecated_default(self): - with tm.assert_produces_warning(FutureWarning): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): # make sure we actually have a category to warn on self.data.A = self.data.A.astype('category') self.data.pivot_table(values="D", index=["A", "B"], From 
8b46e0f049aa8a2b59762ce9c5e43299d9baf6c7 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Fri, 28 Aug 2020 16:41:23 -0500 Subject: [PATCH 08/28] Blacken --- pandas/tests/groupby/test_grouping.py | 28 ++++++++++++++------------- pandas/tests/reshape/test_pivot.py | 12 +++++------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 34eaf91867228..2704f92d1d2cc 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -703,25 +703,27 @@ def test_groupby_multiindex_level_empty(self): tm.assert_frame_equal(result, expected) def test_default_observed_deprecated(self): - df = pd.DataFrame([ - ['A', 1, 1], ['A', 2, 1], ['B', 1, 1] - ], columns=['x', 'y', 'z']) - df.x = df.x.astype('category') - df.y = df.x.astype('category') + df = pd.DataFrame( + [["A", 1, 1], ["A", 2, 1], ["B", 1, 1]], columns=["x", "y", "z"] + ) + df.x = df.x.astype("category") + df.y = df.x.astype("category") - with tm.assert_produces_warning(expected_warning=FutureWarning, - check_stacklevel=False): - df.groupby(['x', 'y']) + with tm.assert_produces_warning( + expected_warning=FutureWarning, check_stacklevel=False + ): + df.groupby(["x", "y"]) with pytest.warns(None) as any_warnings: - df.groupby(['x', 'y'], observed=True) - df.groupby(['x', 'y'], observed=False) + df.groupby(["x", "y"], observed=True) + df.groupby(["x", "y"], observed=False) assert len(any_warnings) == 0 - cat = pd.Categorical(['A', 'B', 'C'], categories=['A', 'B', 'C', 'D']) + cat = pd.Categorical(["A", "B", "C"], categories=["A", "B", "C", "D"]) s = Series(cat) - with tm.assert_produces_warning(expected_warning=FutureWarning, - check_stacklevel=False): + with tm.assert_produces_warning( + expected_warning=FutureWarning, check_stacklevel=False + ): s.groupby(cat) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 947c83d1a6c40..aa426d44c876c 100644 --- 
a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -84,8 +84,8 @@ def test_pivot_table(self, observed): index = ["A", "B"] columns = "C" table = pivot_table( - self.data, values="D", index=index, columns=columns, - observed=observed) + self.data, values="D", index=index, columns=columns, observed=observed + ) table2 = self.data.pivot_table( values="D", index=index, columns=columns, observed=observed @@ -2188,9 +2188,7 @@ def test_pivot_index_list_values_none_immutable_args(self): assert columns == ["lev3"] def test_pivot_table_observed_deprecated_default(self): - with tm.assert_produces_warning(FutureWarning, - check_stacklevel=False): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): # make sure we actually have a category to warn on - self.data.A = self.data.A.astype('category') - self.data.pivot_table(values="D", index=["A", "B"], - columns=["C"]) + self.data.A = self.data.A.astype("category") + self.data.pivot_table(values="D", index=["A", "B"], columns=["C"]) From 41370409aad13119d01e83c99f0f5ba595a11209 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Fri, 28 Aug 2020 16:49:46 -0500 Subject: [PATCH 09/28] Add deprecation doc --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 8dbc6728dccfe..ce6e2a1395868 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -522,6 +522,7 @@ Deprecations - Deprecated :meth:`Index.asi8` for :class:`Index` subclasses other than :class:`.DatetimeIndex`, :class:`.TimedeltaIndex`, and :class:`PeriodIndex` (:issue:`37877`) - The ``inplace`` parameter of :meth:`Categorical.remove_unused_categories` is deprecated and will be removed in a future version (:issue:`37643`) - The ``null_counts`` parameter of :meth:`DataFrame.info` is deprecated and replaced by ``show_counts``. 
It will be removed in a future version (:issue:`37999`) +- Deprecated the default of ``observed=False`` in :meth:`~DataFrame.groupby` and :meth:`~DataFrame.pivot_table` (:issue:`17594`) .. --------------------------------------------------------------------------- From 29b3bf85a7b58b3a19d7e67ed08ffdb65638488a Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Mon, 31 Aug 2020 10:51:30 -0500 Subject: [PATCH 10/28] Don't use pytest.warns --- pandas/tests/groupby/test_grouping.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 2704f92d1d2cc..47a5ca3199f77 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -714,7 +714,7 @@ def test_default_observed_deprecated(self): ): df.groupby(["x", "y"]) - with pytest.warns(None) as any_warnings: + with tm.assert_produces_warning(None) as any_warnings: df.groupby(["x", "y"], observed=True) df.groupby(["x", "y"], observed=False) assert len(any_warnings) == 0 From 5e30b0fd4493ed275c4e94b5a1df5e2aaa0fb3ca Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Mon, 31 Aug 2020 13:57:08 -0500 Subject: [PATCH 11/28] okwarning option --- doc/source/user_guide/10min.rst | 1 + doc/source/user_guide/advanced.rst | 1 + doc/source/user_guide/categorical.rst | 9 ++++++++- doc/source/user_guide/groupby.rst | 2 +- doc/source/whatsnew/v0.19.0.rst | 1 + doc/source/whatsnew/v0.20.0.rst | 1 + doc/source/whatsnew/v0.22.0.rst | 1 + 7 files changed, 14 insertions(+), 2 deletions(-) diff --git a/doc/source/user_guide/10min.rst b/doc/source/user_guide/10min.rst index cf548ba5d1133..5e528415e7d33 100644 --- a/doc/source/user_guide/10min.rst +++ b/doc/source/user_guide/10min.rst @@ -705,6 +705,7 @@ Sorting is per order in the categories, not lexical order. Grouping by a categorical column also shows empty categories. .. 
ipython:: python + :okwarning: df.groupby("grade").size() diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst index 2cd48ac7adb0e..47b20c18c7903 100644 --- a/doc/source/user_guide/advanced.rst +++ b/doc/source/user_guide/advanced.rst @@ -808,6 +808,7 @@ order is ``cab``). Groupby operations on the index will preserve the index nature as well. .. ipython:: python + :okwarning: df2.groupby(level=0).sum() df2.groupby(level=0).sum().index diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst index 5c43de05fb5b9..7d94610161cf6 100644 --- a/doc/source/user_guide/categorical.rst +++ b/doc/source/user_guide/categorical.rst @@ -615,9 +615,12 @@ Apart from :meth:`Series.min`, :meth:`Series.max` and :meth:`Series.mode`, the following operations are possible with categorical data: ``Series`` methods like :meth:`Series.value_counts` will use all categories, -even if some categories are not present in the data: +even if some categories are not present in the data, though this default is +deprecated and will be changed in a future release. It is recommended to use +the `observed` keyword explicitly: .. ipython:: python + :okwarning: s = pd.Series(pd.Categorical(["a", "b", "c", "c"], categories=["c", "a", "b", "d"])) s.value_counts() @@ -625,6 +628,7 @@ even if some categories are not present in the data: ``DataFrame`` methods like :meth:`DataFrame.sum` also show "unused" categories. .. ipython:: python + :okwarning: columns = pd.Categorical( ["One", "One", "Two"], categories=["One", "Two", "Three"], ordered=True @@ -638,6 +642,7 @@ even if some categories are not present in the data: Groupby will also show "unused" categories: .. ipython:: python + :okwarning: cats = pd.Categorical( ["a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c", "d"] @@ -659,6 +664,7 @@ Groupby will also show "unused" categories: Pivot tables: .. 
ipython:: python + :okwarning: raw_cat = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b", "c"]) df = pd.DataFrame({"A": raw_cat, "B": ["c", "d", "c", "d"], "values": [1, 2, 3, 4]}) @@ -676,6 +682,7 @@ Getting If the slicing operation returns either a ``DataFrame`` or a column of type ``Series``, the ``category`` dtype is preserved. + :okwarning: .. ipython:: python diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index d6081155b58db..b6f30beae1dbb 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -1269,7 +1269,7 @@ can be used as group keys. If so, the order of the levels will be preserved: factor = pd.qcut(data, [0, 0.25, 0.5, 0.75, 1.0]) - data.groupby(factor).mean() + data.groupby(factor, observed=True).mean() .. _groupby.specify: diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst index 340e1ce9ee1ef..73e8bc523b7b1 100644 --- a/doc/source/whatsnew/v0.19.0.rst +++ b/doc/source/whatsnew/v0.19.0.rst @@ -1131,6 +1131,7 @@ An analogous change has been made to ``MultiIndex.from_product``. As a consequence, ``groupby`` and ``set_index`` also preserve categorical dtypes in indexes .. ipython:: python + :okwarning: df = pd.DataFrame({"A": [0, 1], "B": [10, 11], "C": cat}) df_grouped = df.groupby(by=["A", "C"]).first() diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst index 2cb8e13e9a18a..edac69a25f625 100644 --- a/doc/source/whatsnew/v0.20.0.rst +++ b/doc/source/whatsnew/v0.20.0.rst @@ -291,6 +291,7 @@ In previous versions, ``.groupby(..., sort=False)`` would fail with a ``ValueErr **New behavior**: .. ipython:: python + :okwarning: df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum() diff --git a/doc/source/whatsnew/v0.22.0.rst b/doc/source/whatsnew/v0.22.0.rst index ec9769c22e76b..2b2532c227cab 100644 --- a/doc/source/whatsnew/v0.22.0.rst +++ b/doc/source/whatsnew/v0.22.0.rst @@ -118,6 +118,7 @@ instead of ``NaN``. 
*pandas 0.22* .. ipython:: python + :okwarning: grouper = pd.Categorical(["a", "a"], categories=["a", "b"]) pd.Series([1, 2]).groupby(grouper).sum() From 4b9c36ea0dce555650dd27a5a55a78c0db3c0983 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Mon, 30 Nov 2020 10:09:43 -0600 Subject: [PATCH 12/28] Remove incorrect series note --- doc/source/user_guide/categorical.rst | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst index 7d94610161cf6..3703af200fb7d 100644 --- a/doc/source/user_guide/categorical.rst +++ b/doc/source/user_guide/categorical.rst @@ -615,20 +615,16 @@ Apart from :meth:`Series.min`, :meth:`Series.max` and :meth:`Series.mode`, the following operations are possible with categorical data: ``Series`` methods like :meth:`Series.value_counts` will use all categories, -even if some categories are not present in the data, though this default is -deprecated and will be changed in a future release. It is recommended to use -the `observed` keyword explicitly: +even if some categories are not present in the data: .. ipython:: python - :okwarning: s = pd.Series(pd.Categorical(["a", "b", "c", "c"], categories=["c", "a", "b", "d"])) s.value_counts() -``DataFrame`` methods like :meth:`DataFrame.sum` also show "unused" categories. +``DataFrame`` methods like :meth:`DataFrame.sum` also show "unused" categories: .. ipython:: python - :okwarning: columns = pd.Categorical( ["One", "One", "Two"], categories=["One", "Two", "Three"], ordered=True @@ -639,7 +635,9 @@ the `observed` keyword explicitly: ) df.sum(axis=1, level=1) -Groupby will also show "unused" categories: +Groupby will also show "unused" categories, though this default is deprecated +and will be changed in a future release. It is recommended to use the +`observed` keyword explicitly: .. 
ipython:: python :okwarning: From 2deaea41513f756cd004e5f54cf346b4807efb32 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Mon, 30 Nov 2020 10:44:14 -0600 Subject: [PATCH 13/28] Hoop jump --- pandas/tests/groupby/test_grouping.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 47a5ca3199f77..47eec539a0078 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -703,7 +703,7 @@ def test_groupby_multiindex_level_empty(self): tm.assert_frame_equal(result, expected) def test_default_observed_deprecated(self): - df = pd.DataFrame( + df = DataFrame( [["A", 1, 1], ["A", 2, 1], ["B", 1, 1]], columns=["x", "y", "z"] ) df.x = df.x.astype("category") From d06f70d327bf3bfe08bf739b6f51362429ec215b Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Mon, 30 Nov 2020 10:53:44 -0600 Subject: [PATCH 14/28] Blacken --- pandas/tests/groupby/test_grouping.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 47eec539a0078..f7ab9ca92a756 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -703,9 +703,7 @@ def test_groupby_multiindex_level_empty(self): tm.assert_frame_equal(result, expected) def test_default_observed_deprecated(self): - df = DataFrame( - [["A", 1, 1], ["A", 2, 1], ["B", 1, 1]], columns=["x", "y", "z"] - ) + df = DataFrame([["A", 1, 1], ["A", 2, 1], ["B", 1, 1]], columns=["x", "y", "z"]) df.x = df.x.astype("category") df.y = df.x.astype("category") From e7a99757980888f2e07d45cac6a8c1558023b15b Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Mon, 30 Nov 2020 10:54:28 -0600 Subject: [PATCH 15/28] Fix backticks --- doc/source/user_guide/categorical.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/categorical.rst 
b/doc/source/user_guide/categorical.rst index 3703af200fb7d..4e4197a2f38a8 100644 --- a/doc/source/user_guide/categorical.rst +++ b/doc/source/user_guide/categorical.rst @@ -637,7 +637,7 @@ even if some categories are not present in the data: Groupby will also show "unused" categories, though this default is deprecated and will be changed in a future release. It is recommended to use the -`observed` keyword explicitly: +``observed`` keyword explicitly: .. ipython:: python :okwarning: From 61fe1a37fa0f9df666a9fce63bc2f0c6290d7ae6 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Mon, 30 Nov 2020 12:46:21 -0600 Subject: [PATCH 16/28] okwarning --- doc/source/user_guide/advanced.rst | 2 +- doc/source/user_guide/categorical.rst | 6 +++--- doc/source/whatsnew/v0.19.0.rst | 2 +- doc/source/whatsnew/v0.20.0.rst | 2 +- doc/source/whatsnew/v0.22.0.rst | 3 ++- doc/source/whatsnew/v0.23.0.rst | 1 + 6 files changed, 9 insertions(+), 7 deletions(-) diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst index 47b20c18c7903..8963615b7c627 100644 --- a/doc/source/user_guide/advanced.rst +++ b/doc/source/user_guide/advanced.rst @@ -808,7 +808,7 @@ order is ``cab``). Groupby operations on the index will preserve the index nature as well. .. ipython:: python - :okwarning: + :okwarning: df2.groupby(level=0).sum() df2.groupby(level=0).sum().index diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst index 4e4197a2f38a8..84b962bcd3b5e 100644 --- a/doc/source/user_guide/categorical.rst +++ b/doc/source/user_guide/categorical.rst @@ -625,6 +625,7 @@ even if some categories are not present in the data: ``DataFrame`` methods like :meth:`DataFrame.sum` also show "unused" categories: .. ipython:: python + :okwarning: columns = pd.Categorical( ["One", "One", "Two"], categories=["One", "Two", "Three"], ordered=True @@ -640,7 +641,7 @@ and will be changed in a future release. 
It is recommended to use the ``observed`` keyword explicitly: .. ipython:: python - :okwarning: + :okwarning: cats = pd.Categorical( ["a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c", "d"] @@ -662,7 +663,7 @@ and will be changed in a future release. It is recommended to use the Pivot tables: .. ipython:: python - :okwarning: + :okwarning: raw_cat = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b", "c"]) df = pd.DataFrame({"A": raw_cat, "B": ["c", "d", "c", "d"], "values": [1, 2, 3, 4]}) @@ -680,7 +681,6 @@ Getting If the slicing operation returns either a ``DataFrame`` or a column of type ``Series``, the ``category`` dtype is preserved. - :okwarning: .. ipython:: python diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst index 73e8bc523b7b1..cec8e44806250 100644 --- a/doc/source/whatsnew/v0.19.0.rst +++ b/doc/source/whatsnew/v0.19.0.rst @@ -1131,7 +1131,7 @@ An analogous change has been made to ``MultiIndex.from_product``. As a consequence, ``groupby`` and ``set_index`` also preserve categorical dtypes in indexes .. ipython:: python - :okwarning: + :okwarning: df = pd.DataFrame({"A": [0, 1], "B": [10, 11], "C": cat}) df_grouped = df.groupby(by=["A", "C"]).first() diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst index edac69a25f625..dbd77aab4ff3d 100644 --- a/doc/source/whatsnew/v0.20.0.rst +++ b/doc/source/whatsnew/v0.20.0.rst @@ -291,7 +291,7 @@ In previous versions, ``.groupby(..., sort=False)`` would fail with a ``ValueErr **New behavior**: .. ipython:: python - :okwarning: + :okwarning: df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum() diff --git a/doc/source/whatsnew/v0.22.0.rst b/doc/source/whatsnew/v0.22.0.rst index 2b2532c227cab..d8672be0bc711 100644 --- a/doc/source/whatsnew/v0.22.0.rst +++ b/doc/source/whatsnew/v0.22.0.rst @@ -118,7 +118,7 @@ instead of ``NaN``. *pandas 0.22* .. 
ipython:: python - :okwarning: + :okwarning: grouper = pd.Categorical(["a", "a"], categories=["a", "b"]) pd.Series([1, 2]).groupby(grouper).sum() @@ -127,6 +127,7 @@ To restore the 0.21 behavior of returning ``NaN`` for unobserved groups, use ``min_count>=1``. .. ipython:: python + :okwarning: pd.Series([1, 2]).groupby(grouper).sum(min_count=1) diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst index f4caea9d363eb..a763803d6fa3b 100644 --- a/doc/source/whatsnew/v0.23.0.rst +++ b/doc/source/whatsnew/v0.23.0.rst @@ -288,6 +288,7 @@ For pivoting operations, this behavior is *already* controlled by the ``dropna`` df .. ipython:: python + :okwarning: pd.pivot_table(df, values='values', index=['A', 'B'], dropna=True) From bacb61df2f5afbd8a83964900156ffec94c93206 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Mon, 30 Nov 2020 12:56:19 -0600 Subject: [PATCH 17/28] Move method to right class. Rebase mistake. --- pandas/tests/reshape/test_pivot.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index aa426d44c876c..519dc553bbedb 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -12,10 +12,10 @@ Index, MultiIndex, Series, + _testing as tm, concat, date_range, ) -import pandas._testing as tm from pandas.api.types import CategoricalDtype as CDT from pandas.core.reshape.pivot import pivot_table @@ -2058,6 +2058,12 @@ def agg(arr): with pytest.raises(KeyError, match="notpresent"): foo.pivot_table("notpresent", "X", "Y", aggfunc=agg) + def test_pivot_table_observed_deprecated_default(self): + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + # make sure we actually have a category to warn on + self.data.A = self.data.A.astype("category") + self.data.pivot_table(values="D", index=["A", "B"], columns=["C"]) + class TestPivot: def test_pivot(self): @@ -2186,9 +2192,3 @@ def 
test_pivot_index_list_values_none_immutable_args(self): assert index == ["lev1", "lev2"] assert columns == ["lev3"] - - def test_pivot_table_observed_deprecated_default(self): - with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): - # make sure we actually have a category to warn on - self.data.A = self.data.A.astype("category") - self.data.pivot_table(values="D", index=["A", "B"], columns=["C"]) From 394fe5bbbe76c0bddf3a6163406d65c0c547b3b2 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Wed, 2 Dec 2020 11:39:04 -0600 Subject: [PATCH 18/28] Expect FutureWarning --- pandas/tests/generic/test_duplicate_labels.py | 1 + .../tests/groupby/aggregate/test_aggregate.py | 2 ++ pandas/tests/groupby/aggregate/test_cython.py | 2 ++ pandas/tests/groupby/aggregate/test_other.py | 1 + pandas/tests/groupby/test_categorical.py | 23 +++++++++++++++++++ pandas/tests/groupby/test_function.py | 1 + pandas/tests/groupby/test_grouping.py | 2 ++ pandas/tests/groupby/test_size.py | 1 + .../tests/groupby/transform/test_transform.py | 2 ++ pandas/tests/reshape/test_pivot.py | 9 ++++++++ 10 files changed, 44 insertions(+) diff --git a/pandas/tests/generic/test_duplicate_labels.py b/pandas/tests/generic/test_duplicate_labels.py index 1b32675ec2d35..aa872b7293519 100644 --- a/pandas/tests/generic/test_duplicate_labels.py +++ b/pandas/tests/generic/test_duplicate_labels.py @@ -384,6 +384,7 @@ def test_merge_raises(self): ], ids=lambda x: type(x).__name__, ) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_raises_basic(idx): msg = "Index has duplicates." 
with pytest.raises(pd.errors.DuplicateLabelError, match=msg): diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 073918eda3deb..3f9352b5cf5b5 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1060,6 +1060,7 @@ def test_groupby_get_by_index(): ({"nr": "min"}, {"nr": [1, 5]}), ], ) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_groupby_single_agg_cat_cols(grp_col_dict, exp_data): # test single aggregations on ordered categorical cols GHGH27800 @@ -1094,6 +1095,7 @@ def test_groupby_single_agg_cat_cols(grp_col_dict, exp_data): ({"cat_ord": ["min", "max"]}, [("a", "b"), ("c", "d")]), ], ) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data): # test combined aggregations on ordered categorical cols GH27800 diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index c907391917ca8..34515b585721f 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -175,6 +175,7 @@ def test__cython_agg_general(op, targop): ("max", np.max), ], ) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_cython_agg_empty_buckets(op, targop, observed): df = DataFrame([11, 12, 13]) grps = range(0, 55, 5) @@ -189,6 +190,7 @@ def test_cython_agg_empty_buckets(op, targop, observed): tm.assert_frame_equal(result, expected) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_cython_agg_empty_buckets_nanops(observed): # GH-18869 can't call nanops on empty groups, so hardcode expected # for these diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index 5d0f6d6262899..b4d166c8d4d08 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ 
b/pandas/tests/groupby/aggregate/test_other.py @@ -555,6 +555,7 @@ def test_agg_structs_series(structure, expected): tm.assert_series_equal(result, expected) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_agg_category_nansum(observed): categories = ["a", "b", "c"] df = DataFrame( diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 8cf77ca6335f4..603ddf83a4690 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -212,6 +212,7 @@ def f(x): tm.assert_index_equal((desc_result.stack().index.get_level_values(1)), exp) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_level_get_group(observed): # GH15155 df = DataFrame( @@ -276,6 +277,7 @@ def test_apply(ordered): tm.assert_series_equal(result, expected) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_observed(observed): # multiple groupers, don't re-expand the output space # of the grouper @@ -384,11 +386,13 @@ def test_observed(observed): tm.assert_frame_equal(result, expected) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_observed_codes_remap(observed): d = {"C1": [3, 3, 4, 5], "C2": [1, 2, 3, 4], "C3": [10, 100, 200, 34]} df = DataFrame(d) values = pd.cut(df["C1"], [1, 2, 3, 6]) values.name = "cat" + groups_double_key = df.groupby([values, "C2"], observed=observed) idx = MultiIndex.from_arrays([values, [1, 2, 3, 4]], names=["cat", "C2"]) @@ -423,12 +427,14 @@ def test_observed_perf(): assert result.index.levels[2].nunique() == df.other_id.nunique() +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_observed_groups(observed): # gh-20583 # test that we have the appropriate groups cat = Categorical(["a", "c", "a"], categories=["a", "b", "c"]) df = DataFrame({"cat": cat, "vals": [1, 2, 3]}) + g = df.groupby("cat", observed=observed) result = g.groups @@ -444,6 +450,7 @@ def 
test_observed_groups(observed): tm.assert_dict_equal(result, expected) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_observed_groups_with_nan(observed): # GH 24740 df = DataFrame( @@ -480,6 +487,7 @@ def test_observed_nth(): tm.assert_series_equal(result, expected) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_dataframe_categorical_with_nan(observed): # GH 21151 s1 = Categorical([np.nan, "a", np.nan, "a"], categories=["a", "b", "c"]) @@ -503,6 +511,7 @@ def test_dataframe_categorical_with_nan(observed): @pytest.mark.parametrize("ordered", [True, False]) @pytest.mark.parametrize("observed", [True, False]) @pytest.mark.parametrize("sort", [True, False]) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort): # GH 25871: Fix groupby sorting on ordered Categoricals # GH 25167: Groupby with observed=True doesn't sort @@ -1050,6 +1059,7 @@ def test_empty_prod(): tm.assert_series_equal(result, expected) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_groupby_multiindex_categorical_datetime(): # https://github.com/pandas-dev/pandas/issues/21390 @@ -1167,6 +1177,7 @@ def test_seriesgroupby_observed_true(df_cat, operation, kwargs): @pytest.mark.parametrize("operation", ["agg", "apply"]) @pytest.mark.parametrize("observed", [False, None]) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation): # GH 24880 index, _ = MultiIndex.from_product( @@ -1231,6 +1242,7 @@ def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation): ), ], ) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_seriesgroupby_observed_apply_dict(df_cat, observed, index, data): # GH 24880 expected = Series(data=data, index=index, name="C") @@ -1240,6 +1252,7 @@ def 
test_seriesgroupby_observed_apply_dict(df_cat, observed, index, data): tm.assert_series_equal(result, expected) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_groupby_categorical_series_dataframe_consistent(df_cat): # GH 20416 expected = df_cat.groupby(["A", "B"])["C"].mean() @@ -1248,6 +1261,7 @@ def test_groupby_categorical_series_dataframe_consistent(df_cat): @pytest.mark.parametrize("code", [([1, 0, 0]), ([0, 0, 0])]) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_groupby_categorical_axis_1(code): # GH 13420 df = DataFrame({"a": [1, 2, 3, 4], "b": [-1, -2, -3, -4], "c": [5, 6, 7, 8]}) @@ -1257,6 +1271,7 @@ def test_groupby_categorical_axis_1(code): tm.assert_frame_equal(result, expected) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_groupby_cat_preserves_structure(observed, ordered): # GH 28787 df = DataFrame( @@ -1285,6 +1300,7 @@ def test_get_nonexistent_category(): ) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_series_groupby_on_2_categoricals_unobserved(reduction_func, observed, request): # GH 17605 if reduction_func == "ngroup": @@ -1384,6 +1400,7 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_true(reduction_fun @pytest.mark.parametrize("observed", [False, None]) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_dataframe_groupby_on_2_categoricals_when_observed_is_false( reduction_func, observed, request ): @@ -1417,6 +1434,7 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_false( assert (res.loc[unobserved_cats] == expected).all().all() +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_series_groupby_categorical_aggregation_getitem(): # GH 8870 d = {"foo": [10, 8, 4, 1], "bar": [10, 20, 30, 40], "baz": ["d", "c", "d", "c"]} @@ -1472,6 +1490,7 @@ def test_groupy_first_returned_categorical_instead_of_dataframe(func): 
tm.assert_series_equal(result, expected) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_read_only_category_no_sort(): # GH33410 cats = np.array([1, 2]) @@ -1480,10 +1499,12 @@ def test_read_only_category_no_sort(): {"a": [1, 3, 5, 7], "b": Categorical([1, 1, 2, 2], categories=Index(cats))} ) expected = DataFrame(data={"a": [2, 6]}, index=CategoricalIndex([1, 2], name="b")) + result = df.groupby("b", sort=False).mean() tm.assert_frame_equal(result, expected) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_sorted_missing_category_values(): # GH 28597 df = DataFrame( @@ -1631,6 +1652,7 @@ def test_categorical_transform(): @pytest.mark.parametrize("func", ["first", "last"]) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_series_groupby_first_on_categorical_col_grouped_on_2_categoricals( func: str, observed: bool ): @@ -1656,6 +1678,7 @@ def test_series_groupby_first_on_categorical_col_grouped_on_2_categoricals( @pytest.mark.parametrize("func", ["first", "last"]) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_df_groupby_first_on_categorical_col_grouped_on_2_categoricals( func: str, observed: bool ): diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 12e570490487d..6f202ef4bf97a 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -410,6 +410,7 @@ def test_cython_median(): tm.assert_frame_equal(rs, xp) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_median_empty_bins(observed): df = DataFrame(np.random.randint(0, 44, 500)) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index f7ab9ca92a756..083f3d8084114 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -311,6 +311,7 @@ def test_groupby_levels_and_columns(self): by_columns.columns = 
by_columns.columns.astype(np.int64) tm.assert_frame_equal(by_levels, by_columns) + @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_groupby_categorical_index_and_columns(self, observed): # GH18432, adapted for GH25871 columns = ["A", "B", "A", "B"] @@ -777,6 +778,7 @@ def test_get_group(self): with pytest.raises(ValueError, match=msg): g.get_group(("foo", "bar", "baz")) + @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_get_group_empty_bins(self, observed): d = DataFrame([3, 1, 7, 6]) diff --git a/pandas/tests/groupby/test_size.py b/pandas/tests/groupby/test_size.py index ba27e5a24ba00..db44aa56dcebd 100644 --- a/pandas/tests/groupby/test_size.py +++ b/pandas/tests/groupby/test_size.py @@ -47,6 +47,7 @@ def test_size_period_index(): @pytest.mark.parametrize("as_index", [True, False]) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_size_on_categorical(as_index): df = DataFrame([[1, 1], [2, 2]], columns=["A", "B"]) df["A"] = df["A"].astype("category") diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 8acd051fbc643..5e92ea09b2ec9 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -987,6 +987,7 @@ def test_groupby_transform_with_datetimes(func, values): @pytest.mark.parametrize("func", ["cumsum", "cumprod", "cummin", "cummax"]) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_transform_absent_categories(func): # GH 16771 # cython transforms with more groups than rows @@ -1153,6 +1154,7 @@ def test_transform_lambda_indexing(): tm.assert_frame_equal(result, expected) +@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_categorical_and_not_categorical_key(observed): # Checks that groupby-transform, when grouping by both a categorical # and a non-categorical key, doesn't try to expand the output to 
include diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 519dc553bbedb..f3f16493229ed 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -108,6 +108,7 @@ def test_pivot_table(self, observed): expected = self.data.groupby(index + [columns])["D"].agg(np.mean).unstack() tm.assert_frame_equal(table, expected) + @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_pivot_table_categorical_observed_equal(self, observed): # issue #24923 df = DataFrame( @@ -184,6 +185,7 @@ def test_pivot_table_dropna(self): tm.assert_index_equal(pv_col.columns, m) tm.assert_index_equal(pv_ind.index, m) + @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_pivot_table_categorical(self): cat1 = Categorical( @@ -199,6 +201,7 @@ def test_pivot_table_categorical(self): expected = DataFrame({"values": [1, 2, 3, 4]}, index=exp_index) tm.assert_frame_equal(result, expected) + @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_pivot_table_dropna_categoricals(self, dropna): # GH 15193 categories = ["a", "b", "c", "d"] @@ -227,6 +230,7 @@ def test_pivot_table_dropna_categoricals(self, dropna): tm.assert_frame_equal(result, expected) + @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_pivot_with_non_observable_dropna(self, dropna): # gh-21133 df = DataFrame( @@ -278,6 +282,7 @@ def test_pivot_with_non_observable_dropna(self, dropna): tm.assert_frame_equal(result, expected) + @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_pivot_with_interval_index(self, interval_values, dropna): # GH 25814 df = DataFrame({"A": interval_values, "B": 1}) @@ -285,6 +290,7 @@ def test_pivot_with_interval_index(self, interval_values, dropna): expected = DataFrame({"B": 1}, index=Index(interval_values.unique(), name="A")) tm.assert_frame_equal(result, expected) + @pytest.mark.filterwarnings("ignore:Using 
'observed:FutureWarning") def test_pivot_with_interval_index_margins(self): # GH 25815 ordered_cat = pd.IntervalIndex.from_arrays([0, 0, 1, 1], [1, 1, 2, 2]) @@ -1752,6 +1758,7 @@ def test_margins_casted_to_float(self, observed): ) tm.assert_frame_equal(result, expected) + @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_pivot_with_categorical(self, observed, ordered): # gh-21370 idx = [np.nan, "low", "high", "low", np.nan] @@ -1787,6 +1794,7 @@ def test_pivot_with_categorical(self, observed, ordered): tm.assert_frame_equal(result, expected) + @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_categorical_aggfunc(self, observed): # GH 9534 df = DataFrame( @@ -1807,6 +1815,7 @@ def test_categorical_aggfunc(self, observed): ) tm.assert_frame_equal(result, expected) + @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_categorical_pivot_index_ordering(self, observed): # GH 8731 df = DataFrame( From be6d3c1cc05ff2ee73ee8cf0e768f852f5d6c048 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Wed, 2 Dec 2020 14:26:28 -0600 Subject: [PATCH 19/28] crosstab doesn't have observed keyword --- pandas/core/reshape/pivot.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 49500494e45e9..19a56b1651197 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -612,6 +612,8 @@ def crosstab( margins=margins, margins_name=margins_name, dropna=dropna, + # the below is only here to silence the FutureWarning + observed=False, **kwargs, ) From 6cca8c02ae5ba3a9ade6848b3e25c11b54af5141 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Thu, 3 Dec 2020 17:40:27 -0600 Subject: [PATCH 20/28] Filter fewer warnings --- doc/source/user_guide/10min.rst | 5 ++-- doc/source/user_guide/advanced.rst | 5 ++-- doc/source/user_guide/categorical.rst | 11 +++----- .../tests/groupby/aggregate/test_aggregate.py | 9 +++---- 
pandas/tests/groupby/test_categorical.py | 13 ++++----- pandas/tests/groupby/test_groupby.py | 4 +-- pandas/tests/groupby/test_groupby_subclass.py | 5 ++-- pandas/tests/groupby/test_size.py | 6 ++--- .../tests/groupby/transform/test_transform.py | 12 +++------ pandas/tests/reshape/test_pivot.py | 27 +++++++++++-------- 10 files changed, 41 insertions(+), 56 deletions(-) diff --git a/doc/source/user_guide/10min.rst b/doc/source/user_guide/10min.rst index 5e528415e7d33..81c33b53e21a8 100644 --- a/doc/source/user_guide/10min.rst +++ b/doc/source/user_guide/10min.rst @@ -702,12 +702,11 @@ Sorting is per order in the categories, not lexical order. df.sort_values(by="grade") -Grouping by a categorical column also shows empty categories. +Grouping by a categorical column can also show empty categories, using the observed keyword. .. ipython:: python - :okwarning: - df.groupby("grade").size() + df.groupby("grade", observed=False).size() Plotting diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst index 8963615b7c627..f952bd9150ce5 100644 --- a/doc/source/user_guide/advanced.rst +++ b/doc/source/user_guide/advanced.rst @@ -808,10 +808,9 @@ order is ``cab``). Groupby operations on the index will preserve the index nature as well. .. ipython:: python - :okwarning: - df2.groupby(level=0).sum() - df2.groupby(level=0).sum().index + df2.groupby(level=0, observed=False).sum() + df2.groupby(level=0, observed=False).sum().index Reindexing operations will return a resulting index based on the type of the passed indexer. 
Passing a list will return a plain-old ``Index``; indexing with diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst index 84b962bcd3b5e..b0d93817e57cf 100644 --- a/doc/source/user_guide/categorical.rst +++ b/doc/source/user_guide/categorical.rst @@ -625,7 +625,6 @@ even if some categories are not present in the data: ``DataFrame`` methods like :meth:`DataFrame.sum` also show "unused" categories: .. ipython:: python - :okwarning: columns = pd.Categorical( ["One", "One", "Two"], categories=["One", "Two", "Three"], ordered=True @@ -638,16 +637,15 @@ even if some categories are not present in the data: Groupby will also show "unused" categories, though this default is deprecated and will be changed in a future release. It is recommended to use the -``observed`` keyword explicitly: +``observed`` keyword explicitly as below: .. ipython:: python - :okwarning: cats = pd.Categorical( ["a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c", "d"] ) df = pd.DataFrame({"cats": cats, "values": [1, 2, 2, 2, 3, 4, 5]}) - df.groupby("cats").mean() + df.groupby("cats", observed=False).mean() cats2 = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b", "c"]) df2 = pd.DataFrame( @@ -657,17 +655,16 @@ and will be changed in a future release. It is recommended to use the "values": [1, 2, 3, 4], } ) - df2.groupby(["cats", "B"]).mean() + df2.groupby(["cats", "B"], observed=False).mean() Pivot tables: .. 
ipython:: python - :okwarning: raw_cat = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b", "c"]) df = pd.DataFrame({"A": raw_cat, "B": ["c", "d", "c", "d"], "values": [1, 2, 3, 4]}) - pd.pivot_table(df, values="values", index=["A", "B"]) + pd.pivot_table(df, values="values", index=["A", "B"], observed=False) Data munging ------------ diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 3f9352b5cf5b5..cd3757f6a5ecf 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -13,8 +13,7 @@ from pandas.core.dtypes.common import is_integer_dtype import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series, concat -import pandas._testing as tm +from pandas import DataFrame, Index, MultiIndex, Series, _testing as tm, concat from pandas.core.base import SpecificationError from pandas.core.groupby.grouper import Grouping @@ -1060,7 +1059,6 @@ def test_groupby_get_by_index(): ({"nr": "min"}, {"nr": [1, 5]}), ], ) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_groupby_single_agg_cat_cols(grp_col_dict, exp_data): # test single aggregations on ordered categorical cols GHGH27800 @@ -1075,7 +1073,7 @@ def test_groupby_single_agg_cat_cols(grp_col_dict, exp_data): input_df = input_df.astype({"cat": "category", "cat_ord": "category"}) input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered() - result_df = input_df.groupby("cat").agg(grp_col_dict) + result_df = input_df.groupby("cat", observed=False).agg(grp_col_dict) # create expected dataframe cat_index = pd.CategoricalIndex( @@ -1095,7 +1093,6 @@ def test_groupby_single_agg_cat_cols(grp_col_dict, exp_data): ({"cat_ord": ["min", "max"]}, [("a", "b"), ("c", "d")]), ], ) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data): # test combined aggregations on ordered categorical 
cols GH27800 @@ -1110,7 +1107,7 @@ def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data): input_df = input_df.astype({"cat": "category", "cat_ord": "category"}) input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered() - result_df = input_df.groupby("cat").agg(grp_col_dict) + result_df = input_df.groupby("cat", observed=False).agg(grp_col_dict) # create expected dataframe cat_index = pd.CategoricalIndex( diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 603ddf83a4690..b17a7adeee970 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -11,9 +11,9 @@ Index, MultiIndex, Series, + _testing as tm, qcut, ) -import pandas._testing as tm def cartesian_product_for_groupers(result, args, names, fill_value=np.NaN): @@ -1059,7 +1059,6 @@ def test_empty_prod(): tm.assert_series_equal(result, expected) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_groupby_multiindex_categorical_datetime(): # https://github.com/pandas-dev/pandas/issues/21390 @@ -1072,7 +1071,7 @@ def test_groupby_multiindex_categorical_datetime(): "values": np.arange(9), } ) - result = df.groupby(["key1", "key2"]).mean() + result = df.groupby(["key1", "key2"], observed=False).mean() idx = MultiIndex.from_product( [ @@ -1252,11 +1251,10 @@ def test_seriesgroupby_observed_apply_dict(df_cat, observed, index, data): tm.assert_series_equal(result, expected) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_groupby_categorical_series_dataframe_consistent(df_cat): # GH 20416 - expected = df_cat.groupby(["A", "B"])["C"].mean() - result = df_cat.groupby(["A", "B"]).mean()["C"] + expected = df_cat.groupby(["A", "B"], observed=False)["C"].mean() + result = df_cat.groupby(["A", "B"], observed=False).mean()["C"] tm.assert_series_equal(result, expected) @@ -1448,8 +1446,7 @@ def test_series_groupby_categorical_aggregation_getitem(): 
@pytest.mark.parametrize( - "func, expected_values", - [(Series.nunique, [1, 1, 2]), (Series.count, [1, 2, 2])], + "func, expected_values", [(Series.nunique, [1, 1, 2]), (Series.count, [1, 2, 2])], ) def test_groupby_agg_categorical_columns(func, expected_values): # 31256 diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 7c179a79513fa..a96789a7c80ce 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -15,10 +15,10 @@ MultiIndex, Series, Timestamp, + _testing as tm, date_range, read_csv, ) -import pandas._testing as tm from pandas.core.base import SpecificationError import pandas.core.common as com @@ -2012,7 +2012,7 @@ def test_dup_labels_output_shape(groupby_func, idx): pytest.skip("Not applicable") df = DataFrame([[1, 1]], columns=idx) - grp_by = df.groupby([0]) + grp_by = df.groupby([0], observed=False) args = [] if groupby_func in {"fillna", "nth"}: diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py index d268d87708552..574a42fb7224e 100644 --- a/pandas/tests/groupby/test_groupby_subclass.py +++ b/pandas/tests/groupby/test_groupby_subclass.py @@ -3,8 +3,7 @@ import numpy as np import pytest -from pandas import DataFrame, Series -import pandas._testing as tm +from pandas import DataFrame, Series, _testing as tm @pytest.mark.parametrize( @@ -21,7 +20,7 @@ def test_groupby_preserves_subclass(obj, groupby_func): if isinstance(obj, Series) and groupby_func in {"corrwith"}: pytest.skip("Not applicable") - grouped = obj.groupby(np.arange(0, 10)) + grouped = obj.groupby(np.arange(0, 10), observed=False) # Groups should preserve subclass type assert isinstance(grouped.get_group(0), type(obj)) diff --git a/pandas/tests/groupby/test_size.py b/pandas/tests/groupby/test_size.py index db44aa56dcebd..cb724d46bc0d1 100644 --- a/pandas/tests/groupby/test_size.py +++ b/pandas/tests/groupby/test_size.py @@ -1,8 +1,7 @@ import numpy as np 
import pytest -from pandas import DataFrame, Index, PeriodIndex, Series -import pandas._testing as tm +from pandas import DataFrame, Index, PeriodIndex, Series, _testing as tm @pytest.mark.parametrize("by", ["A", "B", ["A", "B"]]) @@ -47,11 +46,10 @@ def test_size_period_index(): @pytest.mark.parametrize("as_index", [True, False]) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_size_on_categorical(as_index): df = DataFrame([[1, 1], [2, 2]], columns=["A", "B"]) df["A"] = df["A"].astype("category") - result = df.groupby(["A", "B"], as_index=as_index).size() + result = df.groupby(["A", "B"], as_index=as_index, observed=False).size() expected = DataFrame( [[1, 1, 1], [1, 2, 0], [2, 1, 0], [2, 2, 1]], columns=["A", "B", "size"] diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 5e92ea09b2ec9..cccda30262ca1 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -13,10 +13,10 @@ MultiIndex, Series, Timestamp, + _testing as tm, concat, date_range, ) -import pandas._testing as tm from pandas.core.groupby.groupby import DataError @@ -472,12 +472,7 @@ def test_groupby_transform_with_int(): # int case df = DataFrame( - { - "A": [1, 1, 1, 2, 2, 2], - "B": 1, - "C": [1, 2, 3, 1, 2, 3], - "D": "foo", - } + {"A": [1, 1, 1, 2, 2, 2], "B": 1, "C": [1, 2, 3, 1, 2, 3], "D": "foo",} ) with np.errstate(all="ignore"): result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std()) @@ -987,7 +982,6 @@ def test_groupby_transform_with_datetimes(func, values): @pytest.mark.parametrize("func", ["cumsum", "cumprod", "cummin", "cummax"]) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_transform_absent_categories(func): # GH 16771 # cython transforms with more groups than rows @@ -995,7 +989,7 @@ def test_transform_absent_categories(func): x_cats = range(2) y = [1] df = DataFrame({"x": 
Categorical(x_vals, x_cats), "y": y}) - result = getattr(df.y.groupby(df.x), func)() + result = getattr(df.y.groupby(df.x, observed=False), func)() expected = df.y tm.assert_series_equal(result, expected) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index f3f16493229ed..f9f9c80c06433 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -185,7 +185,6 @@ def test_pivot_table_dropna(self): tm.assert_index_equal(pv_col.columns, m) tm.assert_index_equal(pv_ind.index, m) - @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_pivot_table_categorical(self): cat1 = Categorical( @@ -195,13 +194,14 @@ def test_pivot_table_categorical(self): ["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True ) df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]}) - result = pd.pivot_table(df, values="values", index=["A", "B"], dropna=True) + result = pd.pivot_table( + df, values="values", index=["A", "B"], dropna=True, observed=False + ) exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"]) expected = DataFrame({"values": [1, 2, 3, 4]}, index=exp_index) tm.assert_frame_equal(result, expected) - @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_pivot_table_dropna_categoricals(self, dropna): # GH 15193 categories = ["a", "b", "c", "d"] @@ -215,7 +215,9 @@ def test_pivot_table_dropna_categoricals(self, dropna): ) df["A"] = df["A"].astype(CDT(categories, ordered=False)) - result = df.pivot_table(index="B", columns="A", values="C", dropna=dropna) + result = df.pivot_table( + index="B", columns="A", values="C", dropna=dropna, observed=False + ) expected_columns = Series(["a", "b", "c"], name="A") expected_columns = expected_columns.astype(CDT(categories, ordered=False)) expected_index = Series([1, 2, 3], name="B") @@ -230,7 +232,6 @@ def test_pivot_table_dropna_categoricals(self, dropna): tm.assert_frame_equal(result, expected) - 
@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_pivot_with_non_observable_dropna(self, dropna): # gh-21133 df = DataFrame( @@ -244,7 +245,7 @@ def test_pivot_with_non_observable_dropna(self, dropna): } ) - result = df.pivot_table(index="A", values="B", dropna=dropna) + result = df.pivot_table(index="A", values="B", dropna=dropna, observed=False) expected = DataFrame( {"B": [2, 3]}, index=Index( @@ -269,7 +270,7 @@ def test_pivot_with_non_observable_dropna(self, dropna): } ) - result = df.pivot_table(index="A", values="B", dropna=dropna) + result = df.pivot_table(index="A", values="B", dropna=dropna, observed=False) expected = DataFrame( {"B": [2, 3, 0]}, index=Index( @@ -282,15 +283,13 @@ def test_pivot_with_non_observable_dropna(self, dropna): tm.assert_frame_equal(result, expected) - @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_pivot_with_interval_index(self, interval_values, dropna): # GH 25814 df = DataFrame({"A": interval_values, "B": 1}) - result = df.pivot_table(index="A", values="B", dropna=dropna) + result = df.pivot_table(index="A", values="B", dropna=dropna, observed=False) expected = DataFrame({"B": 1}, index=Index(interval_values.unique(), name="A")) tm.assert_frame_equal(result, expected) - @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_pivot_with_interval_index_margins(self): # GH 25815 ordered_cat = pd.IntervalIndex.from_arrays([0, 0, 1, 1], [1, 1, 2, 2]) @@ -305,7 +304,13 @@ def test_pivot_with_interval_index_margins(self): ) pivot_tab = pd.pivot_table( - df, index="C", columns="B", values="A", aggfunc="sum", margins=True + df, + index="C", + columns="B", + values="A", + aggfunc="sum", + margins=True, + observed=False, ) result = pivot_tab["All"] From 0864317e828adce371f0155eb8539a0d2cd6019d Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Thu, 3 Dec 2020 17:40:54 -0600 Subject: [PATCH 21/28] Hard code observed behavior to silence warning. 
See #35967 --- pandas/core/generic.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 4a9e020a0fe46..fc0887c47a17a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -87,10 +87,15 @@ from pandas.core.dtypes.missing import isna, notna import pandas as pd -from pandas.core import arraylike, indexing, missing, nanops -import pandas.core.algorithms as algos +from pandas.core import ( + algorithms as algos, + arraylike, + common as com, + indexing, + missing, + nanops, +) from pandas.core.base import PandasObject, SelectionMixin -import pandas.core.common as com from pandas.core.construction import create_series_with_explicit_dtype from pandas.core.flags import Flags from pandas.core.indexes import base as ibase @@ -6027,10 +6032,7 @@ def _convert( validate_bool_kwarg(timedelta, "timedelta") return self._constructor( self._mgr.convert( - datetime=datetime, - numeric=numeric, - timedelta=timedelta, - copy=True, + datetime=datetime, numeric=numeric, timedelta=timedelta, copy=True, ) ).__finalize__(self) @@ -6880,10 +6882,7 @@ def replace( f"Expecting {len(to_replace)} got {len(value)} " ) new_data = self._mgr.replace_list( - src_list=to_replace, - dest_list=value, - inplace=inplace, - regex=regex, + src_list=to_replace, dest_list=value, inplace=inplace, regex=regex, ) elif to_replace is None: @@ -10545,7 +10544,8 @@ def pct_change( def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwargs): if axis is None: raise ValueError("Must specify 'axis' when aggregating by level.") - grouped = self.groupby(level=level, axis=axis, sort=False) + # see pr-35967 for discussion about the observed keyword + grouped = self.groupby(level=level, axis=axis, sort=False, observed=False) if hasattr(grouped, name) and skipna: return getattr(grouped, name)(**kwargs) axis = self._get_axis_number(axis) From 029edd02d9a62d5ff402c2028493819e8691eb63 Mon Sep 17 00:00:00 2001 
From: Skipper Seabold Date: Thu, 3 Dec 2020 17:46:31 -0600 Subject: [PATCH 22/28] PR in comment --- pandas/tests/groupby/test_grouping.py | 4 ++-- pandas/tests/reshape/test_pivot.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 083f3d8084114..178f7bc0fcd35 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -1,5 +1,4 @@ """ test where we are determining what we are grouping, or getting groups """ - import numpy as np import pytest @@ -11,9 +10,9 @@ MultiIndex, Series, Timestamp, + _testing as tm, date_range, ) -import pandas._testing as tm from pandas.core.groupby.grouper import Grouping # selection @@ -704,6 +703,7 @@ def test_groupby_multiindex_level_empty(self): tm.assert_frame_equal(result, expected) def test_default_observed_deprecated(self): + # pr-35967 df = DataFrame([["A", 1, 1], ["A", 2, 1], ["B", 1, 1]], columns=["x", "y", "z"]) df.x = df.x.astype("category") df.y = df.x.astype("category") diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index f9f9c80c06433..be41a958c2d37 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -2073,6 +2073,7 @@ def agg(arr): foo.pivot_table("notpresent", "X", "Y", aggfunc=agg) def test_pivot_table_observed_deprecated_default(self): + # pr-35967 with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): # make sure we actually have a category to warn on self.data.A = self.data.A.astype("category") From 5d15dd1e1c884a3f8a6cceb4a34204e831cac160 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Thu, 3 Dec 2020 17:50:12 -0600 Subject: [PATCH 23/28] Hardcode vs. 
filtering warning --- pandas/core/indexes/base.py | 7 ++++++- pandas/tests/generic/test_duplicate_labels.py | 1 - 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 52ffb1567cb2d..c9ffc9a69281b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -493,7 +493,12 @@ def _format_duplicate_message(self): duplicates = self[self.duplicated(keep="first")].unique() assert len(duplicates) - out = Series(np.arange(len(self))).groupby(self).agg(list)[duplicates] + # see pr-35967 about the observed keyword + out = ( + Series(np.arange(len(self))) + .groupby(self, observed=False) + .agg(list)[duplicates] + ) if self.nlevels == 1: out = out.rename_axis("label") return out.to_frame(name="positions") diff --git a/pandas/tests/generic/test_duplicate_labels.py b/pandas/tests/generic/test_duplicate_labels.py index aa872b7293519..1b32675ec2d35 100644 --- a/pandas/tests/generic/test_duplicate_labels.py +++ b/pandas/tests/generic/test_duplicate_labels.py @@ -384,7 +384,6 @@ def test_merge_raises(self): ], ids=lambda x: type(x).__name__, ) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") def test_raises_basic(idx): msg = "Index has duplicates." 
with pytest.raises(pd.errors.DuplicateLabelError, match=msg): From fb6e4b0ffff2897b4a3f57f302105d582c3b0236 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Thu, 3 Dec 2020 18:00:44 -0600 Subject: [PATCH 24/28] Blacken --- pandas/core/generic.py | 10 ++++++++-- pandas/tests/groupby/test_categorical.py | 3 ++- pandas/tests/groupby/transform/test_transform.py | 7 ++++++- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fc0887c47a17a..61cdc6b98d919 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6032,7 +6032,10 @@ def _convert( validate_bool_kwarg(timedelta, "timedelta") return self._constructor( self._mgr.convert( - datetime=datetime, numeric=numeric, timedelta=timedelta, copy=True, + datetime=datetime, + numeric=numeric, + timedelta=timedelta, + copy=True, ) ).__finalize__(self) @@ -6882,7 +6885,10 @@ def replace( f"Expecting {len(to_replace)} got {len(value)} " ) new_data = self._mgr.replace_list( - src_list=to_replace, dest_list=value, inplace=inplace, regex=regex, + src_list=to_replace, + dest_list=value, + inplace=inplace, + regex=regex, ) elif to_replace is None: diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index b17a7adeee970..c6cca38cefc84 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1446,7 +1446,8 @@ def test_series_groupby_categorical_aggregation_getitem(): @pytest.mark.parametrize( - "func, expected_values", [(Series.nunique, [1, 1, 2]), (Series.count, [1, 2, 2])], + "func, expected_values", + [(Series.nunique, [1, 1, 2]), (Series.count, [1, 2, 2])], ) def test_groupby_agg_categorical_columns(func, expected_values): # 31256 diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index cccda30262ca1..618708b80a424 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ 
b/pandas/tests/groupby/transform/test_transform.py @@ -472,7 +472,12 @@ def test_groupby_transform_with_int(): # int case df = DataFrame( - {"A": [1, 1, 1, 2, 2, 2], "B": 1, "C": [1, 2, 3, 1, 2, 3], "D": "foo",} + { + "A": [1, 1, 1, 2, 2, 2], + "B": 1, + "C": [1, 2, 3, 1, 2, 3], + "D": "foo", + } ) with np.errstate(all="ignore"): result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std()) From 363865b007b8d6b36ccda3101c1b331a17d7129f Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Fri, 4 Dec 2020 11:42:26 -0600 Subject: [PATCH 25/28] Silence deprecation warning. --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e4cc3653abb70..53f72abd8d93f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5677,7 +5677,7 @@ def value_counts( if subset is None: subset = self.columns.tolist() - counts = self.groupby(subset).grouper.size() + counts = self.groupby(subset, observed=True).grouper.size() if sort: counts = counts.sort_values(ascending=ascending) From d4d918bab4391427ad80861c2bdfff55ddc35a2f Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Fri, 4 Dec 2020 11:42:52 -0600 Subject: [PATCH 26/28] Hard code default behavior. 
See #35967 --- pandas/core/reshape/merge.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 2c6cdb846221f..94d8b50cf5597 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -109,13 +109,15 @@ def _groupby_and_merge(by, on, left: "DataFrame", right: "DataFrame", merge_piec if not isinstance(by, (list, tuple)): by = [by] - lby = left.groupby(by, sort=False) + # see pr-35967 for discussion about observed=False + # this is the previous default behavior if the group is a categorical + lby = left.groupby(by, sort=False, observed=False) rby: Optional[groupby.DataFrameGroupBy] = None # if we can groupby the rhs # then we can get vastly better perf if all(item in right.columns for item in by): - rby = right.groupby(by, sort=False) + rby = right.groupby(by, sort=False, observed=False) for key, lhs in lby: From 57d99a79084c8a23c104a96fcad43a85e3476898 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Fri, 4 Dec 2020 11:47:37 -0600 Subject: [PATCH 27/28] Hardcode default categorical behavior. 
See #35967 --- pandas/plotting/_matplotlib/boxplot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index 7122a38db9d0a..82bf1af5da297 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -195,7 +195,7 @@ def _grouped_plot_by_column( return_type=None, **kwargs, ): - grouped = data.groupby(by) + grouped = data.groupby(by, observed=False) if columns is None: if not isinstance(by, (list, tuple)): by = [by] From 8526064e3ee1fee269dd24e97b384fd5061cbcf0 Mon Sep 17 00:00:00 2001 From: Skipper Seabold Date: Mon, 7 Dec 2020 09:14:48 -0600 Subject: [PATCH 28/28] Will raise in the future --- doc/source/user_guide/categorical.rst | 5 ++- pandas/core/groupby/grouper.py | 11 +++--- pandas/core/shared_docs.py | 6 ++-- pandas/tests/groupby/aggregate/test_cython.py | 17 ++++++--- pandas/tests/groupby/aggregate/test_other.py | 5 ++- pandas/tests/groupby/test_categorical.py | 36 +++++++++---------- pandas/tests/groupby/test_function.py | 16 ++++++--- pandas/tests/groupby/test_grouping.py | 4 +-- .../tests/groupby/transform/test_transform.py | 2 +- pandas/tests/reshape/test_pivot.py | 8 ++--- 10 files changed, 63 insertions(+), 47 deletions(-) diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst index b0d93817e57cf..0221bc4101b63 100644 --- a/doc/source/user_guide/categorical.rst +++ b/doc/source/user_guide/categorical.rst @@ -635,9 +635,8 @@ even if some categories are not present in the data: ) df.sum(axis=1, level=1) -Groupby will also show "unused" categories, though this default is deprecated -and will be changed in a future release. It is recommended to use the -``observed`` keyword explicitly as below: +Groupby will also show "unused" categories by default, though this behavior +is deprecated. In a future release, users must specify a value for ``observed``: .. 
ipython:: python diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 77c32e5eaab80..23b562301aeb1 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -34,12 +34,13 @@ _observed_msg = textwrap.dedent( """\ -Using 'observed=False', because grouping on a categorical. A future version -of pandas will change to 'observed=True'. +Grouping by a categorical but 'observed' was not specified. +Using 'observed=False', but in a future version of pandas +not specifying 'observed' will raise an error. Pass +'observed=True' or 'observed=False' to silence this warning. -To silence the warning and switch to the future behavior, pass 'observed=True'. - -To keep the current behavior and silence the warning, pass 'observed=False'. +See the `groupby` documentation for more information on the +observed keyword. """ ) diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 367adfe7d4a84..92e52a3d174dd 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -120,8 +120,10 @@ If True: only show observed values for categorical groupers. If False: show all values for categorical groupers. - The current default of ``observed=False`` is deprecated and will - change to ``observed=True`` in a future version of pandas. + The current default of ``observed=False`` is deprecated. In + the future this will be a required keyword in the presence + of a categorical grouper and a failure to specify a value will + result in an error. Explicitly pass ``observed=True`` to silence the warning and not show all observed values. 
diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index 34515b585721f..6e96605418731 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -1,13 +1,20 @@ """ test cython .agg behavior """ - import numpy as np import pytest import pandas as pd -from pandas import DataFrame, Index, NaT, Series, Timedelta, Timestamp, bdate_range -import pandas._testing as tm +from pandas import ( + DataFrame, + Index, + NaT, + Series, + Timedelta, + Timestamp, + _testing as tm, + bdate_range, +) from pandas.core.groupby.groupby import DataError @@ -175,7 +182,7 @@ def test__cython_agg_general(op, targop): ("max", np.max), ], ) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") +@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_cython_agg_empty_buckets(op, targop, observed): df = DataFrame([11, 12, 13]) grps = range(0, 55, 5) @@ -190,7 +197,7 @@ def test_cython_agg_empty_buckets(op, targop, observed): tm.assert_frame_equal(result, expected) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") +@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_cython_agg_empty_buckets_nanops(observed): # GH-18869 can't call nanops on empty groups, so hardcode expected # for these diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index b4d166c8d4d08..5138f5de21a4c 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -1,7 +1,6 @@ """ test all other .agg behavior """ - import datetime as dt from functools import partial @@ -15,10 +14,10 @@ MultiIndex, PeriodIndex, Series, + _testing as tm, date_range, period_range, ) -import pandas._testing as tm from pandas.core.base import SpecificationError from pandas.io.formats.printing import pprint_thing @@ -555,7 +554,7 @@ def 
test_agg_structs_series(structure, expected): tm.assert_series_equal(result, expected) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") +@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_agg_category_nansum(observed): categories = ["a", "b", "c"] df = DataFrame( diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index c6cca38cefc84..a1b3f7fe2e463 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -212,7 +212,7 @@ def f(x): tm.assert_index_equal((desc_result.stack().index.get_level_values(1)), exp) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") +@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_level_get_group(observed): # GH15155 df = DataFrame( @@ -277,7 +277,7 @@ def test_apply(ordered): tm.assert_series_equal(result, expected) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") +@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_observed(observed): # multiple groupers, don't re-expand the output space # of the grouper @@ -386,7 +386,7 @@ def test_observed(observed): tm.assert_frame_equal(result, expected) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") +@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_observed_codes_remap(observed): d = {"C1": [3, 3, 4, 5], "C2": [1, 2, 3, 4], "C3": [10, 100, 200, 34]} df = DataFrame(d) @@ -427,7 +427,7 @@ def test_observed_perf(): assert result.index.levels[2].nunique() == df.other_id.nunique() -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") +@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_observed_groups(observed): # gh-20583 # test that we have the appropriate groups @@ -450,7 +450,7 @@ def test_observed_groups(observed): 
tm.assert_dict_equal(result, expected) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") +@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_observed_groups_with_nan(observed): # GH 24740 df = DataFrame( @@ -487,7 +487,7 @@ def test_observed_nth(): tm.assert_series_equal(result, expected) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") +@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_dataframe_categorical_with_nan(observed): # GH 21151 s1 = Categorical([np.nan, "a", np.nan, "a"], categories=["a", "b", "c"]) @@ -511,7 +511,7 @@ def test_dataframe_categorical_with_nan(observed): @pytest.mark.parametrize("ordered", [True, False]) @pytest.mark.parametrize("observed", [True, False]) @pytest.mark.parametrize("sort", [True, False]) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") +@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort): # GH 25871: Fix groupby sorting on ordered Categoricals # GH 25167: Groupby with observed=True doesn't sort @@ -1176,7 +1176,7 @@ def test_seriesgroupby_observed_true(df_cat, operation, kwargs): @pytest.mark.parametrize("operation", ["agg", "apply"]) @pytest.mark.parametrize("observed", [False, None]) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") +@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation): # GH 24880 index, _ = MultiIndex.from_product( @@ -1241,7 +1241,7 @@ def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation): ), ], ) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") +@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_seriesgroupby_observed_apply_dict(df_cat, observed, index, data): # GH 24880 expected 
= Series(data=data, index=index, name="C") @@ -1259,7 +1259,7 @@ def test_groupby_categorical_series_dataframe_consistent(df_cat): @pytest.mark.parametrize("code", [([1, 0, 0]), ([0, 0, 0])]) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") +@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_groupby_categorical_axis_1(code): # GH 13420 df = DataFrame({"a": [1, 2, 3, 4], "b": [-1, -2, -3, -4], "c": [5, 6, 7, 8]}) @@ -1269,7 +1269,7 @@ def test_groupby_categorical_axis_1(code): tm.assert_frame_equal(result, expected) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") +@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_groupby_cat_preserves_structure(observed, ordered): # GH 28787 df = DataFrame( @@ -1298,7 +1298,7 @@ def test_get_nonexistent_category(): ) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") +@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_series_groupby_on_2_categoricals_unobserved(reduction_func, observed, request): # GH 17605 if reduction_func == "ngroup": @@ -1398,7 +1398,7 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_true(reduction_fun @pytest.mark.parametrize("observed", [False, None]) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") +@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_dataframe_groupby_on_2_categoricals_when_observed_is_false( reduction_func, observed, request ): @@ -1432,7 +1432,7 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_false( assert (res.loc[unobserved_cats] == expected).all().all() -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") +@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_series_groupby_categorical_aggregation_getitem(): # GH 8870 d = {"foo": [10, 8, 4, 1], "bar": [10, 20, 30, 40], "baz": ["d", "c", 
"d", "c"]} @@ -1488,7 +1488,7 @@ def test_groupy_first_returned_categorical_instead_of_dataframe(func): tm.assert_series_equal(result, expected) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") +@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_read_only_category_no_sort(): # GH33410 cats = np.array([1, 2]) @@ -1502,7 +1502,7 @@ def test_read_only_category_no_sort(): tm.assert_frame_equal(result, expected) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") +@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_sorted_missing_category_values(): # GH 28597 df = DataFrame( @@ -1650,7 +1650,7 @@ def test_categorical_transform(): @pytest.mark.parametrize("func", ["first", "last"]) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") +@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_series_groupby_first_on_categorical_col_grouped_on_2_categoricals( func: str, observed: bool ): @@ -1676,7 +1676,7 @@ def test_series_groupby_first_on_categorical_col_grouped_on_2_categoricals( @pytest.mark.parametrize("func", ["first", "last"]) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") +@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_df_groupby_first_on_categorical_col_grouped_on_2_categoricals( func: str, observed: bool ): diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 6f202ef4bf97a..cc0c6c61e7e56 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -7,9 +7,17 @@ from pandas.errors import UnsupportedFunctionCall import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range, isna -import pandas._testing as tm -import pandas.core.nanops as nanops +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, + _testing 
as tm, + date_range, + isna, +) +from pandas.core import nanops as nanops from pandas.util import _test_decorators as td @@ -410,7 +418,7 @@ def test_cython_median(): tm.assert_frame_equal(rs, xp) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") +@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_median_empty_bins(observed): df = DataFrame(np.random.randint(0, 44, 500)) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 178f7bc0fcd35..979b01371247f 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -310,7 +310,7 @@ def test_groupby_levels_and_columns(self): by_columns.columns = by_columns.columns.astype(np.int64) tm.assert_frame_equal(by_levels, by_columns) - @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") + @pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_groupby_categorical_index_and_columns(self, observed): # GH18432, adapted for GH25871 columns = ["A", "B", "A", "B"] @@ -778,7 +778,7 @@ def test_get_group(self): with pytest.raises(ValueError, match=msg): g.get_group(("foo", "bar", "baz")) - @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") + @pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_get_group_empty_bins(self, observed): d = DataFrame([3, 1, 7, 6]) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 618708b80a424..71e182f34bb0a 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -1153,7 +1153,7 @@ def test_transform_lambda_indexing(): tm.assert_frame_equal(result, expected) -@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") +@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def 
test_categorical_and_not_categorical_key(observed): # Checks that groupby-transform, when grouping by both a categorical # and a non-categorical key, doesn't try to expand the output to include diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index be41a958c2d37..11fef6f271672 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -108,7 +108,7 @@ def test_pivot_table(self, observed): expected = self.data.groupby(index + [columns])["D"].agg(np.mean).unstack() tm.assert_frame_equal(table, expected) - @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") + @pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_pivot_table_categorical_observed_equal(self, observed): # issue #24923 df = DataFrame( @@ -1763,7 +1763,7 @@ def test_margins_casted_to_float(self, observed): ) tm.assert_frame_equal(result, expected) - @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") + @pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_pivot_with_categorical(self, observed, ordered): # gh-21370 idx = [np.nan, "low", "high", "low", np.nan] @@ -1799,7 +1799,7 @@ def test_pivot_with_categorical(self, observed, ordered): tm.assert_frame_equal(result, expected) - @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") + @pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_categorical_aggfunc(self, observed): # GH 9534 df = DataFrame( @@ -1820,7 +1820,7 @@ def test_categorical_aggfunc(self, observed): ) tm.assert_frame_equal(result, expected) - @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning") + @pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning") def test_categorical_pivot_index_ordering(self, observed): # GH 8731 df = DataFrame(