diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 17120f30c21f2..748fc53f4ad2e 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -104,6 +104,7 @@ Deprecations Removal of prior version deprecations/changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - All arguments except the first ``path``-like argument in IO writers are now keyword only (:issue:`54229`) +- Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`) - Removed ``DataFrame.bool`` and ``Series.bool`` (:issue:`51756`) - Removed ``DataFrame.first`` and ``DataFrame.last`` (:issue:`53710`) - Removed ``DataFrameGroupBy.grouper`` and ``SeriesGroupBy.grouper`` (:issue:`56521`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 771554f2b0f2b..ed823c4fb1a08 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8884,7 +8884,7 @@ def groupby( as_index: bool = True, sort: bool = True, group_keys: bool = True, - observed: bool | lib.NoDefault = lib.no_default, + observed: bool = True, dropna: bool = True, ) -> DataFrameGroupBy: from pandas.core.groupby.generic import DataFrameGroupBy @@ -9093,10 +9093,9 @@ def pivot( If True: only show observed values for categorical groupers. If False: show all values for categorical groupers. - .. deprecated:: 2.2.0 + .. versionchanged:: 3.0.0 - The default value of ``False`` is deprecated and will change to - ``True`` in a future version of pandas. + The default value is now ``True``. sort : bool, default True Specifies if the result should be sorted. @@ -9208,7 +9207,7 @@ def pivot_table( margins: bool = False, dropna: bool = True, margins_name: Level = "All", - observed: bool | lib.NoDefault = lib.no_default, + observed: bool = True, sort: bool = True, ) -> DataFrame: from pandas.core.reshape.pivot import pivot_table diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index afd37ca684a08..2af30a4185323 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1117,7 +1117,7 @@ def __init__( as_index: bool = True, sort: bool = True, group_keys: bool = True, - observed: bool | lib.NoDefault = lib.no_default, + observed: bool = False, dropna: bool = True, ) -> None: self._selection = selection @@ -1137,23 +1137,11 @@ def __init__( keys, level=level, sort=sort, - observed=False if observed is lib.no_default else observed, + observed=observed, dropna=self.dropna, ) - if observed is lib.no_default: - if any(ping._passed_categorical for ping in grouper.groupings): - warnings.warn( - "The default of observed=False is deprecated and will be changed " - "to True in a future version of pandas. Pass observed=False to " - "retain current behavior or observed=True to adopt the future " - "default and silence this warning.", - FutureWarning, - stacklevel=find_stack_level(), - ) - observed = False self.observed = observed - self.obj = obj self._grouper = grouper self.exclusions = frozenset(exclusions) if exclusions else frozenset() diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 7d563ed7b62f6..bd927472f06d3 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -10,7 +10,6 @@ Literal, cast, ) -import warnings import numpy as np @@ -19,7 +18,6 @@ Appender, Substitution, ) -from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.cast import maybe_downcast_to_dtype from pandas.core.dtypes.common import ( @@ -70,7 +68,7 @@ def pivot_table( margins: bool = False, dropna: bool = True, margins_name: Hashable = "All", - observed: bool | lib.NoDefault = lib.no_default, + observed: bool = True, sort: bool = True, ) -> DataFrame: index = _convert_by(index) @@ -125,7 +123,7 @@ def __internal_pivot_table( margins: bool, dropna: bool, margins_name: Hashable, - observed: bool | lib.NoDefault, + observed: bool, sort: bool, ) -> DataFrame: """ @@ -168,18 +166,7 @@ def __internal_pivot_table( pass values = list(values) - observed_bool = False if observed is lib.no_default else observed - grouped = data.groupby(keys, observed=observed_bool, sort=sort, dropna=dropna) - if observed is lib.no_default and any( - ping._passed_categorical for ping in grouped._grouper.groupings - ): - warnings.warn( - "The default value of observed=False is deprecated and will change " - "to observed=True in a future version of pandas. Specify " - "observed=False to silence this warning and retain the current behavior", - category=FutureWarning, - stacklevel=find_stack_level(), - ) + grouped = data.groupby(keys, observed=observed, sort=sort, dropna=dropna) agged = grouped.agg(aggfunc) if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns): diff --git a/pandas/core/series.py b/pandas/core/series.py index 65ef7d352a547..75b02d06294a3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1932,7 +1932,7 @@ def groupby( as_index: bool = True, sort: bool = True, group_keys: bool = True, - observed: bool | lib.NoDefault = lib.no_default, + observed: bool = False, dropna: bool = True, ) -> SeriesGroupBy: from pandas.core.groupby.generic import SeriesGroupBy diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index 9d693f034d911..787a03471cf6e 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -148,14 +148,14 @@ ``group_keys`` now defaults to ``True``. -observed : bool, default False +observed : bool, default True This only applies if any of the groupers are Categoricals. If True: only show observed values for categorical groupers. If False: show all values for categorical groupers. - .. deprecated:: 2.1.0 + .. versionchanged:: 3.0.0 - The default value will change to True in a future version of pandas. + The default value is now ``True``. dropna : bool, default True If True, and if group keys contain NA values, NA values together diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 7df1b822e516a..d8014558c52b2 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -2088,18 +2088,6 @@ def test_many_categories(as_index, sort, index_kind, ordered): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("cat_columns", ["a", "b", ["a", "b"]]) -@pytest.mark.parametrize("keys", ["a", "b", ["a", "b"]]) -def test_groupby_default_depr(cat_columns, keys): - # GH#43999 - df = DataFrame({"a": [1, 1, 2, 3], "b": [4, 5, 6, 7]}) - df[cat_columns] = df[cat_columns].astype("category") - msg = "The default of observed=False is deprecated" - klass = FutureWarning if set(cat_columns) & set(keys) else None - with tm.assert_produces_warning(klass, match=msg): - df.groupby(keys) - - @pytest.mark.parametrize("test_series", [True, False]) @pytest.mark.parametrize("keys", [["a1"], ["a1", "a2"]]) def test_agg_list(request, as_index, observed, reduction_func, test_series, keys): diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 11e3b5e205bdc..d6b61bae850af 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -192,9 +192,9 @@ def test_pivot_table_categorical(self): ["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True ) df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]}) - msg = "The default value of observed=False is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = pivot_table(df, values="values", index=["A", "B"], dropna=True) + result = pivot_table( + df, values="values", index=["A", "B"], dropna=True, observed=False + ) exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"]) expected = DataFrame({"values": [1.0, 2.0, 3.0, 4.0]}, index=exp_index) @@ -213,9 +213,9 @@ def test_pivot_table_dropna_categoricals(self, dropna): ) df["A"] = df["A"].astype(CategoricalDtype(categories, ordered=False)) - msg = "The default value of observed=False is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.pivot_table(index="B", columns="A", values="C", dropna=dropna) + result = df.pivot_table( + index="B", columns="A", values="C", dropna=dropna, observed=False + ) expected_columns = Series(["a", "b", "c"], name="A") expected_columns = expected_columns.astype( CategoricalDtype(categories, ordered=False) @@ -245,9 +245,7 @@ def test_pivot_with_non_observable_dropna(self, dropna): } ) - msg = "The default value of observed=False is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.pivot_table(index="A", values="B", dropna=dropna) + result = df.pivot_table(index="A", values="B", dropna=dropna, observed=False) if dropna: values = [2.0, 3.0] codes = [0, 1] @@ -278,9 +276,7 @@ def test_pivot_with_non_observable_dropna_multi_cat(self, dropna): } ) - msg = "The default value of observed=False is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.pivot_table(index="A", values="B", dropna=dropna) + result = df.pivot_table(index="A", values="B", dropna=dropna, observed=False) expected = DataFrame( {"B": [2.0, 3.0, 0.0]}, index=Index( @@ -304,9 +300,7 @@ def test_pivot_with_interval_index(self, left_right, dropna, closed): interval_values = Categorical(pd.IntervalIndex.from_arrays(left, right, closed)) df = DataFrame({"A": interval_values, "B": 1}) - msg = "The default value of observed=False is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.pivot_table(index="A", values="B", dropna=dropna) + result = df.pivot_table(index="A", values="B", dropna=dropna, observed=False) expected = DataFrame( {"B": 1.0}, index=Index(interval_values.unique(), name="A") ) @@ -327,11 +321,15 @@ def test_pivot_with_interval_index_margins(self): } ) - msg = "The default value of observed=False is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - pivot_tab = pivot_table( - df, index="C", columns="B", values="A", aggfunc="sum", margins=True - ) + pivot_tab = pivot_table( + df, + index="C", + columns="B", + values="A", + aggfunc="sum", + margins=True, + observed=False, + ) result = pivot_tab["All"] expected = Series( @@ -1830,9 +1828,9 @@ def test_categorical_margins_category(self, observed): df.y = df.y.astype("category") df.z = df.z.astype("category") - msg = "The default value of observed=False is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - table = df.pivot_table("x", "y", "z", dropna=observed, margins=True) + table = df.pivot_table( + "x", "y", "z", dropna=observed, margins=True, observed=False + ) tm.assert_frame_equal(table, expected) def test_margins_casted_to_float(self): @@ -1894,11 +1892,14 @@ def test_categorical_aggfunc(self, observed): {"C1": ["A", "B", "C", "C"], "C2": ["a", "a", "b", "b"], "V": [1, 2, 3, 4]} ) df["C1"] = df["C1"].astype("category") - msg = "The default value of observed=False is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.pivot_table( - "V", index="C1", columns="C2", dropna=observed, aggfunc="count" - ) + result = df.pivot_table( + "V", + index="C1", + columns="C2", + dropna=observed, + aggfunc="count", + observed=False, + ) expected_index = pd.CategoricalIndex( ["A", "B", "C"], categories=["A", "B", "C"], ordered=False, name="C1"