Skip to content

CLN: Enforce change in default value of observed #57330

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Feb 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ Deprecations
Removal of prior version deprecations/changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- All arguments except the first ``path``-like argument in IO writers are now keyword only (:issue:`54229`)
- Changed the default value of ``observed`` in :meth:`DataFrame.groupby`, :meth:`Series.groupby`, :meth:`DataFrame.pivot_table`, and :func:`pivot_table` to ``True`` (:issue:`51811`)
- Removed ``DataFrame.bool`` and ``Series.bool`` (:issue:`51756`)
- Removed ``DataFrame.first`` and ``DataFrame.last`` (:issue:`53710`)
- Removed ``DataFrameGroupBy.grouper`` and ``SeriesGroupBy.grouper`` (:issue:`56521`)
Expand Down
9 changes: 4 additions & 5 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -8884,7 +8884,7 @@ def groupby(
as_index: bool = True,
sort: bool = True,
group_keys: bool = True,
observed: bool | lib.NoDefault = lib.no_default,
observed: bool = True,
dropna: bool = True,
) -> DataFrameGroupBy:
from pandas.core.groupby.generic import DataFrameGroupBy
Expand Down Expand Up @@ -9093,10 +9093,9 @@ def pivot(
If True: only show observed values for categorical groupers.
If False: show all values for categorical groupers.

.. deprecated:: 2.2.0
.. versionchanged:: 3.0.0

The default value of ``False`` is deprecated and will change to
``True`` in a future version of pandas.
The default value is now ``True``.

sort : bool, default True
Specifies if the result should be sorted.
Expand Down Expand Up @@ -9208,7 +9207,7 @@ def pivot_table(
margins: bool = False,
dropna: bool = True,
margins_name: Level = "All",
observed: bool | lib.NoDefault = lib.no_default,
observed: bool = True,
sort: bool = True,
) -> DataFrame:
from pandas.core.reshape.pivot import pivot_table
Expand Down
16 changes: 2 additions & 14 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1117,7 +1117,7 @@ def __init__(
as_index: bool = True,
sort: bool = True,
group_keys: bool = True,
observed: bool | lib.NoDefault = lib.no_default,
observed: bool = False,
dropna: bool = True,
) -> None:
self._selection = selection
Expand All @@ -1137,23 +1137,11 @@ def __init__(
keys,
level=level,
sort=sort,
observed=False if observed is lib.no_default else observed,
observed=observed,
dropna=self.dropna,
)

if observed is lib.no_default:
if any(ping._passed_categorical for ping in grouper.groupings):
warnings.warn(
"The default of observed=False is deprecated and will be changed "
"to True in a future version of pandas. Pass observed=False to "
"retain current behavior or observed=True to adopt the future "
"default and silence this warning.",
FutureWarning,
stacklevel=find_stack_level(),
)
observed = False
self.observed = observed

self.obj = obj
self._grouper = grouper
self.exclusions = frozenset(exclusions) if exclusions else frozenset()
Expand Down
19 changes: 3 additions & 16 deletions pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
Literal,
cast,
)
import warnings

import numpy as np

Expand All @@ -19,7 +18,6 @@
Appender,
Substitution,
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.cast import maybe_downcast_to_dtype
from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -70,7 +68,7 @@ def pivot_table(
margins: bool = False,
dropna: bool = True,
margins_name: Hashable = "All",
observed: bool | lib.NoDefault = lib.no_default,
observed: bool = True,
sort: bool = True,
) -> DataFrame:
index = _convert_by(index)
Expand Down Expand Up @@ -125,7 +123,7 @@ def __internal_pivot_table(
margins: bool,
dropna: bool,
margins_name: Hashable,
observed: bool | lib.NoDefault,
observed: bool,
sort: bool,
) -> DataFrame:
"""
Expand Down Expand Up @@ -168,18 +166,7 @@ def __internal_pivot_table(
pass
values = list(values)

observed_bool = False if observed is lib.no_default else observed
grouped = data.groupby(keys, observed=observed_bool, sort=sort, dropna=dropna)
if observed is lib.no_default and any(
ping._passed_categorical for ping in grouped._grouper.groupings
):
warnings.warn(
"The default value of observed=False is deprecated and will change "
"to observed=True in a future version of pandas. Specify "
"observed=False to silence this warning and retain the current behavior",
category=FutureWarning,
stacklevel=find_stack_level(),
)
grouped = data.groupby(keys, observed=observed, sort=sort, dropna=dropna)
agged = grouped.agg(aggfunc)

if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1932,7 +1932,7 @@ def groupby(
as_index: bool = True,
sort: bool = True,
group_keys: bool = True,
observed: bool | lib.NoDefault = lib.no_default,
observed: bool = False,
dropna: bool = True,
) -> SeriesGroupBy:
from pandas.core.groupby.generic import SeriesGroupBy
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/shared_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,14 +148,14 @@

``group_keys`` now defaults to ``True``.

observed : bool, default False
observed : bool, default True
This only applies if any of the groupers are Categoricals.
If True: only show observed values for categorical groupers.
If False: show all values for categorical groupers.

.. deprecated:: 2.1.0
.. versionchanged:: 3.0.0

The default value will change to True in a future version of pandas.
The default value is now ``True``.

dropna : bool, default True
If True, and if group keys contain NA values, NA values together
Expand Down
12 changes: 0 additions & 12 deletions pandas/tests/groupby/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2088,18 +2088,6 @@ def test_many_categories(as_index, sort, index_kind, ordered):
tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("cat_columns", ["a", "b", ["a", "b"]])
@pytest.mark.parametrize("keys", ["a", "b", ["a", "b"]])
def test_groupby_default_depr(cat_columns, keys):
# GH#43999
df = DataFrame({"a": [1, 1, 2, 3], "b": [4, 5, 6, 7]})
df[cat_columns] = df[cat_columns].astype("category")
msg = "The default of observed=False is deprecated"
klass = FutureWarning if set(cat_columns) & set(keys) else None
with tm.assert_produces_warning(klass, match=msg):
df.groupby(keys)


@pytest.mark.parametrize("test_series", [True, False])
@pytest.mark.parametrize("keys", [["a1"], ["a1", "a2"]])
def test_agg_list(request, as_index, observed, reduction_func, test_series, keys):
Expand Down
57 changes: 29 additions & 28 deletions pandas/tests/reshape/test_pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,9 +192,9 @@ def test_pivot_table_categorical(self):
["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True
)
df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})
msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = pivot_table(df, values="values", index=["A", "B"], dropna=True)
result = pivot_table(
df, values="values", index=["A", "B"], dropna=True, observed=False
)

exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"])
expected = DataFrame({"values": [1.0, 2.0, 3.0, 4.0]}, index=exp_index)
Expand All @@ -213,9 +213,9 @@ def test_pivot_table_dropna_categoricals(self, dropna):
)

df["A"] = df["A"].astype(CategoricalDtype(categories, ordered=False))
msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.pivot_table(index="B", columns="A", values="C", dropna=dropna)
result = df.pivot_table(
index="B", columns="A", values="C", dropna=dropna, observed=False
)
expected_columns = Series(["a", "b", "c"], name="A")
expected_columns = expected_columns.astype(
CategoricalDtype(categories, ordered=False)
Expand Down Expand Up @@ -245,9 +245,7 @@ def test_pivot_with_non_observable_dropna(self, dropna):
}
)

msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.pivot_table(index="A", values="B", dropna=dropna)
result = df.pivot_table(index="A", values="B", dropna=dropna, observed=False)
if dropna:
values = [2.0, 3.0]
codes = [0, 1]
Expand Down Expand Up @@ -278,9 +276,7 @@ def test_pivot_with_non_observable_dropna_multi_cat(self, dropna):
}
)

msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.pivot_table(index="A", values="B", dropna=dropna)
result = df.pivot_table(index="A", values="B", dropna=dropna, observed=False)
expected = DataFrame(
{"B": [2.0, 3.0, 0.0]},
index=Index(
Expand All @@ -304,9 +300,7 @@ def test_pivot_with_interval_index(self, left_right, dropna, closed):
interval_values = Categorical(pd.IntervalIndex.from_arrays(left, right, closed))
df = DataFrame({"A": interval_values, "B": 1})

msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.pivot_table(index="A", values="B", dropna=dropna)
result = df.pivot_table(index="A", values="B", dropna=dropna, observed=False)
expected = DataFrame(
{"B": 1.0}, index=Index(interval_values.unique(), name="A")
)
Expand All @@ -327,11 +321,15 @@ def test_pivot_with_interval_index_margins(self):
}
)

msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
pivot_tab = pivot_table(
df, index="C", columns="B", values="A", aggfunc="sum", margins=True
)
pivot_tab = pivot_table(
df,
index="C",
columns="B",
values="A",
aggfunc="sum",
margins=True,
observed=False,
)

result = pivot_tab["All"]
expected = Series(
Expand Down Expand Up @@ -1830,9 +1828,9 @@ def test_categorical_margins_category(self, observed):

df.y = df.y.astype("category")
df.z = df.z.astype("category")
msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
table = df.pivot_table("x", "y", "z", dropna=observed, margins=True)
table = df.pivot_table(
"x", "y", "z", dropna=observed, margins=True, observed=False
)
tm.assert_frame_equal(table, expected)

def test_margins_casted_to_float(self):
Expand Down Expand Up @@ -1894,11 +1892,14 @@ def test_categorical_aggfunc(self, observed):
{"C1": ["A", "B", "C", "C"], "C2": ["a", "a", "b", "b"], "V": [1, 2, 3, 4]}
)
df["C1"] = df["C1"].astype("category")
msg = "The default value of observed=False is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg):
result = df.pivot_table(
"V", index="C1", columns="C2", dropna=observed, aggfunc="count"
)
result = df.pivot_table(
"V",
index="C1",
columns="C2",
dropna=observed,
aggfunc="count",
observed=False,
)

expected_index = pd.CategoricalIndex(
["A", "B", "C"], categories=["A", "B", "C"], ordered=False, name="C1"
Expand Down