Skip to content

Commit 4fd5a15

Browse files
authored
DEPR: Default of observed=False in DataFrame.pivot_table (#56237)
* DEPR: Default of observed=False in DataFrame.pivot_table
* Finish up
* fixup
* Convert to code-block
* Kickoff builds
1 parent 7b528c9 commit 4fd5a15

File tree

6 files changed

+80
-22
lines changed

6 files changed

+80
-22
lines changed

doc/source/user_guide/categorical.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -647,7 +647,7 @@ Pivot tables:
647647
648648
raw_cat = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b", "c"])
649649
df = pd.DataFrame({"A": raw_cat, "B": ["c", "d", "c", "d"], "values": [1, 2, 3, 4]})
650-
pd.pivot_table(df, values="values", index=["A", "B"])
650+
pd.pivot_table(df, values="values", index=["A", "B"], observed=False)
651651
652652
Data munging
653653
------------

doc/source/whatsnew/v0.23.0.rst

+26-5
Original file line numberDiff line numberDiff line change
@@ -286,12 +286,33 @@ For pivoting operations, this behavior is *already* controlled by the ``dropna``
286286
df = pd.DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})
287287
df
288288
289-
.. ipython:: python
290289
291-
pd.pivot_table(df, values='values', index=['A', 'B'],
292-
dropna=True)
293-
pd.pivot_table(df, values='values', index=['A', 'B'],
294-
dropna=False)
290+
.. code-block:: ipython
291+
292+
In [1]: pd.pivot_table(df, values='values', index=['A', 'B'], dropna=True)
293+
294+
Out[1]:
295+
values
296+
A B
297+
a c 1.0
298+
d 2.0
299+
b c 3.0
300+
d 4.0
301+
302+
In [2]: pd.pivot_table(df, values='values', index=['A', 'B'], dropna=False)
303+
304+
Out[2]:
305+
values
306+
A B
307+
a c 1.0
308+
d 2.0
309+
y NaN
310+
b c 3.0
311+
d 4.0
312+
y NaN
313+
z c NaN
314+
d NaN
315+
y NaN
295316
296317
297318
.. _whatsnew_0230.enhancements.window_raw:

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,7 @@ Other Deprecations
435435
- Deprecated the ``ordinal`` keyword in :class:`PeriodIndex`, use :meth:`PeriodIndex.from_ordinals` instead (:issue:`55960`)
436436
- Deprecated the ``unit`` keyword in :class:`TimedeltaIndex` construction, use :func:`to_timedelta` instead (:issue:`55499`)
437437
- Deprecated the behavior of :meth:`Series.value_counts` and :meth:`Index.value_counts` with object dtype; in a future version these will not perform dtype inference on the resulting :class:`Index`, do ``result.index = result.index.infer_objects()`` to retain the old behavior (:issue:`56161`)
438+
- Deprecated the default of ``observed=False`` in :meth:`DataFrame.pivot_table`; the default will change to ``True`` in a future version (:issue:`56236`)
438439
- Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`)
439440
- Deprecated the option ``mode.data_manager`` and the ``ArrayManager``; only the ``BlockManager`` will be available in future versions (:issue:`55043`)
440441
- Deprecated the previous implementation of :class:`DataFrame.stack`; specify ``future_stack=True`` to adopt the future version (:issue:`53515`)

pandas/core/frame.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -9296,6 +9296,11 @@ def pivot(
92969296
If True: only show observed values for categorical groupers.
92979297
If False: show all values for categorical groupers.
92989298
9299+
.. deprecated:: 2.2.0
9300+
9301+
The default value of ``False`` is deprecated and will change to
9302+
``True`` in a future version of pandas.
9303+
92999304
sort : bool, default True
93009305
Specifies if the result should be sorted.
93019306
@@ -9406,7 +9411,7 @@ def pivot_table(
94069411
margins: bool = False,
94079412
dropna: bool = True,
94089413
margins_name: Level = "All",
9409-
observed: bool = False,
9414+
observed: bool | lib.NoDefault = lib.no_default,
94109415
sort: bool = True,
94119416
) -> DataFrame:
94129417
from pandas.core.reshape.pivot import pivot_table

pandas/core/reshape/pivot.py

+17-3
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
Literal,
1111
cast,
1212
)
13+
import warnings
1314

1415
import numpy as np
1516

@@ -18,6 +19,7 @@
1819
Appender,
1920
Substitution,
2021
)
22+
from pandas.util._exceptions import find_stack_level
2123

2224
from pandas.core.dtypes.cast import maybe_downcast_to_dtype
2325
from pandas.core.dtypes.common import (
@@ -68,7 +70,7 @@ def pivot_table(
6870
margins: bool = False,
6971
dropna: bool = True,
7072
margins_name: Hashable = "All",
71-
observed: bool = False,
73+
observed: bool | lib.NoDefault = lib.no_default,
7274
sort: bool = True,
7375
) -> DataFrame:
7476
index = _convert_by(index)
@@ -123,7 +125,7 @@ def __internal_pivot_table(
123125
margins: bool,
124126
dropna: bool,
125127
margins_name: Hashable,
126-
observed: bool,
128+
observed: bool | lib.NoDefault,
127129
sort: bool,
128130
) -> DataFrame:
129131
"""
@@ -166,7 +168,18 @@ def __internal_pivot_table(
166168
pass
167169
values = list(values)
168170

169-
grouped = data.groupby(keys, observed=observed, sort=sort, dropna=dropna)
171+
observed_bool = False if observed is lib.no_default else observed
172+
grouped = data.groupby(keys, observed=observed_bool, sort=sort, dropna=dropna)
173+
if observed is lib.no_default and any(
174+
ping._passed_categorical for ping in grouped.grouper.groupings
175+
):
176+
warnings.warn(
177+
"The default value of observed=False is deprecated and will change "
178+
"to observed=True in a future version of pandas. Specify "
179+
"observed=False to silence this warning and retain the current behavior",
180+
category=FutureWarning,
181+
stacklevel=find_stack_level(),
182+
)
170183
agged = grouped.agg(aggfunc)
171184

172185
if dropna and isinstance(agged, ABCDataFrame) and len(agged.columns):
@@ -719,6 +732,7 @@ def crosstab(
719732
margins=margins,
720733
margins_name=margins_name,
721734
dropna=dropna,
735+
observed=False,
722736
**kwargs, # type: ignore[arg-type]
723737
)
724738

pandas/tests/reshape/test_pivot.py

+29-12
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,9 @@ def test_pivot_table_categorical(self):
201201
["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True
202202
)
203203
df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})
204-
result = pivot_table(df, values="values", index=["A", "B"], dropna=True)
204+
msg = "The default value of observed=False is deprecated"
205+
with tm.assert_produces_warning(FutureWarning, match=msg):
206+
result = pivot_table(df, values="values", index=["A", "B"], dropna=True)
205207

206208
exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"])
207209
expected = DataFrame({"values": [1.0, 2.0, 3.0, 4.0]}, index=exp_index)
@@ -220,7 +222,9 @@ def test_pivot_table_dropna_categoricals(self, dropna):
220222
)
221223

222224
df["A"] = df["A"].astype(CategoricalDtype(categories, ordered=False))
223-
result = df.pivot_table(index="B", columns="A", values="C", dropna=dropna)
225+
msg = "The default value of observed=False is deprecated"
226+
with tm.assert_produces_warning(FutureWarning, match=msg):
227+
result = df.pivot_table(index="B", columns="A", values="C", dropna=dropna)
224228
expected_columns = Series(["a", "b", "c"], name="A")
225229
expected_columns = expected_columns.astype(
226230
CategoricalDtype(categories, ordered=False)
@@ -250,7 +254,9 @@ def test_pivot_with_non_observable_dropna(self, dropna):
250254
}
251255
)
252256

253-
result = df.pivot_table(index="A", values="B", dropna=dropna)
257+
msg = "The default value of observed=False is deprecated"
258+
with tm.assert_produces_warning(FutureWarning, match=msg):
259+
result = df.pivot_table(index="A", values="B", dropna=dropna)
254260
if dropna:
255261
values = [2.0, 3.0]
256262
codes = [0, 1]
@@ -283,7 +289,9 @@ def test_pivot_with_non_observable_dropna_multi_cat(self, dropna):
283289
}
284290
)
285291

286-
result = df.pivot_table(index="A", values="B", dropna=dropna)
292+
msg = "The default value of observed=False is deprecated"
293+
with tm.assert_produces_warning(FutureWarning, match=msg):
294+
result = df.pivot_table(index="A", values="B", dropna=dropna)
287295
expected = DataFrame(
288296
{"B": [2.0, 3.0, 0.0]},
289297
index=Index(
@@ -301,7 +309,10 @@ def test_pivot_with_non_observable_dropna_multi_cat(self, dropna):
301309
def test_pivot_with_interval_index(self, interval_values, dropna):
302310
# GH 25814
303311
df = DataFrame({"A": interval_values, "B": 1})
304-
result = df.pivot_table(index="A", values="B", dropna=dropna)
312+
313+
msg = "The default value of observed=False is deprecated"
314+
with tm.assert_produces_warning(FutureWarning, match=msg):
315+
result = df.pivot_table(index="A", values="B", dropna=dropna)
305316
expected = DataFrame(
306317
{"B": 1.0}, index=Index(interval_values.unique(), name="A")
307318
)
@@ -322,9 +333,11 @@ def test_pivot_with_interval_index_margins(self):
322333
}
323334
)
324335

325-
pivot_tab = pivot_table(
326-
df, index="C", columns="B", values="A", aggfunc="sum", margins=True
327-
)
336+
msg = "The default value of observed=False is deprecated"
337+
with tm.assert_produces_warning(FutureWarning, match=msg):
338+
pivot_tab = pivot_table(
339+
df, index="C", columns="B", values="A", aggfunc="sum", margins=True
340+
)
328341

329342
result = pivot_tab["All"]
330343
expected = Series(
@@ -1827,7 +1840,9 @@ def test_categorical_margins_category(self, observed):
18271840

18281841
df.y = df.y.astype("category")
18291842
df.z = df.z.astype("category")
1830-
table = df.pivot_table("x", "y", "z", dropna=observed, margins=True)
1843+
msg = "The default value of observed=False is deprecated"
1844+
with tm.assert_produces_warning(FutureWarning, match=msg):
1845+
table = df.pivot_table("x", "y", "z", dropna=observed, margins=True)
18311846
tm.assert_frame_equal(table, expected)
18321847

18331848
def test_margins_casted_to_float(self):
@@ -1889,9 +1904,11 @@ def test_categorical_aggfunc(self, observed):
18891904
{"C1": ["A", "B", "C", "C"], "C2": ["a", "a", "b", "b"], "V": [1, 2, 3, 4]}
18901905
)
18911906
df["C1"] = df["C1"].astype("category")
1892-
result = df.pivot_table(
1893-
"V", index="C1", columns="C2", dropna=observed, aggfunc="count"
1894-
)
1907+
msg = "The default value of observed=False is deprecated"
1908+
with tm.assert_produces_warning(FutureWarning, match=msg):
1909+
result = df.pivot_table(
1910+
"V", index="C1", columns="C2", dropna=observed, aggfunc="count"
1911+
)
18951912

18961913
expected_index = pd.CategoricalIndex(
18971914
["A", "B", "C"], categories=["A", "B", "C"], ordered=False, name="C1"

0 commit comments

Comments
 (0)