Will raise in the future

jseabold · jseabold · commit 739833a5cd1a · 2020-12-07T11:25:53.000-06:00
diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst
@@ -635,9 +635,8 @@ even if some categories are not present in the data:
     )
     df.sum(axis=1, level=1)
 
-Groupby will also show "unused" categories, though this default is deprecated
-and will be changed in a future release. It is recommended to use the
-``observed`` keyword explicitly as below:
+Groupby will also show "unused" categories by default, though relying on the
+default is deprecated. In a future release, ``observed`` must be passed explicitly:
 
 .. ipython:: python
 
diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
@@ -34,12 +34,13 @@
 
 _observed_msg = textwrap.dedent(
     """\
-Using 'observed=False', because grouping on a categorical. A future version
-of pandas will change to 'observed=True'.
+Grouping by a categorical but 'observed' was not specified.
+Using 'observed=False', but in a future version of pandas
+not specifying 'observed' will raise an error. Pass
+'observed=True' or 'observed=False' to silence this warning.
 
-To silence the warning and switch to the future behavior, pass 'observed=True'.
-
-To keep the current behavior and silence the warning, pass 'observed=False'.
+See the `groupby` documentation for more information on the
+observed keyword.
 """
 )
 
diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py
@@ -120,8 +120,10 @@
     If True: only show observed values for categorical groupers.
     If False: show all values for categorical groupers.
 
-    The current default of ``observed=False`` is deprecated and will
-    change to ``observed=True`` in a future version of pandas.
+    The current default of ``observed=False`` is deprecated. In a
+    future version, ``observed`` will be a required keyword when
+    grouping by a categorical, and failing to specify a value will
+    raise an error.
 
     Explicitly pass ``observed=True`` to silence the warning and not
     show all observed values.
diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py
@@ -1,13 +1,20 @@
 """
 test cython .agg behavior
 """
-
 import numpy as np
 import pytest
 
 import pandas as pd
-from pandas import DataFrame, Index, NaT, Series, Timedelta, Timestamp, bdate_range
-import pandas._testing as tm
+from pandas import (
+    DataFrame,
+    Index,
+    NaT,
+    Series,
+    Timedelta,
+    Timestamp,
+    _testing as tm,
+    bdate_range,
+)
 from pandas.core.groupby.groupby import DataError
 
 
@@ -175,7 +182,7 @@ def test__cython_agg_general(op, targop):
         ("max", np.max),
     ],
 )
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_cython_agg_empty_buckets(op, targop, observed):
     df = DataFrame([11, 12, 13])
     grps = range(0, 55, 5)
@@ -190,7 +197,7 @@ def test_cython_agg_empty_buckets(op, targop, observed):
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_cython_agg_empty_buckets_nanops(observed):
     # GH-18869 can't call nanops on empty groups, so hardcode expected
     # for these
@@ -283,18 +290,7 @@ def test_read_only_buffer_source_agg(agg):
 
 @pytest.mark.parametrize(
     "op_name",
-    [
-        "count",
-        "sum",
-        "std",
-        "var",
-        "sem",
-        "mean",
-        "median",
-        "prod",
-        "min",
-        "max",
-    ],
+    ["count", "sum", "std", "var", "sem", "mean", "median", "prod", "min", "max",],
 )
 def test_cython_agg_nullable_int(op_name):
     # ensure that the cython-based aggregations don't fail for nullable dtype
diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
@@ -1,7 +1,6 @@
 """
 test all other .agg behavior
 """
-
 import datetime as dt
 from functools import partial
 
@@ -15,10 +14,10 @@
     MultiIndex,
     PeriodIndex,
     Series,
+    _testing as tm,
     date_range,
     period_range,
 )
-import pandas._testing as tm
 from pandas.core.base import SpecificationError
 
 from pandas.io.formats.printing import pprint_thing
@@ -515,14 +514,8 @@ def test_sum_uint64_overflow():
     [
         (tuple, DataFrame({"C": {(1, 1): (1, 1, 1), (3, 4): (3, 4, 4)}})),
         (list, DataFrame({"C": {(1, 1): [1, 1, 1], (3, 4): [3, 4, 4]}})),
-        (
-            lambda x: tuple(x),
-            DataFrame({"C": {(1, 1): (1, 1, 1), (3, 4): (3, 4, 4)}}),
-        ),
-        (
-            lambda x: list(x),
-            DataFrame({"C": {(1, 1): [1, 1, 1], (3, 4): [3, 4, 4]}}),
-        ),
+        (lambda x: tuple(x), DataFrame({"C": {(1, 1): (1, 1, 1), (3, 4): (3, 4, 4)}}),),
+        (lambda x: list(x), DataFrame({"C": {(1, 1): [1, 1, 1], (3, 4): [3, 4, 4]}}),),
     ],
 )
 def test_agg_structs_dataframe(structure, expected):
@@ -555,7 +548,7 @@ def test_agg_structs_series(structure, expected):
     tm.assert_series_equal(result, expected)
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_agg_category_nansum(observed):
     categories = ["a", "b", "c"]
     df = DataFrame(
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
@@ -212,7 +212,7 @@ def f(x):
     tm.assert_index_equal((desc_result.stack().index.get_level_values(1)), exp)
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_level_get_group(observed):
     # GH15155
     df = DataFrame(
@@ -277,7 +277,7 @@ def test_apply(ordered):
     tm.assert_series_equal(result, expected)
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_observed(observed):
     # multiple groupers, don't re-expand the output space
     # of the grouper
@@ -386,7 +386,7 @@ def test_observed(observed):
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_observed_codes_remap(observed):
     d = {"C1": [3, 3, 4, 5], "C2": [1, 2, 3, 4], "C3": [10, 100, 200, 34]}
     df = DataFrame(d)
@@ -427,7 +427,7 @@ def test_observed_perf():
     assert result.index.levels[2].nunique() == df.other_id.nunique()
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_observed_groups(observed):
     # gh-20583
     # test that we have the appropriate groups
@@ -450,7 +450,7 @@ def test_observed_groups(observed):
     tm.assert_dict_equal(result, expected)
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_observed_groups_with_nan(observed):
     # GH 24740
     df = DataFrame(
@@ -487,7 +487,7 @@ def test_observed_nth():
     tm.assert_series_equal(result, expected)
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_dataframe_categorical_with_nan(observed):
     # GH 21151
     s1 = Categorical([np.nan, "a", np.nan, "a"], categories=["a", "b", "c"])
@@ -511,7 +511,7 @@ def test_dataframe_categorical_with_nan(observed):
 @pytest.mark.parametrize("ordered", [True, False])
 @pytest.mark.parametrize("observed", [True, False])
 @pytest.mark.parametrize("sort", [True, False])
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort):
     # GH 25871: Fix groupby sorting on ordered Categoricals
     # GH 25167: Groupby with observed=True doesn't sort
@@ -1176,7 +1176,7 @@ def test_seriesgroupby_observed_true(df_cat, operation, kwargs):
 
 @pytest.mark.parametrize("operation", ["agg", "apply"])
 @pytest.mark.parametrize("observed", [False, None])
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation):
     # GH 24880
     index, _ = MultiIndex.from_product(
@@ -1241,7 +1241,7 @@ def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation):
         ),
     ],
 )
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_seriesgroupby_observed_apply_dict(df_cat, observed, index, data):
     # GH 24880
     expected = Series(data=data, index=index, name="C")
@@ -1259,7 +1259,7 @@ def test_groupby_categorical_series_dataframe_consistent(df_cat):
 
 
 @pytest.mark.parametrize("code", [([1, 0, 0]), ([0, 0, 0])])
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_groupby_categorical_axis_1(code):
     # GH 13420
     df = DataFrame({"a": [1, 2, 3, 4], "b": [-1, -2, -3, -4], "c": [5, 6, 7, 8]})
@@ -1269,7 +1269,7 @@ def test_groupby_categorical_axis_1(code):
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_groupby_cat_preserves_structure(observed, ordered):
     # GH 28787
     df = DataFrame(
@@ -1298,7 +1298,7 @@ def test_get_nonexistent_category():
         )
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_series_groupby_on_2_categoricals_unobserved(reduction_func, observed, request):
     # GH 17605
     if reduction_func == "ngroup":
@@ -1398,7 +1398,7 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_true(reduction_fun
 
 
 @pytest.mark.parametrize("observed", [False, None])
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_dataframe_groupby_on_2_categoricals_when_observed_is_false(
     reduction_func, observed, request
 ):
@@ -1432,7 +1432,7 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_false(
         assert (res.loc[unobserved_cats] == expected).all().all()
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_series_groupby_categorical_aggregation_getitem():
     # GH 8870
     d = {"foo": [10, 8, 4, 1], "bar": [10, 20, 30, 40], "baz": ["d", "c", "d", "c"]}
@@ -1446,8 +1446,7 @@ def test_series_groupby_categorical_aggregation_getitem():
 
 
 @pytest.mark.parametrize(
-    "func, expected_values",
-    [(Series.nunique, [1, 1, 2]), (Series.count, [1, 2, 2])],
+    "func, expected_values", [(Series.nunique, [1, 1, 2]), (Series.count, [1, 2, 2])],
 )
 def test_groupby_agg_categorical_columns(func, expected_values):
     # 31256
@@ -1488,7 +1487,7 @@ def test_groupy_first_returned_categorical_instead_of_dataframe(func):
     tm.assert_series_equal(result, expected)
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_read_only_category_no_sort():
     # GH33410
     cats = np.array([1, 2])
@@ -1502,7 +1501,7 @@ def test_read_only_category_no_sort():
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_sorted_missing_category_values():
     # GH 28597
     df = DataFrame(
@@ -1650,7 +1649,7 @@ def test_categorical_transform():
 
 
 @pytest.mark.parametrize("func", ["first", "last"])
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_series_groupby_first_on_categorical_col_grouped_on_2_categoricals(
     func: str, observed: bool
 ):
@@ -1676,7 +1675,7 @@ def test_series_groupby_first_on_categorical_col_grouped_on_2_categoricals(
 
 
 @pytest.mark.parametrize("func", ["first", "last"])
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_df_groupby_first_on_categorical_col_grouped_on_2_categoricals(
     func: str, observed: bool
 ):
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
@@ -7,9 +7,17 @@
 from pandas.errors import UnsupportedFunctionCall
 
 import pandas as pd
-from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range, isna
-import pandas._testing as tm
-import pandas.core.nanops as nanops
+from pandas import (
+    DataFrame,
+    Index,
+    MultiIndex,
+    Series,
+    Timestamp,
+    _testing as tm,
+    date_range,
+    isna,
+)
+from pandas.core import nanops as nanops
 from pandas.util import _test_decorators as td
 
 
@@ -410,7 +418,7 @@ def test_cython_median():
     tm.assert_frame_equal(rs, xp)
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_median_empty_bins(observed):
     df = DataFrame(np.random.randint(0, 44, 500))
 
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
@@ -310,7 +310,7 @@ def test_groupby_levels_and_columns(self):
         by_columns.columns = by_columns.columns.astype(np.int64)
         tm.assert_frame_equal(by_levels, by_columns)
 
-    @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+    @pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
     def test_groupby_categorical_index_and_columns(self, observed):
         # GH18432, adapted for GH25871
         columns = ["A", "B", "A", "B"]
@@ -778,7 +778,7 @@ def test_get_group(self):
         with pytest.raises(ValueError, match=msg):
             g.get_group(("foo", "bar", "baz"))
 
-    @pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+    @pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
     def test_get_group_empty_bins(self, observed):
 
         d = DataFrame([3, 1, 7, 6])
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
@@ -472,12 +472,7 @@ def test_groupby_transform_with_int():
 
     # int case
     df = DataFrame(
-        {
-            "A": [1, 1, 1, 2, 2, 2],
-            "B": 1,
-            "C": [1, 2, 3, 1, 2, 3],
-            "D": "foo",
-        }
+        {"A": [1, 1, 1, 2, 2, 2], "B": 1, "C": [1, 2, 3, 1, 2, 3], "D": "foo",}
     )
     with np.errstate(all="ignore"):
         result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
@@ -1153,7 +1148,7 @@ def test_transform_lambda_indexing():
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.filterwarnings("ignore:Using 'observed:FutureWarning")
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_categorical_and_not_categorical_key(observed):
     # Checks that groupby-transform, when grouping by both a categorical
     # and a non-categorical key, doesn't try to expand the output to include
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py

Original file line number	Diff line number	Diff line change
`@@ -635,9 +635,8 @@ even if some categories are not present in the data:`
`635`	`635`	`)`
`636`	`636`	`df.sum(axis=1, level=1)`
`637`	`637`
`638`		`-Groupby will also show "unused" categories, though this default is deprecated`
`639`		`-and will be changed in a future release. It is recommended to use the`
`640`		-``observed`` keyword explicitly as below:
	`638`	`+Groupby will also show "unused" categories by default, though relying on the`
	`639`	+default is deprecated. In a future release, ``observed`` must be passed explicitly:
`641`	`640`
`642`	`641`	`.. ipython:: python`
`643`	`642`