diff --git a/doc/source/user_guide/10min.rst b/doc/source/user_guide/10min.rst
index cf548ba5d1133..81c33b53e21a8 100644
--- a/doc/source/user_guide/10min.rst
+++ b/doc/source/user_guide/10min.rst
@@ -702,11 +702,11 @@ Sorting is per order in the categories, not lexical order.
 
     df.sort_values(by="grade")
 
-Grouping by a categorical column also shows empty categories.
+Grouping by a categorical column with ``observed=False`` also shows empty categories.
 
 .. ipython:: python
 
-    df.groupby("grade").size()
+    df.groupby("grade", observed=False).size()
 
 
 Plotting
diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst
index 2cd48ac7adb0e..f952bd9150ce5 100644
--- a/doc/source/user_guide/advanced.rst
+++ b/doc/source/user_guide/advanced.rst
@@ -809,8 +809,8 @@ Groupby operations on the index will preserve the index nature as well.
 
 .. ipython:: python
 
-   df2.groupby(level=0).sum()
-   df2.groupby(level=0).sum().index
+   df2.groupby(level=0, observed=False).sum()
+   df2.groupby(level=0, observed=False).sum().index
 
 Reindexing operations will return a resulting index based on the type of the passed
 indexer. Passing a list will return a plain-old ``Index``; indexing with
diff --git a/doc/source/user_guide/categorical.rst b/doc/source/user_guide/categorical.rst
index 5c43de05fb5b9..0221bc4101b63 100644
--- a/doc/source/user_guide/categorical.rst
+++ b/doc/source/user_guide/categorical.rst
@@ -622,7 +622,7 @@ even if some categories are not present in the data:
     s = pd.Series(pd.Categorical(["a", "b", "c", "c"], categories=["c", "a", "b", "d"]))
     s.value_counts()
 
-``DataFrame`` methods like :meth:`DataFrame.sum` also show "unused" categories.
+``DataFrame`` methods like :meth:`DataFrame.sum` also show "unused" categories:
 
 .. ipython:: python
 
@@ -635,7 +635,8 @@ even if some categories are not present in the data:
     )
     df.sum(axis=1, level=1)
 
-Groupby will also show "unused" categories:
+Groupby will also show "unused" categories by default, though this behavior
+is deprecated. In a future release, users must specify a value for ``observed``:
 
 .. ipython:: python
 
@@ -643,7 +644,7 @@ Groupby will also show "unused" categories:
         ["a", "b", "b", "b", "c", "c", "c"], categories=["a", "b", "c", "d"]
     )
     df = pd.DataFrame({"cats": cats, "values": [1, 2, 2, 2, 3, 4, 5]})
-    df.groupby("cats").mean()
+    df.groupby("cats", observed=False).mean()
 
     cats2 = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b", "c"])
     df2 = pd.DataFrame(
@@ -653,7 +654,7 @@ Groupby will also show "unused" categories:
             "values": [1, 2, 3, 4],
         }
     )
-    df2.groupby(["cats", "B"]).mean()
+    df2.groupby(["cats", "B"], observed=False).mean()
 
 
 Pivot tables:
@@ -662,7 +663,7 @@ Pivot tables:
 
     raw_cat = pd.Categorical(["a", "a", "b", "b"], categories=["a", "b", "c"])
     df = pd.DataFrame({"A": raw_cat, "B": ["c", "d", "c", "d"], "values": [1, 2, 3, 4]})
-    pd.pivot_table(df, values="values", index=["A", "B"])
+    pd.pivot_table(df, values="values", index=["A", "B"], observed=False)
 
 Data munging
 ------------
diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst
index d6081155b58db..b6f30beae1dbb 100644
--- a/doc/source/user_guide/groupby.rst
+++ b/doc/source/user_guide/groupby.rst
@@ -1269,7 +1269,7 @@ can be used as group keys. If so, the order of the levels will be preserved:
 
    factor = pd.qcut(data, [0, 0.25, 0.5, 0.75, 1.0])
 
-   data.groupby(factor).mean()
+   data.groupby(factor, observed=True).mean()
 
 .. _groupby.specify:
 
diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst
index 340e1ce9ee1ef..cec8e44806250 100644
--- a/doc/source/whatsnew/v0.19.0.rst
+++ b/doc/source/whatsnew/v0.19.0.rst
@@ -1131,6 +1131,7 @@ An analogous change has been made to ``MultiIndex.from_product``.
 As a consequence, ``groupby`` and ``set_index`` also preserve categorical dtypes in indexes
 
 .. ipython:: python
+   :okwarning:
 
    df = pd.DataFrame({"A": [0, 1], "B": [10, 11], "C": cat})
    df_grouped = df.groupby(by=["A", "C"]).first()
diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst
index 2cb8e13e9a18a..dbd77aab4ff3d 100644
--- a/doc/source/whatsnew/v0.20.0.rst
+++ b/doc/source/whatsnew/v0.20.0.rst
@@ -291,6 +291,7 @@ In previous versions, ``.groupby(..., sort=False)`` would fail with a ``ValueErr
 **New behavior**:
 
 .. ipython:: python
+   :okwarning:
 
    df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum()
 
diff --git a/doc/source/whatsnew/v0.22.0.rst b/doc/source/whatsnew/v0.22.0.rst
index ec9769c22e76b..d8672be0bc711 100644
--- a/doc/source/whatsnew/v0.22.0.rst
+++ b/doc/source/whatsnew/v0.22.0.rst
@@ -118,6 +118,7 @@ instead of ``NaN``.
 *pandas 0.22*
 
 .. ipython:: python
+   :okwarning:
 
    grouper = pd.Categorical(["a", "a"], categories=["a", "b"])
    pd.Series([1, 2]).groupby(grouper).sum()
@@ -126,6 +127,7 @@ To restore the 0.21 behavior of returning ``NaN`` for unobserved groups,
 use ``min_count>=1``.
 
 .. ipython:: python
+   :okwarning:
 
    pd.Series([1, 2]).groupby(grouper).sum(min_count=1)
 
diff --git a/doc/source/whatsnew/v0.23.0.rst b/doc/source/whatsnew/v0.23.0.rst
index f4caea9d363eb..a763803d6fa3b 100644
--- a/doc/source/whatsnew/v0.23.0.rst
+++ b/doc/source/whatsnew/v0.23.0.rst
@@ -288,6 +288,7 @@ For pivoting operations, this behavior is *already* controlled by the ``dropna``
    df
 
 .. ipython:: python
+   :okwarning:
 
    pd.pivot_table(df, values='values', index=['A', 'B'],
                   dropna=True)
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 8dbc6728dccfe..ce6e2a1395868 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -522,6 +522,7 @@ Deprecations
 - Deprecated :meth:`Index.asi8` for :class:`Index` subclasses other than :class:`.DatetimeIndex`, :class:`.TimedeltaIndex`, and :class:`PeriodIndex` (:issue:`37877`)
 - The ``inplace`` parameter of :meth:`Categorical.remove_unused_categories` is deprecated and will be removed in a future version (:issue:`37643`)
 - The ``null_counts`` parameter of :meth:`DataFrame.info` is deprecated and replaced by ``show_counts``. It will be removed in a future version (:issue:`37999`)
+- Deprecated default keyword argument of ``observed=False`` in :meth:`~DataFrame.groupby` and :meth:`~DataFrame.pivot_table` (:issue:`17594`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 5f149f10b05d3..53f72abd8d93f 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -5677,7 +5677,7 @@ def value_counts(
         if subset is None:
             subset = self.columns.tolist()
 
-        counts = self.groupby(subset).grouper.size()
+        counts = self.groupby(subset, observed=True).grouper.size()
 
         if sort:
             counts = counts.sort_values(ascending=ascending)
@@ -6698,7 +6698,7 @@ def groupby(
         sort: bool = True,
         group_keys: bool = True,
         squeeze: bool = no_default,
-        observed: bool = False,
+        observed: Optional[bool] = None,
         dropna: bool = True,
     ) -> DataFrameGroupBy:
         from pandas.core.groupby.generic import DataFrameGroupBy
@@ -7029,7 +7029,7 @@ def pivot_table(
         margins=False,
         dropna=True,
         margins_name="All",
-        observed=False,
+        observed=None,
     ) -> DataFrame:
         from pandas.core.reshape.pivot import pivot_table
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 4a9e020a0fe46..61cdc6b98d919 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -87,10 +87,15 @@
 from pandas.core.dtypes.missing import isna, notna
 
 import pandas as pd
-from pandas.core import arraylike, indexing, missing, nanops
-import pandas.core.algorithms as algos
+from pandas.core import (
+    algorithms as algos,
+    arraylike,
+    common as com,
+    indexing,
+    missing,
+    nanops,
+)
 from pandas.core.base import PandasObject, SelectionMixin
-import pandas.core.common as com
 from pandas.core.construction import create_series_with_explicit_dtype
 from pandas.core.flags import Flags
 from pandas.core.indexes import base as ibase
@@ -10545,7 +10550,8 @@ def pct_change(
     def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwargs):
         if axis is None:
             raise ValueError("Must specify 'axis' when aggregating by level.")
-        grouped = self.groupby(level=level, axis=axis, sort=False)
+        # see pr-35967 for discussion about the observed keyword
+        grouped = self.groupby(level=level, axis=axis, sort=False, observed=False)
         if hasattr(grouped, name) and skipna:
             return getattr(grouped, name)(**kwargs)
         axis = self._get_axis_number(axis)
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 798c0742f03e5..98d26ccb34a00 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -526,7 +526,7 @@ def __init__(
         sort: bool = True,
         group_keys: bool = True,
         squeeze: bool = False,
-        observed: bool = False,
+        observed: Optional[bool] = None,
         mutated: bool = False,
         dropna: bool = True,
     ):
@@ -3016,7 +3016,7 @@ def get_groupby(
     sort: bool = True,
     group_keys: bool = True,
     squeeze: bool = False,
-    observed: bool = False,
+    observed: Optional[bool] = None,
     mutated: bool = False,
     dropna: bool = True,
 ) -> GroupBy:
diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index e8af9da30a298..23b562301aeb1 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -2,6 +2,7 @@
 Provide user facing operators for doing the split part of the
 split-apply-combine paradigm.
 """
+import textwrap
 from typing import Dict, Hashable, List, Optional, Set, Tuple
 import warnings
 
@@ -31,6 +32,18 @@
 
 from pandas.io.formats.printing import pprint_thing
 
+_observed_msg = textwrap.dedent(
+    """\
+Grouping by a categorical but 'observed' was not specified.
+Using 'observed=False', but in a future version of pandas
+not specifying 'observed' will raise an error. Pass
+'observed=True' or 'observed=False' to silence this warning.
+
+See the `groupby` documentation for more information on the
+observed keyword.
+"""
+)
+
 
 class Grouper:
     """
@@ -432,7 +445,7 @@ def __init__(
         name=None,
         level=None,
         sort: bool = True,
-        observed: bool = False,
+        observed: Optional[bool] = None,
         in_axis: bool = False,
         dropna: bool = True,
     ):
@@ -495,6 +508,10 @@ def __init__(
             # a passed Categorical
             elif is_categorical_dtype(self.grouper):
 
+                if observed is None:
+                    warnings.warn(_observed_msg, FutureWarning)
+                    observed = False
+
                 self.grouper, self.all_grouper = recode_for_groupby(
                     self.grouper, self.sort, observed
                 )
@@ -631,7 +648,7 @@ def get_grouper(
     axis: int = 0,
     level=None,
     sort: bool = True,
-    observed: bool = False,
+    observed: Optional[bool] = None,
     mutated: bool = False,
     validate: bool = True,
     dropna: bool = True,
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 52ffb1567cb2d..c9ffc9a69281b 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -493,7 +493,12 @@ def _format_duplicate_message(self):
         duplicates = self[self.duplicated(keep="first")].unique()
         assert len(duplicates)
 
-        out = Series(np.arange(len(self))).groupby(self).agg(list)[duplicates]
+        # see pr-35967 about the observed keyword
+        out = (
+            Series(np.arange(len(self)))
+            .groupby(self, observed=False)
+            .agg(list)[duplicates]
+        )
         if self.nlevels == 1:
             out = out.rename_axis("label")
         return out.to_frame(name="positions")
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 2c6cdb846221f..94d8b50cf5597 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -109,13 +109,15 @@ def _groupby_and_merge(by, on, left: "DataFrame", right: "DataFrame", merge_piec
     if not isinstance(by, (list, tuple)):
         by = [by]
 
-    lby = left.groupby(by, sort=False)
+    # see pr-35967 for discussion about observed=False
+    # this is the previous default behavior if the group is a categorical
+    lby = left.groupby(by, sort=False, observed=False)
     rby: Optional[groupby.DataFrameGroupBy] = None
 
     # if we can groupby the rhs
     # then we can get vastly better perf
     if all(item in right.columns for item in by):
-        rby = right.groupby(by, sort=False)
+        rby = right.groupby(by, sort=False, observed=False)
 
     for key, lhs in lby:
 
diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
index 40496a5b8671b..19a56b1651197 100644
--- a/pandas/core/reshape/pivot.py
+++ b/pandas/core/reshape/pivot.py
@@ -46,7 +46,7 @@ def pivot_table(
     margins=False,
     dropna=True,
     margins_name="All",
-    observed=False,
+    observed=None,
 ) -> "DataFrame":
     index = _convert_by(index)
     columns = _convert_by(columns)
@@ -612,6 +612,8 @@ def crosstab(
         margins=margins,
         margins_name=margins_name,
         dropna=dropna,
+        # the below is only here to silence the FutureWarning
+        observed=False,
         **kwargs,
     )
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
index b20cf8eed9a2e..b51e2a42293d0 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -1674,7 +1674,7 @@ def groupby(
         sort: bool = True,
         group_keys: bool = True,
         squeeze: bool = no_default,
-        observed: bool = False,
+        observed: Optional[bool] = None,
         dropna: bool = True,
     ) -> "SeriesGroupBy":
         from pandas.core.groupby.generic import SeriesGroupBy
diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py
index 3aeb3b664b27f..92e52a3d174dd 100644
--- a/pandas/core/shared_docs.py
+++ b/pandas/core/shared_docs.py
@@ -119,6 +119,17 @@
     This only applies if any of the groupers are Categoricals.
     If True: only show observed values for categorical groupers.
     If False: show all values for categorical groupers.
+
+    The current default of ``observed=False`` is deprecated. In
+    the future this will be a required keyword in the presence
+    of a categorical grouper and a failure to specify a value will
+    result in an error.
+
+    Explicitly pass ``observed=True`` to silence the warning and
+    show only the observed values of categorical groupers.
+    Explicitly pass ``observed=False`` to silence the warning and
+    show all values of categorical groupers, including unobserved ones.
+
 dropna : bool, default True
     If True, and if group keys contain NA values, NA values together
     with row/column will be dropped.
diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py
index 7122a38db9d0a..82bf1af5da297 100644
--- a/pandas/plotting/_matplotlib/boxplot.py
+++ b/pandas/plotting/_matplotlib/boxplot.py
@@ -195,7 +195,7 @@ def _grouped_plot_by_column(
     return_type=None,
     **kwargs,
 ):
-    grouped = data.groupby(by)
+    grouped = data.groupby(by, observed=False)
     if columns is None:
         if not isinstance(by, (list, tuple)):
             by = [by]
diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py
index 073918eda3deb..cd3757f6a5ecf 100644
--- a/pandas/tests/groupby/aggregate/test_aggregate.py
+++ b/pandas/tests/groupby/aggregate/test_aggregate.py
@@ -13,8 +13,7 @@
 from pandas.core.dtypes.common import is_integer_dtype
 
 import pandas as pd
-from pandas import DataFrame, Index, MultiIndex, Series, concat
-import pandas._testing as tm
+from pandas import DataFrame, Index, MultiIndex, Series, _testing as tm, concat
 from pandas.core.base import SpecificationError
 from pandas.core.groupby.grouper import Grouping
 
@@ -1074,7 +1073,7 @@ def test_groupby_single_agg_cat_cols(grp_col_dict, exp_data):
 
     input_df = input_df.astype({"cat": "category", "cat_ord": "category"})
     input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered()
-    result_df = input_df.groupby("cat").agg(grp_col_dict)
+    result_df = input_df.groupby("cat", observed=False).agg(grp_col_dict)
 
     # create expected dataframe
     cat_index = pd.CategoricalIndex(
@@ -1108,7 +1107,7 @@ def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data):
 
     input_df = input_df.astype({"cat": "category", "cat_ord": "category"})
     input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered()
-    result_df = input_df.groupby("cat").agg(grp_col_dict)
+    result_df = input_df.groupby("cat", observed=False).agg(grp_col_dict)
 
     # create expected dataframe
     cat_index = pd.CategoricalIndex(
diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py
index c907391917ca8..6e96605418731 100644
--- a/pandas/tests/groupby/aggregate/test_cython.py
+++ b/pandas/tests/groupby/aggregate/test_cython.py
@@ -1,13 +1,20 @@
 """
 test cython .agg behavior
 """
-
 import numpy as np
 import pytest
 
 import pandas as pd
-from pandas import DataFrame, Index, NaT, Series, Timedelta, Timestamp, bdate_range
-import pandas._testing as tm
+from pandas import (
+    DataFrame,
+    Index,
+    NaT,
+    Series,
+    Timedelta,
+    Timestamp,
+    _testing as tm,
+    bdate_range,
+)
 from pandas.core.groupby.groupby import DataError
 
 
@@ -175,6 +182,7 @@ def test__cython_agg_general(op, targop):
         ("max", np.max),
     ],
 )
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_cython_agg_empty_buckets(op, targop, observed):
     df = DataFrame([11, 12, 13])
     grps = range(0, 55, 5)
@@ -189,6 +197,7 @@ def test_cython_agg_empty_buckets(op, targop, observed):
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_cython_agg_empty_buckets_nanops(observed):
     # GH-18869 can't call nanops on empty groups, so hardcode expected
     # for these
diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py
index 5d0f6d6262899..5138f5de21a4c 100644
--- a/pandas/tests/groupby/aggregate/test_other.py
+++ b/pandas/tests/groupby/aggregate/test_other.py
@@ -1,7 +1,6 @@
 """
 test all other .agg behavior
 """
-
 import datetime as dt
 from functools import partial
 
@@ -15,10 +14,10 @@
     MultiIndex,
     PeriodIndex,
     Series,
+    _testing as tm,
     date_range,
     period_range,
 )
-import pandas._testing as tm
 from pandas.core.base import SpecificationError
 
 from pandas.io.formats.printing import pprint_thing
@@ -555,6 +554,7 @@ def test_agg_structs_series(structure, expected):
     tm.assert_series_equal(result, expected)
 
 
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_agg_category_nansum(observed):
     categories = ["a", "b", "c"]
     df = DataFrame(
diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py
index 8cf77ca6335f4..a1b3f7fe2e463 100644
--- a/pandas/tests/groupby/test_categorical.py
+++ b/pandas/tests/groupby/test_categorical.py
@@ -11,9 +11,9 @@
     Index,
     MultiIndex,
     Series,
+    _testing as tm,
     qcut,
 )
-import pandas._testing as tm
 
 
 def cartesian_product_for_groupers(result, args, names, fill_value=np.NaN):
@@ -212,6 +212,7 @@ def f(x):
     tm.assert_index_equal((desc_result.stack().index.get_level_values(1)), exp)
 
 
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_level_get_group(observed):
     # GH15155
     df = DataFrame(
@@ -276,6 +277,7 @@ def test_apply(ordered):
     tm.assert_series_equal(result, expected)
 
 
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_observed(observed):
     # multiple groupers, don't re-expand the output space
     # of the grouper
@@ -384,11 +386,13 @@ def test_observed(observed):
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_observed_codes_remap(observed):
     d = {"C1": [3, 3, 4, 5], "C2": [1, 2, 3, 4], "C3": [10, 100, 200, 34]}
     df = DataFrame(d)
     values = pd.cut(df["C1"], [1, 2, 3, 6])
     values.name = "cat"
+
     groups_double_key = df.groupby([values, "C2"], observed=observed)
 
     idx = MultiIndex.from_arrays([values, [1, 2, 3, 4]], names=["cat", "C2"])
@@ -423,12 +427,14 @@ def test_observed_perf():
     assert result.index.levels[2].nunique() == df.other_id.nunique()
 
 
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_observed_groups(observed):
     # gh-20583
     # test that we have the appropriate groups
 
     cat = Categorical(["a", "c", "a"], categories=["a", "b", "c"])
     df = DataFrame({"cat": cat, "vals": [1, 2, 3]})
+
     g = df.groupby("cat", observed=observed)
 
     result = g.groups
@@ -444,6 +450,7 @@ def test_observed_groups(observed):
     tm.assert_dict_equal(result, expected)
 
 
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_observed_groups_with_nan(observed):
     # GH 24740
     df = DataFrame(
@@ -480,6 +487,7 @@ def test_observed_nth():
     tm.assert_series_equal(result, expected)
 
 
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_dataframe_categorical_with_nan(observed):
     # GH 21151
     s1 = Categorical([np.nan, "a", np.nan, "a"], categories=["a", "b", "c"])
@@ -503,6 +511,7 @@ def test_dataframe_categorical_with_nan(observed):
 @pytest.mark.parametrize("ordered", [True, False])
 @pytest.mark.parametrize("observed", [True, False])
 @pytest.mark.parametrize("sort", [True, False])
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_dataframe_categorical_ordered_observed_sort(ordered, observed, sort):
     # GH 25871: Fix groupby sorting on ordered Categoricals
     # GH 25167: Groupby with observed=True doesn't sort
@@ -1062,7 +1071,7 @@ def test_groupby_multiindex_categorical_datetime():
             "values": np.arange(9),
         }
     )
-    result = df.groupby(["key1", "key2"]).mean()
+    result = df.groupby(["key1", "key2"], observed=False).mean()
 
     idx = MultiIndex.from_product(
         [
@@ -1167,6 +1176,7 @@ def test_seriesgroupby_observed_true(df_cat, operation, kwargs):
 
 @pytest.mark.parametrize("operation", ["agg", "apply"])
 @pytest.mark.parametrize("observed", [False, None])
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation):
     # GH 24880
     index, _ = MultiIndex.from_product(
@@ -1231,6 +1241,7 @@ def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation):
         ),
     ],
 )
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_seriesgroupby_observed_apply_dict(df_cat, observed, index, data):
     # GH 24880
     expected = Series(data=data, index=index, name="C")
@@ -1242,12 +1253,13 @@ def test_seriesgroupby_observed_apply_dict(df_cat, observed, index, data):
 
 def test_groupby_categorical_series_dataframe_consistent(df_cat):
     # GH 20416
-    expected = df_cat.groupby(["A", "B"])["C"].mean()
-    result = df_cat.groupby(["A", "B"]).mean()["C"]
+    expected = df_cat.groupby(["A", "B"], observed=False)["C"].mean()
+    result = df_cat.groupby(["A", "B"], observed=False).mean()["C"]
     tm.assert_series_equal(result, expected)
 
 
 @pytest.mark.parametrize("code", [([1, 0, 0]), ([0, 0, 0])])
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_groupby_categorical_axis_1(code):
     # GH 13420
     df = DataFrame({"a": [1, 2, 3, 4], "b": [-1, -2, -3, -4], "c": [5, 6, 7, 8]})
@@ -1257,6 +1269,7 @@ def test_groupby_categorical_axis_1(code):
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_groupby_cat_preserves_structure(observed, ordered):
     # GH 28787
     df = DataFrame(
@@ -1285,6 +1298,7 @@ def test_get_nonexistent_category():
         )
 
 
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_series_groupby_on_2_categoricals_unobserved(reduction_func, observed, request):
     # GH 17605
     if reduction_func == "ngroup":
@@ -1384,6 +1398,7 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_true(reduction_fun
 
 
 @pytest.mark.parametrize("observed", [False, None])
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_dataframe_groupby_on_2_categoricals_when_observed_is_false(
     reduction_func, observed, request
 ):
@@ -1417,6 +1432,7 @@ def test_dataframe_groupby_on_2_categoricals_when_observed_is_false(
         assert (res.loc[unobserved_cats] == expected).all().all()
 
 
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_series_groupby_categorical_aggregation_getitem():
     # GH 8870
     d = {"foo": [10, 8, 4, 1], "bar": [10, 20, 30, 40], "baz": ["d", "c", "d", "c"]}
@@ -1472,6 +1488,7 @@ def test_groupy_first_returned_categorical_instead_of_dataframe(func):
     tm.assert_series_equal(result, expected)
 
 
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_read_only_category_no_sort():
     # GH33410
     cats = np.array([1, 2])
@@ -1480,10 +1497,12 @@ def test_read_only_category_no_sort():
         {"a": [1, 3, 5, 7], "b": Categorical([1, 1, 2, 2], categories=Index(cats))}
     )
     expected = DataFrame(data={"a": [2, 6]}, index=CategoricalIndex([1, 2], name="b"))
+
     result = df.groupby("b", sort=False).mean()
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_sorted_missing_category_values():
     # GH 28597
     df = DataFrame(
@@ -1631,6 +1650,7 @@ def test_categorical_transform():
 
 
 @pytest.mark.parametrize("func", ["first", "last"])
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_series_groupby_first_on_categorical_col_grouped_on_2_categoricals(
     func: str, observed: bool
 ):
@@ -1656,6 +1676,7 @@ def test_series_groupby_first_on_categorical_col_grouped_on_2_categoricals(
 
 
 @pytest.mark.parametrize("func", ["first", "last"])
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_df_groupby_first_on_categorical_col_grouped_on_2_categoricals(
     func: str, observed: bool
 ):
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
index 12e570490487d..cc0c6c61e7e56 100644
--- a/pandas/tests/groupby/test_function.py
+++ b/pandas/tests/groupby/test_function.py
@@ -7,9 +7,17 @@
 from pandas.errors import UnsupportedFunctionCall
 
 import pandas as pd
-from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range, isna
-import pandas._testing as tm
-import pandas.core.nanops as nanops
+from pandas import (
+    DataFrame,
+    Index,
+    MultiIndex,
+    Series,
+    Timestamp,
+    _testing as tm,
+    date_range,
+    isna,
+)
+from pandas.core import nanops as nanops
 from pandas.util import _test_decorators as td
 
 
@@ -410,6 +418,7 @@ def test_cython_median():
     tm.assert_frame_equal(rs, xp)
 
 
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_median_empty_bins(observed):
     df = DataFrame(np.random.randint(0, 44, 500))
 
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 7c179a79513fa..a96789a7c80ce 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -15,10 +15,10 @@
     MultiIndex,
     Series,
     Timestamp,
+    _testing as tm,
     date_range,
     read_csv,
 )
-import pandas._testing as tm
 from pandas.core.base import SpecificationError
 import pandas.core.common as com
 
@@ -2012,7 +2012,7 @@ def test_dup_labels_output_shape(groupby_func, idx):
         pytest.skip("Not applicable")
 
     df = DataFrame([[1, 1]], columns=idx)
-    grp_by = df.groupby([0])
+    grp_by = df.groupby([0], observed=False)
 
     args = []
     if groupby_func in {"fillna", "nth"}:
diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py
index d268d87708552..574a42fb7224e 100644
--- a/pandas/tests/groupby/test_groupby_subclass.py
+++ b/pandas/tests/groupby/test_groupby_subclass.py
@@ -3,8 +3,7 @@
 import numpy as np
 import pytest
 
-from pandas import DataFrame, Series
-import pandas._testing as tm
+from pandas import DataFrame, Series, _testing as tm
 
 
 @pytest.mark.parametrize(
@@ -21,7 +20,7 @@ def test_groupby_preserves_subclass(obj, groupby_func):
     if isinstance(obj, Series) and groupby_func in {"corrwith"}:
         pytest.skip("Not applicable")
 
-    grouped = obj.groupby(np.arange(0, 10))
+    grouped = obj.groupby(np.arange(0, 10), observed=False)
 
     # Groups should preserve subclass type
     assert isinstance(grouped.get_group(0), type(obj))
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
index 1d2208592a06d..979b01371247f 100644
--- a/pandas/tests/groupby/test_grouping.py
+++ b/pandas/tests/groupby/test_grouping.py
@@ -1,5 +1,4 @@
 """ test where we are determining what we are grouping, or getting groups """
-
 import numpy as np
 import pytest
 
@@ -11,9 +10,9 @@
     MultiIndex,
     Series,
     Timestamp,
+    _testing as tm,
     date_range,
 )
-import pandas._testing as tm
 from pandas.core.groupby.grouper import Grouping
 
 # selection
@@ -311,6 +310,7 @@ def test_groupby_levels_and_columns(self):
         by_columns.columns = by_columns.columns.astype(np.int64)
         tm.assert_frame_equal(by_levels, by_columns)
 
+    @pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
     def test_groupby_categorical_index_and_columns(self, observed):
         # GH18432, adapted for GH25871
         columns = ["A", "B", "A", "B"]
@@ -702,6 +702,29 @@ def test_groupby_multiindex_level_empty(self):
         )
         tm.assert_frame_equal(result, expected)
 
+    def test_default_observed_deprecated(self):
+        # pr-35967: omitting ``observed`` with a categorical grouper must warn
+        df = DataFrame([["A", 1, 1], ["A", 2, 1], ["B", 1, 1]], columns=["x", "y", "z"])
+        df.x = df.x.astype("category")
+        df.y = df.y.astype("category")
+
+        with tm.assert_produces_warning(
+            expected_warning=FutureWarning, check_stacklevel=False
+        ):
+            df.groupby(["x", "y"])
+
+        with tm.assert_produces_warning(None) as any_warnings:
+            df.groupby(["x", "y"], observed=True)
+            df.groupby(["x", "y"], observed=False)
+        assert len(any_warnings) == 0
+
+        cat = pd.Categorical(["A", "B", "C"], categories=["A", "B", "C", "D"])
+        s = Series(cat)
+        with tm.assert_produces_warning(
+            expected_warning=FutureWarning, check_stacklevel=False
+        ):
+            s.groupby(cat)
+
 
 # get_group
 # --------------------------------
@@ -755,6 +778,7 @@ def test_get_group(self):
         with pytest.raises(ValueError, match=msg):
             g.get_group(("foo", "bar", "baz"))
 
+    @pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
     def test_get_group_empty_bins(self, observed):
 
         d = DataFrame([3, 1, 7, 6])
diff --git a/pandas/tests/groupby/test_size.py b/pandas/tests/groupby/test_size.py
index ba27e5a24ba00..cb724d46bc0d1 100644
--- a/pandas/tests/groupby/test_size.py
+++ b/pandas/tests/groupby/test_size.py
@@ -1,8 +1,7 @@
 import numpy as np
 import pytest
 
-from pandas import DataFrame, Index, PeriodIndex, Series
-import pandas._testing as tm
+from pandas import DataFrame, Index, PeriodIndex, Series, _testing as tm
 
 
 @pytest.mark.parametrize("by", ["A", "B", ["A", "B"]])
@@ -50,7 +49,7 @@ def test_size_period_index():
 def test_size_on_categorical(as_index):
     df = DataFrame([[1, 1], [2, 2]], columns=["A", "B"])
     df["A"] = df["A"].astype("category")
-    result = df.groupby(["A", "B"], as_index=as_index).size()
+    result = df.groupby(["A", "B"], as_index=as_index, observed=False).size()
 
     expected = DataFrame(
         [[1, 1, 1], [1, 2, 0], [2, 1, 0], [2, 2, 1]], columns=["A", "B", "size"]
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index 8acd051fbc643..71e182f34bb0a 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -13,10 +13,10 @@
     MultiIndex,
     Series,
     Timestamp,
+    _testing as tm,
     concat,
     date_range,
 )
-import pandas._testing as tm
 from pandas.core.groupby.groupby import DataError
 
 
@@ -994,7 +994,7 @@ def test_transform_absent_categories(func):
     x_cats = range(2)
     y = [1]
     df = DataFrame({"x": Categorical(x_vals, x_cats), "y": y})
-    result = getattr(df.y.groupby(df.x), func)()
+    result = getattr(df.y.groupby(df.x, observed=False), func)()
     expected = df.y
     tm.assert_series_equal(result, expected)
 
@@ -1153,6 +1153,7 @@ def test_transform_lambda_indexing():
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
 def test_categorical_and_not_categorical_key(observed):
     # Checks that groupby-transform, when grouping by both a categorical
     # and a non-categorical key, doesn't try to expand the output to include
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index f9b2a02920841..11fef6f271672 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -12,10 +12,10 @@
     Index,
     MultiIndex,
     Series,
+    _testing as tm,
     concat,
     date_range,
 )
-import pandas._testing as tm
 from pandas.api.types import CategoricalDtype as CDT
 from pandas.core.reshape.pivot import pivot_table
 
@@ -108,6 +108,7 @@ def test_pivot_table(self, observed):
         expected = self.data.groupby(index + [columns])["D"].agg(np.mean).unstack()
         tm.assert_frame_equal(table, expected)
 
+    @pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
     def test_pivot_table_categorical_observed_equal(self, observed):
         # issue #24923
         df = DataFrame(
@@ -193,7 +194,9 @@ def test_pivot_table_categorical(self):
             ["c", "d", "c", "d"], categories=["c", "d", "y"], ordered=True
         )
         df = DataFrame({"A": cat1, "B": cat2, "values": [1, 2, 3, 4]})
-        result = pd.pivot_table(df, values="values", index=["A", "B"], dropna=True)
+        result = pd.pivot_table(
+            df, values="values", index=["A", "B"], dropna=True, observed=False
+        )
 
         exp_index = MultiIndex.from_arrays([cat1, cat2], names=["A", "B"])
         expected = DataFrame({"values": [1, 2, 3, 4]}, index=exp_index)
@@ -212,7 +215,9 @@ def test_pivot_table_dropna_categoricals(self, dropna):
         )
 
         df["A"] = df["A"].astype(CDT(categories, ordered=False))
-        result = df.pivot_table(index="B", columns="A", values="C", dropna=dropna)
+        result = df.pivot_table(
+            index="B", columns="A", values="C", dropna=dropna, observed=False
+        )
         expected_columns = Series(["a", "b", "c"], name="A")
         expected_columns = expected_columns.astype(CDT(categories, ordered=False))
         expected_index = Series([1, 2, 3], name="B")
@@ -240,7 +245,7 @@ def test_pivot_with_non_observable_dropna(self, dropna):
             }
         )
 
-        result = df.pivot_table(index="A", values="B", dropna=dropna)
+        result = df.pivot_table(index="A", values="B", dropna=dropna, observed=False)
         expected = DataFrame(
             {"B": [2, 3]},
             index=Index(
@@ -265,7 +270,7 @@ def test_pivot_with_non_observable_dropna(self, dropna):
             }
         )
 
-        result = df.pivot_table(index="A", values="B", dropna=dropna)
+        result = df.pivot_table(index="A", values="B", dropna=dropna, observed=False)
         expected = DataFrame(
             {"B": [2, 3, 0]},
             index=Index(
@@ -281,7 +286,7 @@ def test_pivot_with_non_observable_dropna(self, dropna):
     def test_pivot_with_interval_index(self, interval_values, dropna):
         # GH 25814
         df = DataFrame({"A": interval_values, "B": 1})
-        result = df.pivot_table(index="A", values="B", dropna=dropna)
+        result = df.pivot_table(index="A", values="B", dropna=dropna, observed=False)
         expected = DataFrame({"B": 1}, index=Index(interval_values.unique(), name="A"))
         tm.assert_frame_equal(result, expected)
 
@@ -299,7 +304,13 @@ def test_pivot_with_interval_index_margins(self):
         )
 
         pivot_tab = pd.pivot_table(
-            df, index="C", columns="B", values="A", aggfunc="sum", margins=True
+            df,
+            index="C",
+            columns="B",
+            values="A",
+            aggfunc="sum",
+            margins=True,
+            observed=False,
         )
 
         result = pivot_tab["All"]
@@ -1752,6 +1763,7 @@ def test_margins_casted_to_float(self, observed):
         )
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
     def test_pivot_with_categorical(self, observed, ordered):
         # gh-21370
         idx = [np.nan, "low", "high", "low", np.nan]
@@ -1787,6 +1799,7 @@ def test_pivot_with_categorical(self, observed, ordered):
 
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
     def test_categorical_aggfunc(self, observed):
         # GH 9534
         df = DataFrame(
@@ -1807,6 +1820,7 @@ def test_categorical_aggfunc(self, observed):
         )
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.filterwarnings("ignore:Grouping by a categorical:FutureWarning")
     def test_categorical_pivot_index_ordering(self, observed):
         # GH 8731
         df = DataFrame(
@@ -2058,6 +2072,13 @@ def agg(arr):
         with pytest.raises(KeyError, match="notpresent"):
             foo.pivot_table("notpresent", "X", "Y", aggfunc=agg)
 
+    def test_pivot_table_observed_deprecated_default(self):
+        # GH 35967: pivot_table called without ``observed`` must warn here.
+        # Make sure we actually have a category to warn on; done outside the
+        # context manager so only the pivot_table call is under the assertion.
+        self.data.A = self.data.A.astype("category")
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            self.data.pivot_table(values="D", index=["A", "B"], columns=["C"])
+
 
 class TestPivot:
     def test_pivot(self):