add mode.null_grouper_warning option

tehunter · tehunter · commit 41131a14324a · 2025-04-25T13:50:29.000Z
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
@@ -457,6 +457,22 @@ def is_terminal() -> bool:
     )
 
 
+null_grouper_warning = """
+: bool
+    Whether to emit NullKeyWarning when grouping without an explicit ``dropna``
+    would result in a null group key being dropped.
+    The default is False.
+"""
+
+with cf.config_prefix("mode"):  # full option key: "mode.null_grouper_warning"
+    cf.register_option(
+        "null_grouper_warning",
+        False,
+        null_grouper_warning,
+        validator=is_bool,
+    )
+
+
 string_storage_doc = """
 : string
     The default storage for StringDtype.
diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
@@ -13,6 +13,8 @@
 
 import numpy as np
 
+from pandas._config.config import get_option
+
 from pandas._libs import lib
 from pandas._libs.tslibs import OutOfBoundsDatetime
 from pandas.errors import (
@@ -621,6 +623,7 @@ def dropna(self) -> bool:
     @cache_readonly
     def _codes_and_uniques(self) -> tuple[npt.NDArray[np.signedinteger], ArrayLike]:
         uniques: ArrayLike
+        unspecified_dropna = self._dropna is lib.no_default
         if self._passed_categorical:
             # we make a CategoricalIndex out of the cat grouper
             # preserving the categories / ordered attributes;
@@ -662,7 +665,7 @@ def _codes_and_uniques(self) -> tuple[npt.NDArray[np.signedinteger], ArrayLike]:
                         # NA code is based on first appearance, increment higher codes
                         codes = np.where(codes >= na_code, codes + 1, codes)
                     codes = np.where(na_mask, na_code, codes)
-                elif self._dropna is lib.no_default:
+                elif get_option("mode.null_grouper_warning") and unspecified_dropna:
                     warnings.warn(
                         _NULL_KEY_MESSAGE,
                         NullKeyWarning,
@@ -688,8 +691,11 @@ def _codes_and_uniques(self) -> tuple[npt.NDArray[np.signedinteger], ArrayLike]:
             codes, uniques = algorithms.factorize(  # type: ignore[assignment]
                 self.grouping_vector, sort=self._sort, use_na_sentinel=self.dropna
             )
-            # TODO: Is `min(codes)` or `-1 in codes` faster?
-            if self._dropna is lib.no_default and (codes == -1).any():
+            if (
+                get_option("mode.null_grouper_warning")
+                and unspecified_dropna
+                and (codes == -1).any()  # .min() raises ValueError on empty codes
+            ):
                 warnings.warn(
                     _NULL_KEY_MESSAGE,
                     NullKeyWarning,
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
@@ -10,6 +10,12 @@
 from pandas.tests.groupby import get_groupby_method_args
 
 
+@pytest.fixture(scope="module", autouse=True)
+def setup_warnings():  # opt every test in this module in to NullKeyWarning
+    with pd.option_context("mode.null_grouper_warning", True):
+        yield
+
+
 @pytest.mark.parametrize(
     "dropna, tuples, outputs",
     [