TYP: Mypy workaround for NoDefault (#47045)

twoertwein · jbrockmendel · rhshadrach · web-flow · commit 1c8269404a77 · 2022-05-25T14:53:55.000-07:00
* TYP: NoDefault * ix mypy issues; re-write isinstance(..., NoDefault) * remove two more casts * ENH: DatetimeArray fields support non-nano (#47044) * DEPR: groupby numeric_only default (#47025) * DOC: Clarify decay argument validation in ewm when times is provided (#47026) * DOC: Fix some typos in pandas/. (#47022) * remove two more casts * avoid cast-like annotation * left/right * cannot use | Co-authored-by: jbrockmendel <jbrockmendel@gmail.com> Co-authored-by: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Co-authored-by: Matthew Roeschke <emailformattr@gmail.com> Co-authored-by: Shuangchi He <34329208+Yulv-git@users.noreply.github.com>
diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi
@@ -23,9 +23,11 @@ ndarray_obj_2d = np.ndarray
 
 from enum import Enum
 
-class NoDefault(Enum): ...
+class _NoDefault(Enum):
+    no_default = ...
 
-no_default: NoDefault
+no_default = _NoDefault.no_default
+NoDefault = Literal[_NoDefault.no_default]
 
 i8max: int
 u8max: int
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -1,6 +1,7 @@
 from collections import abc
 from decimal import Decimal
 from enum import Enum
+from typing import Literal
 import warnings
 
 cimport cython
@@ -2791,7 +2792,7 @@ cdef _infer_all_nats(dtype, ndarray datetimes, ndarray timedeltas):
     return result
 
 
-class NoDefault(Enum):
+class _NoDefault(Enum):
     # We make this an Enum
     # 1) because it round-trips through pickle correctly (see GH#40397)
     # 2) because mypy does not understand singletons
@@ -2802,7 +2803,8 @@ class NoDefault(Enum):
 
 
 # Note: no_default is exported to the public API in pandas.api.extensions
-no_default = NoDefault.no_default  # Sentinel indicating the default value.
+no_default = _NoDefault.no_default  # Sentinel indicating the default value.
+NoDefault = Literal[_NoDefault.no_default]
 
 
 @cython.boundscheck(False)
diff --git a/pandas/_testing/asserters.py b/pandas/_testing/asserters.py
@@ -112,12 +112,7 @@ def assert_almost_equal(
             FutureWarning,
             stacklevel=find_stack_level(),
         )
-        # https://github.com/python/mypy/issues/7642
-        # error: Argument 1 to "_get_tol_from_less_precise" has incompatible
-        # type "Union[bool, int, NoDefault]"; expected "Union[bool, int]"
-        rtol = atol = _get_tol_from_less_precise(
-            check_less_precise  # type: ignore[arg-type]
-        )
+        rtol = atol = _get_tol_from_less_precise(check_less_precise)
 
     if isinstance(left, Index):
         assert_index_equal(
@@ -345,12 +340,7 @@ def _get_ilevel_values(index, level):
             FutureWarning,
             stacklevel=find_stack_level(),
         )
-        # https://github.com/python/mypy/issues/7642
-        # error: Argument 1 to "_get_tol_from_less_precise" has incompatible
-        # type "Union[bool, int, NoDefault]"; expected "Union[bool, int]"
-        rtol = atol = _get_tol_from_less_precise(
-            check_less_precise  # type: ignore[arg-type]
-        )
+        rtol = atol = _get_tol_from_less_precise(check_less_precise)
 
     # instance validation
     _check_isinstance(left, right, Index)
diff --git a/pandas/_typing.py b/pandas/_typing.py
@@ -313,7 +313,8 @@ def closed(self) -> bool:
 XMLParsers = Literal["lxml", "etree"]
 
 # Interval closed type
-IntervalClosedType = Literal["left", "right", "both", "neither"]
+IntervalLeftRight = Literal["left", "right"]
+IntervalClosedType = Union[IntervalLeftRight, Literal["both", "neither"]]
 
 # datetime and NaTType
 DatetimeNaTType = Union[datetime, "NaTType"]
diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -263,7 +263,7 @@ def _simple_new(
         cls: type[IntervalArrayT],
         left,
         right,
-        closed=None,
+        closed: IntervalClosedType | None = None,
         copy: bool = False,
         dtype: Dtype | None = None,
         verify_integrity: bool = True,
@@ -416,7 +416,7 @@ def _from_factorized(
     def from_breaks(
         cls: type[IntervalArrayT],
         breaks,
-        closed="right",
+        closed: IntervalClosedType | None = "right",
         copy: bool = False,
         dtype: Dtype | None = None,
     ) -> IntervalArrayT:
@@ -492,7 +492,7 @@ def from_arrays(
         cls: type[IntervalArrayT],
         left,
         right,
-        closed="right",
+        closed: IntervalClosedType | None = "right",
         copy: bool = False,
         dtype: Dtype | None = None,
     ) -> IntervalArrayT:
@@ -956,10 +956,10 @@ def _concat_same_type(
         -------
         IntervalArray
         """
-        closed = {interval.closed for interval in to_concat}
-        if len(closed) != 1:
+        closed_set = {interval.closed for interval in to_concat}
+        if len(closed_set) != 1:
             raise ValueError("Intervals must all be closed on the same side.")
-        closed = closed.pop()
+        closed = closed_set.pop()
 
         left = np.concatenate([interval.left for interval in to_concat])
         right = np.concatenate([interval.right for interval in to_concat])
@@ -1328,7 +1328,7 @@ def overlaps(self, other):
     # ---------------------------------------------------------------------
 
     @property
-    def closed(self):
+    def closed(self) -> IntervalClosedType:
         """
         Whether the intervals are closed on the left-side, right-side, both or
         neither.
diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -673,7 +673,7 @@ def resolve_numeric_only(numeric_only: bool | None | lib.NoDefault) -> bool:
         # first default to None
         result = False
     else:
-        result = cast(bool, numeric_only)
+        result = numeric_only
     return result
 
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -6279,8 +6279,7 @@ def dropna(
             # faster equivalent to 'agg_obj.count(agg_axis) > 0'
             mask = notna(agg_obj).any(axis=agg_axis, bool_only=False)
         else:
-            if how is not no_default:
-                raise ValueError(f"invalid how option: {how}")
+            raise ValueError(f"invalid how option: {how}")
 
         if np.all(mask):
             result = self.copy()
@@ -8050,9 +8049,6 @@ def groupby(
             raise TypeError("You have to supply one of 'by' and 'level'")
         axis = self._get_axis_number(axis)
 
-        # https://github.com/python/mypy/issues/7642
-        # error: Argument "squeeze" to "DataFrameGroupBy" has incompatible type
-        # "Union[bool, NoDefault]"; expected "bool"
         return DataFrameGroupBy(
             obj=self,
             keys=by,
@@ -8061,7 +8057,7 @@ def groupby(
             as_index=as_index,
             sort=sort,
             group_keys=group_keys,
-            squeeze=squeeze,  # type: ignore[arg-type]
+            squeeze=squeeze,
             observed=observed,
             dropna=dropna,
         )
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -7899,8 +7899,8 @@ def between_time(
                 FutureWarning,
                 stacklevel=find_stack_level(),
             )
-            left = True if isinstance(include_start, lib.NoDefault) else include_start
-            right = True if isinstance(include_end, lib.NoDefault) else include_end
+            left = True if include_start is lib.no_default else include_start
+            right = True if include_end is lib.no_default else include_end
 
             inc_dict: dict[tuple[bool_t, bool_t], IntervalClosedType] = {
                 (True, True): "both",
@@ -10690,7 +10690,6 @@ def _stat_function(
 
         if axis is None:
             axis = self._stat_axis_number
-        axis = cast(Axis, axis)
         if level is not None:
             warnings.warn(
                 "Using the level keyword in DataFrame and Series aggregations is "
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -1580,7 +1580,7 @@ def idxmax(
             # DataFrame.idxmax for backwards compatibility
             numeric_only_arg = None if axis == 0 else False
         else:
-            numeric_only_arg = cast(bool, numeric_only)
+            numeric_only_arg = numeric_only
 
         def func(df):
             res = df._reduce(
@@ -1616,7 +1616,7 @@ def idxmin(
             # DataFrame.idxmin for backwards compatibility
             numeric_only_arg = None if axis == 0 else False
         else:
-            numeric_only_arg = cast(bool, numeric_only)
+            numeric_only_arg = numeric_only
 
         def func(df):
             res = df._reduce(
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -1277,9 +1277,7 @@ def _resolve_numeric_only(
             else:
                 numeric_only = False
 
-        # error: Incompatible return value type (got "Union[bool, NoDefault]",
-        # expected "bool")
-        return numeric_only  # type: ignore[return-value]
+        return numeric_only
 
     def _maybe_warn_numeric_only_depr(
         self, how: str, result: DataFrame | Series, numeric_only: bool | lib.NoDefault
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
@@ -34,6 +34,7 @@
     Dtype,
     DtypeObj,
     IntervalClosedType,
+    IntervalLeftRight,
     npt,
 )
 from pandas.util._decorators import (
@@ -1039,12 +1040,12 @@ def date_range(
     DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'],
                   dtype='datetime64[ns]', freq='D')
     """
-    if inclusive is not None and not isinstance(closed, lib.NoDefault):
+    if inclusive is not None and closed is not lib.no_default:
         raise ValueError(
             "Deprecated argument `closed` cannot be passed"
             "if argument `inclusive` is not None"
         )
-    elif not isinstance(closed, lib.NoDefault):
+    elif closed is not lib.no_default:
         warnings.warn(
             "Argument `closed` is deprecated in favor of `inclusive`.",
             FutureWarning,
@@ -1087,7 +1088,7 @@ def bdate_range(
     name: Hashable = None,
     weekmask=None,
     holidays=None,
-    closed: lib.NoDefault = lib.no_default,
+    closed: IntervalLeftRight | lib.NoDefault | None = lib.no_default,
     inclusive: IntervalClosedType | None = None,
     **kwargs,
 ) -> DatetimeIndex:
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
@@ -251,7 +251,7 @@ def __new__(
     def from_breaks(
         cls,
         breaks,
-        closed: str = "right",
+        closed: IntervalClosedType | None = "right",
         name: Hashable = None,
         copy: bool = False,
         dtype: Dtype | None = None,
@@ -282,7 +282,7 @@ def from_arrays(
         cls,
         left,
         right,
-        closed: str = "right",
+        closed: IntervalClosedType = "right",
         name: Hashable = None,
         copy: bool = False,
         dtype: Dtype | None = None,
@@ -957,7 +957,7 @@ def interval_range(
     periods=None,
     freq=None,
     name: Hashable = None,
-    closed: lib.NoDefault = lib.no_default,
+    closed: IntervalClosedType | lib.NoDefault = lib.no_default,
     inclusive: IntervalClosedType | None = None,
 ) -> IntervalIndex:
     """
@@ -1054,12 +1054,12 @@ def interval_range(
     IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]],
                   dtype='interval[int64, both]')
     """
-    if inclusive is not None and not isinstance(closed, lib.NoDefault):
+    if inclusive is not None and closed is not lib.no_default:
         raise ValueError(
             "Deprecated argument `closed` cannot be passed "
             "if argument `inclusive` is not None"
         )
-    elif not isinstance(closed, lib.NoDefault):
+    elif closed is not lib.no_default:
         warnings.warn(
             "Argument `closed` is deprecated in favor of `inclusive`.",
             FutureWarning,
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -10,6 +10,7 @@
     Hashable,
     Iterable,
     List,
+    Literal,
     Sequence,
     Tuple,
     cast,
@@ -1397,7 +1398,7 @@ def format(
             sparsify = get_option("display.multi_sparse")
 
         if sparsify:
-            sentinel = ""
+            sentinel: Literal[""] | bool | lib.NoDefault = ""
             # GH3547 use value of sparsify as sentinel if it's "Falsey"
             assert isinstance(sparsify, bool) or sparsify is lib.no_default
             if sparsify in [False, lib.no_default]:
diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py
@@ -16,6 +16,7 @@
     Timestamp,
 )
 from pandas._libs.lib import infer_dtype
+from pandas._typing import IntervalLeftRight
 
 from pandas.core.dtypes.common import (
     DT64NS_DTYPE,
@@ -560,7 +561,7 @@ def _format_labels(
     bins, precision: int, right: bool = True, include_lowest: bool = False, dtype=None
 ):
     """based on the dtype, return our labels"""
-    closed = "right" if right else "left"
+    closed: IntervalLeftRight = "right" if right else "left"
 
     formatter: Callable[[Any], Timestamp] | Callable[[Any], Timedelta]
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -1952,8 +1952,6 @@ def groupby(
             raise TypeError("You have to supply one of 'by' and 'level'")
         axis = self._get_axis_number(axis)
 
-        # error: Argument "squeeze" to "SeriesGroupBy" has incompatible type
-        # "Union[bool, NoDefault]"; expected "bool"
         return SeriesGroupBy(
             obj=self,
             keys=by,
@@ -1962,7 +1960,7 @@ def groupby(
             as_index=as_index,
             sort=sort,
             group_keys=group_keys,
-            squeeze=squeeze,  # type: ignore[arg-type]
+            squeeze=squeeze,
             observed=observed,
             dropna=dropna,
         )