CLN: TODOs and FIXMEs (#45088)

jbrockmendel · web-flow · commit ec0c06bace62 · 2021-12-28T09:20:20.000-05:00
diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx
@@ -378,9 +378,6 @@ class NaTType(_NaT):
     def __reduce__(self):
         return (__nat_unpickle, (None, ))
 
-    def __rdiv__(self, other):
-        return _nat_rdivide_op(self, other)
-
     def __rtruediv__(self, other):
         return _nat_rdivide_op(self, other)
 
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -63,7 +63,10 @@
     needs_i8_conversion,
 )
 from pandas.core.dtypes.concat import concat_compat
-from pandas.core.dtypes.dtypes import PandasDtype
+from pandas.core.dtypes.dtypes import (
+    ExtensionDtype,
+    PandasDtype,
+)
 from pandas.core.dtypes.generic import (
     ABCDatetimeArray,
     ABCExtensionArray,
@@ -492,7 +495,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> npt.NDArray[np.bool_]:
     elif needs_i8_conversion(values.dtype):
         return isin(comps, values.astype(object))
 
-    elif is_extension_array_dtype(values.dtype):
+    elif isinstance(values.dtype, ExtensionDtype):
         return isin(np.asarray(comps), np.asarray(values))
 
     # GH16012
@@ -511,19 +514,7 @@ def f(c, v):
             f = np.in1d
 
     else:
-        # error: List item 0 has incompatible type "Union[Any, dtype[Any],
-        # ExtensionDtype]"; expected "Union[dtype[Any], None, type, _SupportsDType, str,
-        # Tuple[Any, Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any,
-        # Any]]"
-        # error: List item 1 has incompatible type "Union[Any, ExtensionDtype]";
-        # expected "Union[dtype[Any], None, type, _SupportsDType, str, Tuple[Any,
-        # Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]"
-        # error: List item 1 has incompatible type "Union[dtype[Any], ExtensionDtype]";
-        # expected "Union[dtype[Any], None, type, _SupportsDType, str, Tuple[Any,
-        # Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]"
-        common = np.find_common_type(
-            [values.dtype, comps.dtype], []  # type: ignore[list-item]
-        )
+        common = np.find_common_type([values.dtype, comps.dtype], [])
         values = values.astype(common, copy=False)
         comps = comps.astype(common, copy=False)
         f = htable.ismember
diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py
@@ -127,7 +127,7 @@ def coerce_to_array(
         return values, mask
 
     values = np.array(values, copy=copy)
-    if is_object_dtype(values):
+    if is_object_dtype(values.dtype):
         inferred_type = lib.infer_dtype(values, skipna=True)
         if inferred_type == "empty":
             pass
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
@@ -177,7 +177,7 @@ def coerce_to_array(
 
     values = np.array(values, copy=copy)
     inferred_type = None
-    if is_object_dtype(values) or is_string_dtype(values):
+    if is_object_dtype(values.dtype) or is_string_dtype(values.dtype):
         inferred_type = lib.infer_dtype(values, skipna=True)
         if inferred_type == "empty":
             pass
diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -55,6 +55,7 @@
 
 from pandas.core import (
     algorithms,
+    nanops,
     ops,
 )
 from pandas.core.accessor import DirNamesMixin
@@ -70,7 +71,6 @@
     ensure_wrapped_if_datetimelike,
     extract_array,
 )
-import pandas.core.nanops as nanops
 
 if TYPE_CHECKING:
 
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -107,6 +107,8 @@
 _int32_max = np.iinfo(np.int32).max
 _int64_max = np.iinfo(np.int64).max
 
+_dtype_obj = np.dtype(object)
+
 NumpyArrayT = TypeVar("NumpyArrayT", bound=np.ndarray)
 
 
@@ -123,7 +125,7 @@ def maybe_convert_platform(
         #  or ExtensionArray here.
         arr = values
 
-    if arr.dtype == object:
+    if arr.dtype == _dtype_obj:
         arr = cast(np.ndarray, arr)
         arr = lib.maybe_convert_objects(arr)
 
@@ -159,7 +161,7 @@ def maybe_box_datetimelike(value: Scalar, dtype: Dtype | None = None) -> Scalar:
     -------
     scalar
     """
-    if dtype == object:
+    if dtype == _dtype_obj:
         pass
     elif isinstance(value, (np.datetime64, datetime)):
         value = Timestamp(value)
@@ -662,9 +664,7 @@ def _ensure_dtype_type(value, dtype: np.dtype):
     """
     # Start with exceptions in which we do _not_ cast to numpy types
 
-    # error: Non-overlapping equality check (left operand type: "dtype[Any]", right
-    # operand type: "Type[object_]")
-    if dtype == np.object_:  # type: ignore[comparison-overlap]
+    if dtype == _dtype_obj:
         return value
 
     # Note: before we get here we have already excluded isna(value)
@@ -1111,10 +1111,7 @@ def astype_nansafe(
         raise ValueError("dtype must be np.dtype or ExtensionDtype")
 
     if arr.dtype.kind in ["m", "M"] and (
-        issubclass(dtype.type, str)
-        # error: Non-overlapping equality check (left operand type: "dtype[Any]", right
-        # operand type: "Type[object]")
-        or dtype == object  # type: ignore[comparison-overlap]
+        issubclass(dtype.type, str) or dtype == _dtype_obj
     ):
         from pandas.core.construction import ensure_wrapped_if_datetimelike
 
@@ -1124,7 +1121,7 @@ def astype_nansafe(
     if issubclass(dtype.type, str):
         return lib.ensure_string_array(arr, skipna=skipna, convert_na_value=False)
 
-    elif is_datetime64_dtype(arr):
+    elif is_datetime64_dtype(arr.dtype):
         # Non-overlapping equality check (left operand type: "dtype[Any]", right
         # operand type: "Type[signedinteger[Any]]")
         if dtype == np.int64:  # type: ignore[comparison-overlap]
@@ -1146,7 +1143,7 @@ def astype_nansafe(
 
         raise TypeError(f"cannot astype a datetimelike from [{arr.dtype}] to [{dtype}]")
 
-    elif is_timedelta64_dtype(arr):
+    elif is_timedelta64_dtype(arr.dtype):
         # error: Non-overlapping equality check (left operand type: "dtype[Any]", right
         # operand type: "Type[signedinteger[Any]]")
         if dtype == np.int64:  # type: ignore[comparison-overlap]
@@ -1170,7 +1167,7 @@ def astype_nansafe(
     elif np.issubdtype(arr.dtype, np.floating) and np.issubdtype(dtype, np.integer):
         return astype_float_to_int_nansafe(arr, dtype, copy)
 
-    elif is_object_dtype(arr):
+    elif is_object_dtype(arr.dtype):
 
         # work around NumPy brokenness, #1987
         if np.issubdtype(dtype.type, np.integer):
@@ -1718,7 +1715,7 @@ def maybe_cast_to_datetime(
             # and no coercion specified
             value = sanitize_to_nanoseconds(value)
 
-        elif value.dtype == object:
+        elif value.dtype == _dtype_obj:
             value = maybe_infer_to_datetimelike(value)
 
     elif isinstance(value, list):
@@ -1862,9 +1859,7 @@ def construct_2d_arraylike_from_scalar(
 
     if dtype.kind in ["m", "M"]:
         value = maybe_unbox_datetimelike_tz_deprecation(value, dtype)
-    # error: Non-overlapping equality check (left operand type: "dtype[Any]", right
-    # operand type: "Type[object]")
-    elif dtype == object:  # type: ignore[comparison-overlap]
+    elif dtype == _dtype_obj:
         if isinstance(value, (np.timedelta64, np.datetime64)):
             # calling np.array below would cast to pytimedelta/pydatetime
             out = np.empty(shape, dtype=object)
@@ -2190,9 +2185,7 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool:
         # ExtensionBlock._can_hold_element
         return True
 
-    # error: Non-overlapping equality check (left operand type: "dtype[Any]", right
-    # operand type: "Type[object]")
-    if dtype == object:  # type: ignore[comparison-overlap]
+    if dtype == _dtype_obj:
         return True
 
     tipo = maybe_infer_dtype_type(element)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -7997,13 +7997,13 @@ def pivot(self, index=None, columns=None, values=None) -> DataFrame:
         ...                     aggfunc={'D': np.mean,
         ...                              'E': [min, max, np.mean]})
         >>> table
-                        D    E
-                    mean  max      mean  min
+                          D   E
+                       mean max      mean  min
         A   C
-        bar large  5.500000  9.0  7.500000  6.0
-            small  5.500000  9.0  8.500000  8.0
-        foo large  2.000000  5.0  4.500000  4.0
-            small  2.333333  6.0  4.333333  2.0
+        bar large  5.500000   9  7.500000    6
+            small  5.500000   9  8.500000    8
+        foo large  2.000000   5  4.500000    4
+            small  2.333333   6  4.333333    2
         """
 
     @Substitution("")
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -443,9 +443,8 @@ def __new__(
             return Index._simple_new(data, name=name)
 
         elif is_ea_or_datetimelike_dtype(data_dtype):
-            # Argument 1 to "_dtype_to_subclass" of "Index" has incompatible type
-            # "Optional[Any]"; expected "Union[dtype[Any], ExtensionDtype]"  [arg-type]
-            klass = cls._dtype_to_subclass(data_dtype)  # type: ignore[arg-type]
+            data_dtype = cast(DtypeObj, data_dtype)
+            klass = cls._dtype_to_subclass(data_dtype)
             if klass is not Index:
                 result = klass(data, copy=copy, name=name, **kwargs)
                 if dtype is not None:
@@ -6245,7 +6244,7 @@ def _maybe_cast_slice_bound(self, label, side: str_t, kind=no_default):
         # wish to have special treatment for floats/ints, e.g. Float64Index and
         # datetimelike Indexes
         # reject them, if index does not contain label
-        if (is_float(label) or is_integer(label)) and label not in self._values:
+        if (is_float(label) or is_integer(label)) and label not in self:
             raise self._invalid_indexer("slice", label)
 
         return label
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -762,7 +762,7 @@ def replace_list(
 
         src_len = len(pairs) - 1
 
-        if is_string_dtype(values):
+        if is_string_dtype(values.dtype):
             # Calculate the mask once, prior to the call of comp
             # in order to avoid repeating the same computations
             mask = ~isna(values)
diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
@@ -60,11 +60,11 @@ def pivot_table(
     columns=None,
     aggfunc: AggFuncType = "mean",
     fill_value=None,
-    margins=False,
-    dropna=True,
-    margins_name="All",
-    observed=False,
-    sort=True,
+    margins: bool = False,
+    dropna: bool = True,
+    margins_name: str = "All",
+    observed: bool = False,
+    sort: bool = True,
 ) -> DataFrame:
     index = _convert_by(index)
     columns = _convert_by(columns)
@@ -178,13 +178,12 @@ def __internal_pivot_table(
                 and v in agged
                 and not is_integer_dtype(agged[v])
             ):
-                if isinstance(agged[v], ABCDataFrame):
+                if not isinstance(agged[v], ABCDataFrame):
                     # exclude DataFrame case bc maybe_downcast_to_dtype expects
                     #  ArrayLike
-                    # TODO: why does test_pivot_table_doctest_case fail if
-                    # we don't do this apparently-unnecessary setitem?
-                    agged[v] = agged[v]
-                else:
+                    # e.g. in test_pivot_table_multiindex_columns_doctest_case
+                    #  agged.columns is a MultiIndex and 'v' indexes only
+                    #  its first level, so agged[v] is a DataFrame.
                     agged[v] = maybe_downcast_to_dtype(agged[v], data[v].dtype)
 
     table = agged
@@ -253,7 +252,7 @@ def __internal_pivot_table(
 
 def _add_margins(
     table: DataFrame | Series,
-    data,
+    data: DataFrame,
     values,
     rows,
     cols,
@@ -331,7 +330,7 @@ def _add_margins(
     return result
 
 
-def _compute_grand_margin(data, values, aggfunc, margins_name: str = "All"):
+def _compute_grand_margin(data: DataFrame, values, aggfunc, margins_name: str = "All"):
 
     if values:
         grand_margin = {}
@@ -522,7 +521,7 @@ def crosstab(
     rownames=None,
     colnames=None,
     aggfunc=None,
-    margins=False,
+    margins: bool = False,
     margins_name: str = "All",
     dropna: bool = True,
     normalize=False,
@@ -682,7 +681,9 @@ def crosstab(
     return table
 
 
-def _normalize(table, normalize, margins: bool, margins_name="All"):
+def _normalize(
+    table: DataFrame, normalize, margins: bool, margins_name="All"
+) -> DataFrame:
 
     if not isinstance(normalize, (bool, str)):
         axis_subs = {0: "index", 1: "columns"}
diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
@@ -686,7 +686,7 @@ def _stack_multi_column_index(columns: MultiIndex) -> MultiIndex:
 
 
 def _stack_multi_columns(frame, level_num=-1, dropna=True):
-    def _convert_level_number(level_num, columns):
+    def _convert_level_number(level_num: int, columns):
         """
         Logic for converting the level number to something we can safely pass
         to swaplevel.
diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
@@ -2084,11 +2084,12 @@ def agg(arr):
         with pytest.raises(KeyError, match="notpresent"):
             foo.pivot_table("notpresent", "X", "Y", aggfunc=agg)
 
-    def test_pivot_table_doctest_case(self):
-        # TODO: better name.  the relevant characteristic is that
-        #  the call to maybe_downcast_to_dtype(agged[v], data[v].dtype) in
+    def test_pivot_table_multiindex_columns_doctest_case(self):
+        # The relevant characteristic is that the call
+        #  to maybe_downcast_to_dtype(agged[v], data[v].dtype) in
         #  __internal_pivot_table has `agged[v]` a DataFrame instead of Series,
-        #  i.e agged.columns is not unique
+        #  in this case because agged.columns is a MultiIndex and 'v'
+        #  is only indexing on its first level.
         df = DataFrame(
             {
                 "A": ["foo", "foo", "foo", "foo", "foo", "bar", "bar", "bar", "bar"],
@@ -2131,6 +2132,8 @@ def test_pivot_table_doctest_case(self):
             ]
         )
         expected = DataFrame(vals, columns=cols, index=index)
+        expected[("E", "min")] = expected[("E", "min")].astype(np.int64)
+        expected[("E", "max")] = expected[("E", "max")].astype(np.int64)
         tm.assert_frame_equal(table, expected)
 
     def test_pivot_table_sort_false(self):

Original file line number	Diff line number	Diff line change
`@@ -55,6 +55,7 @@`
`55`	`55`
`56`	`56`	`from pandas.core import (`
`57`	`57`	`algorithms,`
	`58`	`+ nanops,`
`58`	`59`	`ops,`
`59`	`60`	`)`
`60`	`61`	`from pandas.core.accessor import DirNamesMixin`
`@@ -70,7 +71,6 @@`
`70`	`71`	`ensure_wrapped_if_datetimelike,`
`71`	`72`	`extract_array,`
`72`	`73`	`)`
`73`		`-import pandas.core.nanops as nanops`
`74`	`74`
`75`	`75`	`if TYPE_CHECKING:`
`76`	`76`