Commit 56498ae

CLN: assorted follow-ups (#45402)
1 parent c2fc924

File tree

16 files changed: +119 -74 lines changed

pandas/core/apply.py (+1 -1)

@@ -996,7 +996,7 @@ def series_generator(self):
                 # GH#35462 re-pin mgr in case setitem changed it
                 ser._mgr = mgr
                 mgr.set_values(arr)
-                ser.name = name
+                object.__setattr__(ser, "_name", name)
                 yield ser

     @property

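Note on the apply.py change: the Series.name property setter validates the new name on every assignment, so writing the underlying attribute with object.__setattr__ skips that per-row overhead inside the generator loop. A minimal sketch of the pattern in plain Python (illustrative class, not pandas internals):

    class Labeled:
        @property
        def name(self):
            return self._name

        @name.setter
        def name(self, value):
            hash(value)  # stand-in for the validation a public setter performs
            object.__setattr__(self, "_name", value)

    obj = Labeled()
    obj.name = "a"                          # goes through the property setter
    object.__setattr__(obj, "_name", "b")   # writes the attribute directly
    print(obj.name)                         # prints: b
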
pandas/core/arrays/masked.py (+1 -1)

@@ -93,7 +93,7 @@
 
 class BaseMaskedDtype(ExtensionDtype):
     """
-    Base class for dtypes for BasedMaskedArray subclasses.
+    Base class for dtypes for BaseMaskedArray subclasses.
     """

     name: str

pandas/core/arrays/string_.py (-2)

@@ -319,8 +319,6 @@ def __init__(self, values, copy=False):
         super().__init__(values, copy=copy)
         if not isinstance(values, type(self)):
             self._validate()
-        # error: Incompatible types in assignment (expression has type "StringDtype",
-        # variable has type "PandasDtype")
         NDArrayBacked.__init__(self, self._ndarray, StringDtype(storage="python"))

     def _validate(self):

pandas/core/construction.py (+21 -13)

@@ -561,6 +561,10 @@ def sanitize_array(
                     # it is lossy.
                     dtype = cast(np.dtype, dtype)
                     return np.array(data, dtype=dtype, copy=copy)
+
+                # We ignore the dtype arg and return floating values,
+                # e.g. test_constructor_floating_data_int_dtype
+                # TODO: where is the discussion that documents the reason for this?
                 subarr = np.array(data, copy=copy)
         else:
             # we will try to copy by-definition here
@@ -591,18 +595,21 @@
             try:
                 subarr = _try_cast(data, dtype, copy, raise_cast_failure)
             except ValueError:
-                casted = np.array(data, copy=False)
-                if casted.dtype.kind == "f" and is_integer_dtype(dtype):
-                    # GH#40110 match the behavior we have if we passed
-                    # a ndarray[float] to begin with
-                    return sanitize_array(
-                        casted,
-                        index,
-                        dtype,
-                        copy=False,
-                        raise_cast_failure=raise_cast_failure,
-                        allow_2d=allow_2d,
-                    )
+                if is_integer_dtype(dtype):
+                    casted = np.array(data, copy=False)
+                    if casted.dtype.kind == "f":
+                        # GH#40110 match the behavior we have if we passed
+                        # a ndarray[float] to begin with
+                        return sanitize_array(
+                            casted,
+                            index,
+                            dtype,
+                            copy=False,
+                            raise_cast_failure=raise_cast_failure,
+                            allow_2d=allow_2d,
+                        )
+                    else:
+                        raise
                 else:
                     raise
         else:
@@ -762,7 +769,8 @@ def _try_cast(
         # data differently; _from_sequence treats naive as wall times,
         # while maybe_cast_to_datetime treats it as UTC
         # see test_maybe_promote_any_numpy_dtype_with_datetimetz
-
+        # TODO(2.0): with deprecations enforced, should be able to remove
+        #  special case.
         return maybe_cast_to_datetime(arr, dtype)
         # TODO: copy?
 

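Note on the sanitize_array change: the fallback now fires only when an integer dtype was requested and the raw values turn out to be floats; any other ValueError from _try_cast propagates unchanged. A standalone sketch of the restructured control flow, using hypothetical helpers rather than the pandas functions:

    import numpy as np

    def strict_cast(data, dtype):
        # stand-in for _try_cast: raise ValueError when the cast would be lossy
        arr = np.asarray(data)
        out = arr.astype(dtype)
        if not np.array_equal(out.astype(arr.dtype), arr):
            raise ValueError("lossy cast")
        return out

    def coerce(data, dtype):
        try:
            return strict_cast(data, dtype)
        except ValueError:
            if np.issubdtype(dtype, np.integer):
                casted = np.asarray(data)
                if casted.dtype.kind == "f":
                    # GH#40110-style outcome: keep the floating values
                    return casted
            raise

    print(coerce([1.0, 2.0], np.int64).dtype)  # int64 (lossless cast succeeds)
    print(coerce([1.5, 2.5], np.int64).dtype)  # float64 (lossy cast avoided)
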
pandas/core/dtypes/cast.py (+5 -10)

@@ -1447,23 +1447,18 @@ def find_result_type(left: ArrayLike, right: Any) -> DtypeObj:
     """
     new_dtype: DtypeObj

-    if left.dtype.kind in ["i", "u", "c"] and (
-        lib.is_integer(right) or lib.is_float(right)
+    if (
+        isinstance(left, np.ndarray)
+        and left.dtype.kind in ["i", "u", "c"]
+        and (lib.is_integer(right) or lib.is_float(right))
     ):
         # e.g. with int8 dtype and right=512, we want to end up with
         # np.int16, whereas infer_dtype_from(512) gives np.int64,
         # which will make us upcast too far.
         if lib.is_float(right) and right.is_integer() and left.dtype.kind != "f":
             right = int(right)

-        # Argument 1 to "result_type" has incompatible type "Union[ExtensionArray,
-        # ndarray[Any, Any]]"; expected "Union[Union[_SupportsArray[dtype[Any]],
-        # _NestedSequence[_SupportsArray[dtype[Any]]], bool, int, float, complex,
-        # str, bytes, _NestedSequence[Union[bool, int, float, complex, str, bytes]]],
-        # Union[dtype[Any], None, Type[Any], _SupportsDType[dtype[Any]], str,
-        # Union[Tuple[Any, int], Tuple[Any, Union[SupportsIndex,
-        # Sequence[SupportsIndex]]], List[Any], _DTypeDict, Tuple[Any, Any]]]]"
-        new_dtype = np.result_type(left, right)  # type:ignore[arg-type]
+        new_dtype = np.result_type(left, right)

     else:
         dtype, _ = infer_dtype_from(right, pandas_dtype=True)

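Note on the find_result_type change: guarding on isinstance(left, np.ndarray) means np.result_type only ever receives an ndarray plus a Python scalar, which is also why the long type-ignore comment could be dropped. Roughly what that promotion does; the int16 result below reflects the value-based promotion of NumPy releases current at the time of this commit (NEP 50 changes scalar promotion in later NumPy):

    import numpy as np

    left = np.array([1, 2], dtype=np.int8)

    # a scalar that does not fit int8 promotes only as far as needed
    print(np.result_type(left, 512))  # int16 under value-based promotion
    print(np.result_type(left, 4))    # int8

    # an integral float such as 4.0 is converted to int first so it does not
    # drag the result up to a float dtype
    right = 4.0
    if isinstance(right, float) and right.is_integer():
        right = int(right)
    print(np.result_type(left, right))  # int8
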
pandas/core/frame.py (+3 -2)

@@ -148,6 +148,7 @@
 from pandas.core.arrays import (
     DatetimeArray,
     ExtensionArray,
+    PeriodArray,
     TimedeltaArray,
 )
 from pandas.core.arrays.sparse import SparseFrameAccessor
@@ -900,7 +901,7 @@ def _can_fast_transpose(self) -> bool:
     @property
     def _values(  # type: ignore[override]
         self,
-    ) -> np.ndarray | DatetimeArray | TimedeltaArray:
+    ) -> np.ndarray | DatetimeArray | TimedeltaArray | PeriodArray:
         """
         Analogue to ._values that may return a 2D ExtensionArray.
         """
@@ -925,7 +926,7 @@ def _values(  # type: ignore[override]
             return self.values

         # more generally, whatever we allow in NDArrayBackedExtensionBlock
-        arr = cast("np.ndarray | DatetimeArray | TimedeltaArray", arr)
+        arr = cast("np.ndarray | DatetimeArray | TimedeltaArray | PeriodArray", arr)
         return arr.T

     # ----------------------------------------------------------------------

pandas/core/groupby/ops.py (+5 -10)

@@ -705,17 +705,14 @@ def get_iterator(
         """
         splitter = self._get_splitter(data, axis=axis)
         keys = self.group_keys_seq
-        for key, group in zip(keys, splitter):
-            yield key, group.__finalize__(data, method="groupby")
+        yield from zip(keys, splitter)

     @final
     def _get_splitter(self, data: NDFrame, axis: int = 0) -> DataSplitter:
         """
         Returns
         -------
         Generator yielding subsetted objects
-
-        __finalize__ has not been called for the subsetted objects returned.
         """
         ids, _, ngroups = self.group_info
         return get_splitter(data, ids, ngroups, axis=axis)
@@ -753,7 +750,6 @@ def apply(
         zipped = zip(group_keys, splitter)

         for key, group in zipped:
-            group = group.__finalize__(data, method="groupby")
             object.__setattr__(group, "name", key)

             # group might be modified
@@ -1001,7 +997,6 @@ def _aggregate_series_pure_python(
         splitter = get_splitter(obj, ids, ngroups, axis=0)

         for i, group in enumerate(splitter):
-            group = group.__finalize__(obj, method="groupby")
             res = func(group)
             res = libreduction.extract_result(res)
 
@@ -1244,8 +1239,8 @@ class SeriesSplitter(DataSplitter):
     def _chop(self, sdata: Series, slice_obj: slice) -> Series:
         # fastpath equivalent to `sdata.iloc[slice_obj]`
         mgr = sdata._mgr.get_slice(slice_obj)
-        # __finalize__ not called here, must be applied by caller if applicable
-        return sdata._constructor(mgr, name=sdata.name, fastpath=True)
+        ser = sdata._constructor(mgr, name=sdata.name, fastpath=True)
+        return ser.__finalize__(sdata, method="groupby")


 class FrameSplitter(DataSplitter):
@@ -1256,8 +1251,8 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame:
         # else:
         #     return sdata.iloc[:, slice_obj]
         mgr = sdata._mgr.get_slice(slice_obj, axis=1 - self.axis)
-        # __finalize__ not called here, must be applied by caller if applicable
-        return sdata._constructor(mgr)
+        df = sdata._constructor(mgr)
+        return df.__finalize__(sdata, method="groupby")


 def get_splitter(

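Note on the groupby/ops.py change: moving __finalize__ into DataSplitter._chop means every subsetted object handed out by the splitter already carries the parent's metadata, so get_iterator, apply and _aggregate_series_pure_python no longer need to call it themselves. A small illustration of the kind of metadata propagation __finalize__ provides (attrs handling has varied across pandas versions, so treat the printed output as indicative):

    import pandas as pd

    df = pd.DataFrame({"key": ["a", "a", "b"], "val": [1, 2, 3]})
    df.attrs["source"] = "sensor-1"

    # __finalize__ is the hook that copies metadata such as .attrs from the
    # parent onto each per-group object yielded during iteration
    for key, group in df.groupby("key"):
        print(key, group.attrs)
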
pandas/core/indexes/numeric.py (+2)

@@ -272,6 +272,8 @@ def _should_fallback_to_positional(self) -> bool:

     @doc(Index._convert_slice_indexer)
     def _convert_slice_indexer(self, key: slice, kind: str, is_frame: bool = False):
+        # TODO(2.0): once #45324 deprecation is enforced we should be able
+        #  to simplify this.
         if is_float_dtype(self.dtype):
             assert kind in ["loc", "getitem"]

pandas/core/indexing.py (+1 -5)

@@ -805,12 +805,8 @@ def _is_nested_tuple_indexer(self, tup: tuple) -> bool:
     @final
     def _convert_tuple(self, key: tuple) -> tuple:
         # Note: we assume _tupleize_axis_indexer has been called, if necessary.
-        keyidx = []
         self._validate_key_length(key)
-        for i, k in enumerate(key):
-            idx = self._convert_to_indexer(k, axis=i)
-            keyidx.append(idx)
-
+        keyidx = [self._convert_to_indexer(k, axis=i) for i, k in enumerate(key)]
         return tuple(keyidx)

     @final

pandas/core/reshape/merge.py (-2)

@@ -2148,8 +2148,6 @@ def _factorize_keys(
         rk = ensure_int64(rk.codes)

     elif isinstance(lk, ExtensionArray) and is_dtype_equal(lk.dtype, rk.dtype):
-        # error: Incompatible types in assignment (expression has type "ndarray",
-        # variable has type "ExtensionArray")
         lk, _ = lk._values_for_factorize()

         # error: Item "ndarray" of "Union[Any, ndarray]" has no attribute

pandas/core/series.py (+3 -4)

@@ -1162,12 +1162,11 @@ def _set_with_engine(self, key, value) -> None:
         self._mgr.setitem_inplace(loc, value)

     def _set_with(self, key, value):
-        # other: fancy integer or otherwise
+        # We got here via exception-handling off of InvalidIndexError, so
+        #  key should always be listlike at this point.
         assert not isinstance(key, tuple)

-        if is_scalar(key):
-            key = [key]
-        elif is_iterator(key):
+        if is_iterator(key):
             # Without this, the call to infer_dtype will consume the generator
             key = list(key)
 

pandas/core/sorting.py (+2 -2)

@@ -354,7 +354,7 @@ def nargsort(
     ascending: bool = True,
     na_position: str = "last",
     key: Callable | None = None,
-    mask: np.ndarray | None = None,
+    mask: npt.NDArray[np.bool_] | None = None,
 ) -> npt.NDArray[np.intp]:
     """
     Intended to be a drop-in replacement for np.argsort which handles NaNs.
@@ -369,7 +369,7 @@
     ascending : bool, default True
     na_position : {'first', 'last'}, default 'last'
     key : Optional[Callable], default None
-    mask : Optional[np.ndarray], default None
+    mask : Optional[np.ndarray[bool]], default None
         Passed when called by ExtensionArray.argsort.

     Returns

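Note on the sorting.py change: npt.NDArray[np.bool_] parametrizes the annotation by dtype, so a type checker can tell the boolean mask argument apart from the intp positions nargsort returns. A self-contained illustration of the annotation style (not the pandas implementation):

    from __future__ import annotations

    import numpy as np
    import numpy.typing as npt

    def argsort_with_mask(
        values: npt.NDArray[np.float64],
        mask: npt.NDArray[np.bool_] | None = None,
    ) -> npt.NDArray[np.intp]:
        if mask is None:
            mask = np.isnan(values)
        # push masked (NaN) positions to the end, mirroring na_position="last"
        order = np.argsort(np.where(mask, np.inf, values), kind="stable")
        return order.astype(np.intp)

    print(argsort_with_mask(np.array([3.0, np.nan, 1.0])))  # [2 0 1]
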
pandas/tests/io/pytables/__init__.py (+2 -2)

@@ -7,9 +7,9 @@
     ),
     pytest.mark.filterwarnings(r"ignore:tostring\(\) is deprecated:DeprecationWarning"),
     pytest.mark.filterwarnings(
-        r"ignore:`np\.object` is a deprecated alias:DeprecationWarning"
+        r"ignore:`np\.object` is a deprecated alias.*:DeprecationWarning"
     ),
     pytest.mark.filterwarnings(
-        r"ignore:`np\.bool` is a deprecated alias:DeprecationWarning"
+        r"ignore:`np\.bool` is a deprecated alias.*:DeprecationWarning"
     ),
 ]

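Note on the pytables test change: a pytest.mark.filterwarnings entry uses the action:message-regex:category form, and the middle field is a regular expression matched against the warning message, so module-level patterns like the ones above silence matching DeprecationWarnings for the whole test module. A hedged, illustrative example (not part of the pandas suite):

    import warnings

    import pytest

    pytestmark = [
        pytest.mark.filterwarnings(
            r"ignore:`np\.object` is a deprecated alias.*:DeprecationWarning"
        ),
    ]

    def test_matching_warning_is_suppressed():
        # the message matches the regex above, so the filter swallows it
        warnings.warn(
            "`np.object` is a deprecated alias for the builtin `object`.",
            DeprecationWarning,
        )
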
pandas/tests/resample/test_base.py (+21 -20)

@@ -61,14 +61,14 @@ def test_asfreq(series_and_frame, freq, create_index):
 def test_asfreq_fill_value(series, create_index):
     # test for fill value during resampling, issue 3715

-    s = series
+    ser = series

-    result = s.resample("1H").asfreq()
-    new_index = create_index(s.index[0], s.index[-1], freq="1H")
-    expected = s.reindex(new_index)
+    result = ser.resample("1H").asfreq()
+    new_index = create_index(ser.index[0], ser.index[-1], freq="1H")
+    expected = ser.reindex(new_index)
     tm.assert_series_equal(result, expected)

-    frame = s.to_frame("value")
+    frame = ser.to_frame("value")
     frame.iloc[1] = None
     result = frame.resample("1H").asfreq(fill_value=4.0)
     new_index = create_index(frame.index[0], frame.index[-1], freq="1H")
@@ -104,11 +104,11 @@ def test_resample_empty_series(freq, empty_series_dti, resample_method):
     if resample_method == "ohlc":
         pytest.skip("need to test for ohlc from GH13083")

-    s = empty_series_dti
-    result = getattr(s.resample(freq), resample_method)()
+    ser = empty_series_dti
+    result = getattr(ser.resample(freq), resample_method)()

-    expected = s.copy()
-    expected.index = _asfreq_compat(s.index, freq)
+    expected = ser.copy()
+    expected.index = _asfreq_compat(ser.index, freq)

     tm.assert_index_equal(result.index, expected.index)
     assert result.index.freq == expected.index.freq
@@ -123,17 +123,18 @@ def test_resample_nat_index_series(request, freq, series, resample_method):
     if freq == "M":
         request.node.add_marker(pytest.mark.xfail(reason="Don't know why this fails"))

-    s = series.copy()
-    s.index = PeriodIndex([NaT] * len(s), freq=freq)
-    result = getattr(s.resample(freq), resample_method)()
+    ser = series.copy()
+    ser.index = PeriodIndex([NaT] * len(ser), freq=freq)
+    rs = ser.resample(freq)
+    result = getattr(rs, resample_method)()

     if resample_method == "ohlc":
         expected = DataFrame(
-            [], index=s.index[:0].copy(), columns=["open", "high", "low", "close"]
+            [], index=ser.index[:0].copy(), columns=["open", "high", "low", "close"]
         )
         tm.assert_frame_equal(result, expected, check_dtype=False)
     else:
-        expected = s[:0].copy()
+        expected = ser[:0].copy()
         tm.assert_series_equal(result, expected, check_dtype=False)
     tm.assert_index_equal(result.index, expected.index)
     assert result.index.freq == expected.index.freq
@@ -226,9 +227,9 @@ def test_resample_empty_dtypes(index, dtype, resample_method):
 @pytest.mark.parametrize("freq", ["M", "D", "H"])
 def test_apply_to_empty_series(empty_series_dti, freq):
     # GH 14313
-    s = empty_series_dti
-    result = s.resample(freq).apply(lambda x: 1)
-    expected = s.resample(freq).apply(np.sum)
+    ser = empty_series_dti
+    result = ser.resample(freq).apply(lambda x: 1)
+    expected = ser.resample(freq).apply(np.sum)

     tm.assert_series_equal(result, expected, check_dtype=False)

@@ -248,9 +249,9 @@ def test_resampler_is_iterable(series):
 @all_ts
 def test_resample_quantile(series):
     # GH 15023
-    s = series
+    ser = series
     q = 0.75
     freq = "H"
-    result = s.resample(freq).quantile(q)
-    expected = s.resample(freq).agg(lambda x: x.quantile(q)).rename(s.name)
+    result = ser.resample(freq).quantile(q)
+    expected = ser.resample(freq).agg(lambda x: x.quantile(q)).rename(ser.name)
     tm.assert_series_equal(result, expected)
