CLN: assorted

jbrockmendel · jbrockmendel · commit 50e9404b8498 · 2023-06-20T14:24:35.000-07:00
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
@@ -950,7 +950,7 @@ def group_skew(
                     isna_entry = _treat_as_na(val, False)
 
                 if not isna_entry:
-                    # Based on RunningSats::Push from
+                    # Based on RunningStats::Push from
                     #  https://www.johndcook.com/blog/skewness_kurtosis/
                     n1 = nobs[lab, j]
                     n = n1 + 1
diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
@@ -974,7 +974,7 @@ cdef _timedelta_from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso):
             "Only resolutions 's', 'ms', 'us', 'ns' are supported."
         )
 
-    td_base._value= value
+    td_base._value = value
     td_base._is_populated = 0
     td_base._creso = reso
     return td_base
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
@@ -1131,13 +1131,7 @@ def take(
         it's called by :meth:`Series.reindex`, or any other method
         that causes realignment, with a `fill_value`.
         """
-        # TODO: Remove once we got rid of the (indices < 0) check
-        if not is_array_like(indices):
-            indices_array = np.asanyarray(indices)
-        else:
-            # error: Incompatible types in assignment (expression has type
-            # "Sequence[int]", variable has type "ndarray")
-            indices_array = indices  # type: ignore[assignment]
+        indices_array = np.asanyarray(indices)
 
         if len(self._pa_array) == 0 and (indices_array >= 0).any():
             raise IndexError("cannot do a non-empty take")
diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
@@ -42,7 +42,6 @@
     maybe_box_datetimelike,
 )
 from pandas.core.dtypes.common import (
-    is_array_like,
     is_bool_dtype,
     is_integer,
     is_list_like,
@@ -428,19 +427,16 @@ def __init__(
             # Union[int, Sequence[int]]], List[Any], _DTypeDict, Tuple[Any, Any]]]"
             data = np.array([], dtype=dtype)  # type: ignore[arg-type]
 
-        if not is_array_like(data):
-            try:
-                # probably shared code in sanitize_series
-
-                data = sanitize_array(data, index=None)
-            except ValueError:
-                # NumPy may raise a ValueError on data like [1, []]
-                # we retry with object dtype here.
-                if dtype is None:
-                    dtype = np.dtype(object)
-                    data = np.atleast_1d(np.asarray(data, dtype=dtype))
-                else:
-                    raise
+        try:
+            data = sanitize_array(data, index=None)
+        except ValueError:
+            # NumPy may raise a ValueError on data like [1, []]
+            # we retry with object dtype here.
+            if dtype is None:
+                dtype = np.dtype(object)
+                data = np.atleast_1d(np.asarray(data, dtype=dtype))
+            else:
+                raise
 
         if copy:
             # TODO: avoid double copy when dtype forces cast.
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
@@ -1665,17 +1665,6 @@ def _check_fill_value(self):
                     FutureWarning,
                     stacklevel=find_stack_level(),
                 )
-        elif isinstance(self.subtype, CategoricalDtype):
-            # TODO: is this even supported?  It is reached in
-            #  test_dtype_sparse_with_fill_value_not_present_in_data
-            if self.subtype.categories is None or val not in self.subtype.categories:
-                warnings.warn(
-                    "Allowing arbitrary scalar fill_value in SparseDtype is "
-                    "deprecated. In a future version, the fill_value must be "
-                    "a valid value for the SparseDtype.subtype.",
-                    FutureWarning,
-                    stacklevel=find_stack_level(),
-                )
         else:
             dummy = np.empty(0, dtype=self.subtype)
             dummy = ensure_wrapped_if_datetimelike(dummy)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -673,9 +673,9 @@ def __init__(
         manager = get_option("mode.data_manager")
 
         # GH47215
-        if index is not None and isinstance(index, set):
+        if isinstance(index, set):
             raise ValueError("index cannot be a set")
-        if columns is not None and isinstance(columns, set):
+        if isinstance(columns, set):
             raise ValueError("columns cannot be a set")
 
         if copy is None:
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -47,6 +47,7 @@
     AlignJoin,
     AnyArrayLike,
     ArrayLike,
+    Axes,
     Axis,
     AxisInt,
     CompressionOptions,
@@ -271,7 +272,7 @@ def __init__(self, data: Manager) -> None:
     def _init_mgr(
         cls,
         mgr: Manager,
-        axes,
+        axes: dict[Literal["index", "columns"], Axes | None],
         dtype: DtypeObj | None = None,
         copy: bool_t = False,
     ) -> Manager:
@@ -3995,7 +3996,6 @@ class  max_speed
             ):
                 return self.copy(deep=None)
         elif self.ndim == 1:
-            # TODO: be consistent here for DataFrame vs Series
             raise TypeError(
                 f"{type(self).__name__}.take requires a sequence of integers, "
                 "not slice."
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -4191,7 +4191,7 @@ def _convert_slice_indexer(self, key: slice, kind: Literal["loc", "getitem"]):
 
         # TODO(GH#50617): once Series.__[gs]etitem__ is removed we should be able
         #  to simplify this.
-        if isinstance(self.dtype, np.dtype) and self.dtype.kind == "f":
+        if lib.is_np_dtype(self.dtype, "f"):
             # We always treat __getitem__ slicing as label-based
             # translate to locations
             return self.slice_indexer(start, stop, step)
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -233,7 +233,7 @@ def make_block_same_class(
         values,
         placement: BlockPlacement | None = None,
         refs: BlockValuesRefs | None = None,
-    ) -> Block:
+    ) -> Self:
         """Wrap given values in a block of same type as self."""
         # Pre-2.0 we called ensure_wrapped_if_datetimelike because fastparquet
         #  relied on it, as of 2.0 the caller is responsible for this.
@@ -503,6 +503,7 @@ def convert(
     # ---------------------------------------------------------------------
     # Array-Like Methods
 
+    @final
     @cache_readonly
     def dtype(self) -> DtypeObj:
         return self.values.dtype
@@ -559,7 +560,7 @@ def to_native_types(self, na_rep: str = "nan", quoting=None, **kwargs) -> Block:
         return self.make_block(result)
 
     @final
-    def copy(self, deep: bool = True) -> Block:
+    def copy(self, deep: bool = True) -> Self:
         """copy constructor"""
         values = self.values
         refs: BlockValuesRefs | None
@@ -1499,7 +1500,8 @@ def quantile(
         result = ensure_block_shape(result, ndim=2)
         return new_block_2d(result, placement=self._mgr_locs)
 
-    def round(self, decimals: int, using_cow: bool = False) -> Block:
+    @final
+    def round(self, decimals: int, using_cow: bool = False) -> Self:
         """
         Rounds the values.
         If the block is not of an integer or float dtype, nothing happens.
@@ -1765,9 +1767,7 @@ def putmask(self, mask, new, using_cow: bool = False) -> list[Block]:
 
         if using_cow and self.refs.has_reference():
             values = values.copy()
-            self = self.make_block_same_class(  # type: ignore[assignment]
-                values.T if values.ndim == 2 else values
-            )
+            self = self.make_block_same_class(values.T if values.ndim == 2 else values)
 
         try:
             # Caller is responsible for ensuring matching lengths
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -939,8 +939,7 @@ def take(
         -------
         BlockManager
         """
-        assert isinstance(indexer, np.ndarray), type(indexer)
-        assert indexer.dtype == np.intp, indexer.dtype
+        # Caller is responsible for ensuring indexer annotation is accurate
 
         n = self.shape[axis]
         indexer = maybe_convert_indices(indexer, n, verify=verify)
diff --git a/pandas/core/missing.py b/pandas/core/missing.py
@@ -311,8 +311,6 @@ def interpolate_array_2d(
     limit_direction: str = "forward",
     limit_area: str | None = None,
     fill_value: Any | None = None,
-    coerce: bool = False,
-    downcast: str | None = None,
     **kwargs,
 ) -> None:
     """
diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
@@ -511,8 +511,8 @@ def pivot(
     data: DataFrame,
     *,
     columns: IndexLabel,
-    index: IndexLabel | lib.NoDefault = lib.NoDefault,
-    values: IndexLabel | lib.NoDefault = lib.NoDefault,
+    index: IndexLabel | lib.NoDefault = lib.no_default,
+    values: IndexLabel | lib.NoDefault = lib.no_default,
 ) -> DataFrame:
     columns_listlike = com.convert_to_list_like(columns)
 
@@ -522,24 +522,24 @@ def pivot(
     data = data.copy(deep=False)
     data.index = data.index.copy()
     data.index.names = [
-        name if name is not None else lib.NoDefault for name in data.index.names
+        name if name is not None else lib.no_default for name in data.index.names
     ]
 
     indexed: DataFrame | Series
-    if values is lib.NoDefault:
-        if index is not lib.NoDefault:
+    if values is lib.no_default:
+        if index is not lib.no_default:
             cols = com.convert_to_list_like(index)
         else:
             cols = []
 
-        append = index is lib.NoDefault
+        append = index is lib.no_default
         # error: Unsupported operand types for + ("List[Any]" and "ExtensionArray")
         # error: Unsupported left operand type for + ("ExtensionArray")
         indexed = data.set_index(
             cols + columns_listlike, append=append  # type: ignore[operator]
         )
     else:
-        if index is lib.NoDefault:
+        if index is lib.no_default:
             if isinstance(data.index, MultiIndex):
                 # GH 23955
                 index_list = [
@@ -569,7 +569,7 @@ def pivot(
     # "Hashable"
     result = indexed.unstack(columns_listlike)  # type: ignore[arg-type]
     result.index.names = [
-        name if name is not lib.NoDefault else None for name in result.index.names
+        name if name is not lib.no_default else None for name in result.index.names
     ]
 
     return result
diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
@@ -20,6 +20,7 @@
     DtypeObj,
     F,
     Scalar,
+    npt,
 )
 from pandas.util._decorators import Appender
 from pandas.util._exceptions import find_stack_level
@@ -3366,7 +3367,7 @@ def casefold(self):
     )
 
 
-def cat_safe(list_of_columns: list, sep: str):
+def cat_safe(list_of_columns: list[npt.NDArray[np.object_]], sep: str):
     """
     Auxiliary function for :meth:`str.cat`.
 
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
@@ -433,13 +433,13 @@ def test_frame_empty(self):
         assert not df._is_mixed_type
 
         data = StringIO(df.to_json())
-        tm.assert_frame_equal(
-            read_json(data, dtype=dict(df.dtypes)),
-            df,
-            check_index_type=False,
-        )
+        result = read_json(data, dtype=dict(df.dtypes))
+        tm.assert_frame_equal(result, df, check_index_type=False)
+
+    def test_frame_empty_to_json(self):
         # GH 7445
-        result = DataFrame({"test": []}, index=[]).to_json(orient="columns")
+        df = DataFrame({"test": []}, index=[])
+        result = df.to_json(orient="columns")
         expected = '{"test":{}}'
         assert result == expected
 
diff --git a/pandas/tests/resample/test_base.py b/pandas/tests/resample/test_base.py
@@ -80,11 +80,11 @@ def test_asfreq_fill_value(series, create_index):
 
 @all_ts
 def test_resample_interpolate(frame):
-    # # 12925
+    # GH#12925
     df = frame
-    tm.assert_frame_equal(
-        df.resample("1T").asfreq().interpolate(), df.resample("1T").interpolate()
-    )
+    result = df.resample("1T").asfreq().interpolate()
+    expected = df.resample("1T").interpolate()
+    tm.assert_frame_equal(result, expected)
 
 
 def test_raises_on_non_datetimelike_index():
@@ -95,7 +95,7 @@ def test_raises_on_non_datetimelike_index():
         "but got an instance of 'RangeIndex'"
     )
     with pytest.raises(TypeError, match=msg):
-        xp.resample("A").mean()
+        xp.resample("A")
 
 
 @all_ts
@@ -132,13 +132,17 @@ def test_resample_empty_series(freq, empty_series_dti, resample_method, request)
 
 
 @all_ts
-@pytest.mark.parametrize("freq", ["M", "D", "H"])
-def test_resample_nat_index_series(request, freq, series, resample_method):
+@pytest.mark.parametrize(
+    "freq",
+    [
+        pytest.param("M", marks=pytest.mark.xfail(reason="Don't know why this fails")),
+        "D",
+        "H",
+    ],
+)
+def test_resample_nat_index_series(freq, series, resample_method):
     # GH39227
 
-    if freq == "M":
-        request.node.add_marker(pytest.mark.xfail(reason="Don't know why this fails"))
-
     ser = series.copy()
     ser.index = PeriodIndex([NaT] * len(ser), freq=freq)
     rs = ser.resample(freq)
diff --git a/pandas/tests/series/indexing/test_take.py b/pandas/tests/series/indexing/test_take.py
@@ -5,6 +5,15 @@
 import pandas._testing as tm
 
 
+def test_take_validate_axis():
+    # GH#51022
+    ser = Series([-1, 5, 6, 2, 4])
+
+    msg = "No axis named foo for object type Series"
+    with pytest.raises(ValueError, match=msg):
+        ser.take([1, 2], axis="foo")
+
+
 def test_take():
     ser = Series([-1, 5, 6, 2, 4])
 
diff --git a/pandas/tests/series/methods/test_reindex.py b/pandas/tests/series/methods/test_reindex.py
@@ -132,6 +132,8 @@ def test_reindex_pad():
     expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8])
     tm.assert_series_equal(reindexed, expected)
 
+
+def test_reindex_pad2():
     # GH4604
     s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"])
     new_index = ["a", "g", "c", "f"]
@@ -148,13 +150,17 @@ def test_reindex_pad():
     result = s.reindex(new_index, method="ffill")
     tm.assert_series_equal(result, expected)
 
+
+def test_reindex_inference():
     # inference of new dtype
     s = Series([True, False, False, True], index=list("abcd"))
     new_index = "agc"
     result = s.reindex(list(new_index)).ffill()
     expected = Series([True, True, False], index=list(new_index))
     tm.assert_series_equal(result, expected)
 
+
+def test_reindex_downcasting():
     # GH4618 shifted series downcasting
     s = Series(False, index=range(0, 5))
     result = s.shift(1).bfill()
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py

Original file line number	Diff line number	Diff line change
`@@ -974,7 +974,7 @@ cdef _timedelta_from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso):`
`974`	`974`	`"Only resolutions 's', 'ms', 'us', 'ns' are supported."`
`975`	`975`	`)`
`976`	`976`
`977`		`- td_base._value= value`
	`977`	`+ td_base._value = value`
`978`	`978`	`td_base._is_populated = 0`
`979`	`979`	`td_base._creso = reso`
`980`	`980`	`return td_base`
Original file line number	Diff line number	Diff line change
`@@ -20,6 +20,7 @@`
`20`	`20`	`DtypeObj,`
`21`	`21`	`F,`
`22`	`22`	`Scalar,`
	`23`	`+ npt,`
`23`	`24`	`)`
`24`	`25`	`from pandas.util._decorators import Appender`
`25`	`26`	`from pandas.util._exceptions import find_stack_level`
`@@ -3366,7 +3367,7 @@ def casefold(self):`
`3366`	`3367`	`)`
`3367`	`3368`
`3368`	`3369`
`3369`		`-def cat_safe(list_of_columns: list, sep: str):`
	`3370`	`+def cat_safe(list_of_columns: list[npt.NDArray[np.object_]], sep: str):`
`3370`	`3371`	`"""`
`3371`	`3372`	Auxiliary function for :meth:`str.cat`.
`3372`	`3373`