diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index da1b2e750392c..4139b65e248d8 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -438,7 +438,7 @@ Metadata Other ^^^^^ -- Bug in :class:`FloatingArray.__contains__` with ``NaN`` item incorrectly returning ``False`` when ``NaN`` values are presnet (:issue:`52840`) +- Bug in :class:`FloatingArray.__contains__` with ``NaN`` item incorrectly returning ``False`` when ``NaN`` values are present (:issue:`52840`) - Bug in :func:`assert_almost_equal` now throwing assertion error for two unequal sets (:issue:`51727`) - Bug in :func:`assert_frame_equal` checks category dtypes even when asked not to check index type (:issue:`52126`) - Bug in :meth:`DataFrame.reindex` with a ``fill_value`` that should be inferred with a :class:`ExtensionDtype` incorrectly inferring ``object`` dtype (:issue:`52586`) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 94ad2aa3a751f..2861f917d8336 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -346,7 +346,7 @@ def kth_smallest(numeric_t[::1] arr, Py_ssize_t k) -> numeric_t: def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None): cdef: Py_ssize_t i, xi, yi, N, K - bint minpv + int64_t minpv float64_t[:, ::1] result ndarray[uint8_t, ndim=2] mask int64_t nobs = 0 @@ -357,7 +357,7 @@ def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None): if minp is None: minpv = 1 else: - minpv = minp + minpv = minp result = np.empty((K, K), dtype=np.float64) mask = np.isfinite(mat).view(np.uint8) diff --git a/pandas/_libs/index.pyi b/pandas/_libs/index.pyi index e08faaaa03139..8321200a84b76 100644 --- a/pandas/_libs/index.pyi +++ b/pandas/_libs/index.pyi @@ -5,6 +5,8 @@ from pandas._typing import npt from pandas import MultiIndex from pandas.core.arrays import ExtensionArray +multiindex_nulls_shift: int + class IndexEngine: over_size_threshold: bool def __init__(self, values: np.ndarray) -> None: ... diff --git a/pandas/_libs/internals.pyi b/pandas/_libs/internals.pyi index 14dfcb2b1e712..9996fbfc346df 100644 --- a/pandas/_libs/internals.pyi +++ b/pandas/_libs/internals.pyi @@ -102,5 +102,5 @@ class BlockValuesRefs: referenced_blocks: list[weakref.ref] def __init__(self, blk: SharedBlock | None = ...) -> None: ... def add_reference(self, blk: SharedBlock) -> None: ... - def add_index_reference(self, index: object) -> None: ... + def add_index_reference(self, index: Index) -> None: ... def has_reference(self) -> bool: ... diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 88c61b7fce7f0..cfc43521cf606 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -966,7 +966,7 @@ cdef class BlockValuesRefs: Parameters ---------- - index: object + index : Index The index that the new reference should point to. """ self.referenced_blocks.append(weakref.ref(index)) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index a6a9a431a9658..bc2886e5b531c 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2612,7 +2612,7 @@ def maybe_convert_objects(ndarray[object] objects, return tdi._data._ndarray seen.object_ = True - if seen.period_: + elif seen.period_: if is_period_array(objects): from pandas import PeriodIndex pi = PeriodIndex(objects) @@ -2621,7 +2621,7 @@ def maybe_convert_objects(ndarray[object] objects, return pi._data seen.object_ = True - if seen.interval_: + elif seen.interval_: if is_interval_array(objects): from pandas import IntervalIndex ii = IntervalIndex(objects) @@ -2631,7 +2631,7 @@ def maybe_convert_objects(ndarray[object] objects, seen.object_ = True - if seen.nat_: + elif seen.nat_: if not seen.object_ and not seen.numeric_ and not seen.bool_: # all NaT, None, or nan (at least one NaT) # see GH#49340 for discussion of desired behavior diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 032e43bf9c089..7908c9df60df8 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -852,9 +852,7 @@ def _constructor_sliced(self): class SubclassedCategorical(Categorical): - @property - def _constructor(self): - return SubclassedCategorical + pass def _make_skipna_wrapper(alternative, skipna_alternative=None): diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 74ae5796e85fc..37c1fa76fbbcf 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1240,7 +1240,7 @@ def take( if not is_array_like(arr): arr = np.asarray(arr) - indices = np.asarray(indices, dtype=np.intp) + indices = ensure_platform_int(indices) if allow_fill: # Pandas style, -1 means NA diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 1a1ccd6bba131..0c2adb89a2422 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1086,15 +1086,10 @@ def agg(self): result = super().agg() if result is None: f = self.f - kwargs = self.kwargs # string, list-like, and dict-like are entirely handled in super assert callable(f) - # we can be called from an inner function which - # passes this meta-data - kwargs.pop("_level", None) - # try a regular apply, this evaluates lambdas # row-by-row; however if the lambda is expected a Series # expression, e.g.: lambda x: x-x.quantile(0.25) diff --git a/pandas/core/array_algos/putmask.py b/pandas/core/array_algos/putmask.py index 74cc30a4e030d..f65d2d20e028e 100644 --- a/pandas/core/array_algos/putmask.py +++ b/pandas/core/array_algos/putmask.py @@ -138,7 +138,7 @@ def setitem_datetimelike_compat(values: np.ndarray, num_set: int, other): if values.dtype == object: dtype, _ = infer_dtype_from(other) - if isinstance(dtype, np.dtype) and dtype.kind in "mM": + if lib.is_np_dtype(dtype, "mM"): # https://github.com/numpy/numpy/issues/12550 # timedelta64 will incorrectly cast to int if not is_list_like(other): diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index a4447bffed5f5..41e05ffc70b2e 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2022,7 +2022,7 @@ def _validate_listlike(self, value): "Cannot set a Categorical with another, " "without identical categories" ) - # is_dtype_equal implies categories_match_up_to_permutation + # dtype equality implies categories_match_up_to_permutation value = self._encode_with_my_categories(value) return value._codes diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 126a70a930065..39dade594a5af 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -63,7 +63,6 @@ from pandas.core.arrays import datetimelike as dtl from pandas.core.arrays._ranges import generate_regular_range -from pandas.core.arrays.sparse.dtype import SparseDtype import pandas.core.common as com from pandas.tseries.frequencies import get_period_alias @@ -2035,11 +2034,7 @@ def _sequence_to_dt64ns( if out_unit is not None: out_dtype = np.dtype(f"M8[{out_unit}]") - if ( - data_dtype == object - or is_string_dtype(data_dtype) - or isinstance(data_dtype, SparseDtype) - ): + if data_dtype == object or is_string_dtype(data_dtype): # TODO: We do not have tests specific to string-dtypes, # also complex or categorical or other extension copy = False diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 8c9b45bd452a0..35ce12a6fa795 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -48,7 +48,6 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import BaseMaskedDtype -from pandas.core.dtypes.inference import is_array_like from pandas.core.dtypes.missing import ( array_equivalent, is_valid_na_for_dtype, @@ -172,20 +171,13 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any: return type(self)(self._data[item], newmask) - @doc(ExtensionArray.fillna) @doc(ExtensionArray.fillna) def fillna(self, value=None, method=None, limit: int | None = None) -> Self: value, method = validate_fillna_kwargs(value, method) mask = self._mask - if is_array_like(value): - if len(value) != len(self): - raise ValueError( - f"Length of 'value' does not match. Got ({len(value)}) " - f" expected {len(self)}" - ) - value = value[mask] + value = missing.check_value_size(value, mask, len(self)) if mask.any(): if method is not None: diff --git a/pandas/core/flags.py b/pandas/core/flags.py index f07c6917d91e5..c4d92bcb4e994 100644 --- a/pandas/core/flags.py +++ b/pandas/core/flags.py @@ -1,7 +1,11 @@ from __future__ import annotations +from typing import TYPE_CHECKING import weakref +if TYPE_CHECKING: + from pandas.core.generic import NDFrame + class Flags: """ @@ -44,9 +48,9 @@ class Flags: """ - _keys = {"allows_duplicate_labels"} + _keys: set[str] = {"allows_duplicate_labels"} - def __init__(self, obj, *, allows_duplicate_labels) -> None: + def __init__(self, obj: NDFrame, *, allows_duplicate_labels: bool) -> None: self._allows_duplicate_labels = allows_duplicate_labels self._obj = weakref.ref(obj) @@ -95,13 +99,13 @@ def allows_duplicate_labels(self, value: bool) -> None: self._allows_duplicate_labels = value - def __getitem__(self, key): + def __getitem__(self, key: str): if key not in self._keys: raise KeyError(key) return getattr(self, key) - def __setitem__(self, key, value) -> None: + def __setitem__(self, key: str, value) -> None: if key not in self._keys: raise ValueError(f"Unknown flag {key}. Must be one of {self._keys}") setattr(self, key, value) @@ -109,7 +113,7 @@ def __setitem__(self, key, value) -> None: def __repr__(self) -> str: return f"" - def __eq__(self, other): + def __eq__(self, other) -> bool: if isinstance(other, type(self)): return self.allows_duplicate_labels == other.allows_duplicate_labels return False diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9cd3a4e91eaf8..5357dc35b70ee 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6530,7 +6530,7 @@ def sort_values( axis: Axis = ..., ascending=..., inplace: Literal[True], - kind: str = ..., + kind: SortKind = ..., na_position: str = ..., ignore_index: bool = ..., key: ValueKeyFunc = ..., @@ -6544,7 +6544,7 @@ def sort_values( axis: Axis = 0, ascending: bool | list[bool] | tuple[bool, ...] = True, inplace: bool = False, - kind: str = "quicksort", + kind: SortKind = "quicksort", na_position: str = "last", ignore_index: bool = False, key: ValueKeyFunc = None, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 4eb29428b7dd1..78f7b69beb118 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6206,7 +6206,7 @@ def _check_inplace_setting(self, value) -> bool_t: """check whether we allow in-place setting with this type of value""" if self._is_mixed_type and not self._mgr.is_numeric_mixed_type: # allow an actual np.nan through - if is_float(value) and np.isnan(value) or value is lib.no_default: + if (is_float(value) and np.isnan(value)) or value is lib.no_default: return True raise TypeError( diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 841d8bb0749d0..179e118bf3c0f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2868,8 +2868,9 @@ def fillna(self, value=None, downcast=None): DataFrame.fillna : Fill NaN values of a DataFrame. Series.fillna : Fill NaN Values of a Series. """ + if not is_scalar(value): + raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}") - value = self._require_scalar(value) if self.hasnans: result = self.putmask(self._isnan, value) if downcast is None: @@ -3211,7 +3212,7 @@ def union(self, other, sort=None): elif not len(other) or self.equals(other): # NB: whether this (and the `if not len(self)` check below) come before - # or after the is_dtype_equal check above affects the returned dtype + # or after the dtype equality check above affects the returned dtype result = self._get_reconciled_name_object(other) if sort is True: return result.sort_values() @@ -5119,16 +5120,6 @@ def _validate_fill_value(self, value): raise TypeError return value - @final - def _require_scalar(self, value): - """ - Check that this is a scalar value that we can use for setitem-like - operations without changing dtype. - """ - if not is_scalar(value): - raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}") - return value - def _is_memory_usage_qualified(self) -> bool: """ Return a boolean if we need a qualified .info display. diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 62b0e40268716..deff5129ad64d 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1116,11 +1116,7 @@ def _engine(self): # calculating the indexer are shifted to 0 sizes = np.ceil( np.log2( - [ - len(level) - + libindex.multiindex_nulls_shift # type: ignore[attr-defined] - for level in self.levels - ] + [len(level) + libindex.multiindex_nulls_shift for level in self.levels] ) ) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 85463d803b9a7..ff523862f8770 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -309,11 +309,7 @@ def should_store(self, value: ArrayLike) -> bool: ------- bool """ - # faster equivalent to is_dtype_equal(value.dtype, self.dtype) - try: - return value.dtype == self.dtype - except TypeError: - return False + return value.dtype == self.dtype # --------------------------------------------------------------------- # Apply/Reduce and Helpers diff --git a/pandas/core/ops/invalid.py b/pandas/core/ops/invalid.py index eb27cf7450119..e5ae6d359ac22 100644 --- a/pandas/core/ops/invalid.py +++ b/pandas/core/ops/invalid.py @@ -4,11 +4,15 @@ from __future__ import annotations import operator +from typing import TYPE_CHECKING import numpy as np +if TYPE_CHECKING: + from pandas._typing import npt -def invalid_comparison(left, right, op) -> np.ndarray: + +def invalid_comparison(left, right, op) -> npt.NDArray[np.bool_]: """ If a comparison has mismatched types and is not necessarily meaningful, follow python3 conventions by: diff --git a/pandas/core/ops/missing.py b/pandas/core/ops/missing.py index 9707f5df927fb..fc685935a35fc 100644 --- a/pandas/core/ops/missing.py +++ b/pandas/core/ops/missing.py @@ -27,16 +27,10 @@ import numpy as np -from pandas.core.dtypes.common import ( - is_float_dtype, - is_integer_dtype, - is_scalar, -) - from pandas.core import roperator -def _fill_zeros(result, x, y): +def _fill_zeros(result: np.ndarray, x, y): """ If this is a reversed op, then flip x,y @@ -46,11 +40,11 @@ def _fill_zeros(result, x, y): Mask the nan's from x. """ - if is_float_dtype(result.dtype): + if result.dtype.kind == "f": return result is_variable_type = hasattr(y, "dtype") - is_scalar_type = is_scalar(y) + is_scalar_type = not isinstance(y, np.ndarray) if not is_variable_type and not is_scalar_type: # e.g. test_series_ops_name_retention with mod we get here with list/tuple @@ -59,7 +53,7 @@ def _fill_zeros(result, x, y): if is_scalar_type: y = np.array(y) - if is_integer_dtype(y.dtype): + if y.dtype.kind in "iu": ymask = y == 0 if ymask.any(): # GH#7325, mask and nans must be broadcastable @@ -143,7 +137,9 @@ def dispatch_fill_zeros(op, left, right, result): ---------- op : function (operator.add, operator.div, ...) left : object (np.ndarray for non-reversed ops) + We have excluded ExtensionArrays here right : object (np.ndarray for reversed ops) + We have excluded ExtensionArrays here result : ndarray Returns diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index dd9c952cc08ff..b63f3f28b8f6c 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -71,9 +71,9 @@ def get_indexer_indexer( target : Index level : int or level name or list of ints or list of level names ascending : bool or list of bools, default True - kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort' - na_position : {'first', 'last'}, default 'last' - sort_remaining : bool, default True + kind : {'quicksort', 'mergesort', 'heapsort', 'stable'} + na_position : {'first', 'last'} + sort_remaining : bool key : callable, optional Returns @@ -424,7 +424,7 @@ def lexsort_indexer( def nargsort( items: ArrayLike | Index | Series, - kind: str = "quicksort", + kind: SortKind = "quicksort", ascending: bool = True, na_position: str = "last", key: Callable | None = None, @@ -440,7 +440,7 @@ def nargsort( Parameters ---------- items : np.ndarray, ExtensionArray, Index, or Series - kind : str, default 'quicksort' + kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort' ascending : bool, default True na_position : {'first', 'last'}, default 'last' key : Optional[Callable], default None @@ -480,10 +480,7 @@ def nargsort( # i.e. ExtensionArray return items.argsort( ascending=ascending, - # error: Argument "kind" to "argsort" of "ExtensionArray" has - # incompatible type "str"; expected "Literal['quicksort', - # 'mergesort', 'heapsort', 'stable']" - kind=kind, # type: ignore[arg-type] + kind=kind, na_position=na_position, ) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index d7f7a0b8801ed..7fa1cb2840fae 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -76,6 +76,8 @@ Self, ) + from pandas import Index + # ----------------------------------------------------------------------------- # -- Helper functions @@ -1256,7 +1258,7 @@ def _harmonize_columns( except KeyError: pass # this column not in results - def _sqlalchemy_type(self, col): + def _sqlalchemy_type(self, col: Index | Series): dtype: DtypeArg = self.dtype or {} if is_dict_like(dtype): dtype = cast(dict, dtype) @@ -1284,7 +1286,8 @@ def _sqlalchemy_type(self, col): # GH 9086: TIMESTAMP is the suggested type if the column contains # timezone information try: - if col.dt.tz is not None: + # error: Item "Index" of "Union[Index, Series]" has no attribute "dt" + if col.dt.tz is not None: # type: ignore[union-attr] return TIMESTAMP(timezone=True) except AttributeError: # The column is actually a DatetimeIndex diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 10859c0fa58c4..aed28efecb696 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -414,7 +414,7 @@ def _datetime_to_stata_elapsed_vec(dates: Series, fmt: str) -> Series: US_PER_DAY = NS_PER_DAY / 1000 def parse_dates_safe( - dates, delta: bool = False, year: bool = False, days: bool = False + dates: Series, delta: bool = False, year: bool = False, days: bool = False ): d = {} if lib.is_np_dtype(dates.dtype, "M"): diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index 88b681d18fa3b..720106590cba3 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -228,6 +228,7 @@ def test_construction_with_conversions(self): "dt1": Timestamp("20130101"), "dt2": date_range("20130101", periods=3).astype("M8[s]"), # 'dt3' : date_range('20130101 00:00:01',periods=3,freq='s'), + # FIXME: don't leave commented-out }, index=range(3), ) @@ -242,6 +243,7 @@ def test_construction_with_conversions(self): # df['dt3'] = np.array(['2013-01-01 00:00:01','2013-01-01 # 00:00:02','2013-01-01 00:00:03'],dtype='datetime64[s]') + # FIXME: don't leave commented-out tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 3380a43126cec..8624e54955d83 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -3068,7 +3068,7 @@ def test_from_out_of_bounds_ns_datetime(self, constructor, cls): result = constructor(scalar) item = get1(result) - dtype = result.dtype if isinstance(result, Series) else result.dtypes.iloc[0] + dtype = tm.get_dtype(result) assert type(item) is Timestamp assert item.asm8.dtype == exp_dtype @@ -3079,7 +3079,7 @@ def test_out_of_s_bounds_datetime64(self, constructor): result = constructor(scalar) item = get1(result) assert type(item) is np.datetime64 - dtype = result.dtype if isinstance(result, Series) else result.dtypes.iloc[0] + dtype = tm.get_dtype(result) assert dtype == object @pytest.mark.xfail( @@ -3097,7 +3097,7 @@ def test_from_out_of_bounds_ns_timedelta(self, constructor, cls): result = constructor(scalar) item = get1(result) - dtype = result.dtype if isinstance(result, Series) else result.dtypes.iloc[0] + dtype = tm.get_dtype(result) assert type(item) is Timedelta assert item.asm8.dtype == exp_dtype @@ -3109,7 +3109,7 @@ def test_out_of_s_bounds_timedelta64(self, constructor, cls): result = constructor(scalar) item = get1(result) assert type(item) is cls - dtype = result.dtype if isinstance(result, Series) else result.dtypes.iloc[0] + dtype = tm.get_dtype(result) assert dtype == object def test_tzaware_data_tznaive_dtype(self, constructor, box, frame_or_series): diff --git a/pandas/tests/series/methods/test_asof.py b/pandas/tests/series/methods/test_asof.py index 9d8a8320eb978..638216302b92f 100644 --- a/pandas/tests/series/methods/test_asof.py +++ b/pandas/tests/series/methods/test_asof.py @@ -5,12 +5,14 @@ from pandas import ( DatetimeIndex, + PeriodIndex, Series, Timestamp, date_range, isna, notna, offsets, + period_range, ) import pandas._testing as tm @@ -114,11 +116,6 @@ def test_with_nan(self): tm.assert_series_equal(result, expected) def test_periodindex(self): - from pandas import ( - PeriodIndex, - period_range, - ) - # array or list or dates N = 50 rng = period_range("1/1/1990", periods=N, freq="H")