From 0583e981fb930983e12a3c56fe47f881feda516f Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 22 Apr 2023 07:20:55 -0700 Subject: [PATCH 1/4] REF: avoid is_scalar --- pandas/core/algorithms.py | 5 ++--- pandas/core/array_algos/replace.py | 4 ++-- pandas/core/arrays/sparse/array.py | 2 -- pandas/core/dtypes/cast.py | 2 -- pandas/core/frame.py | 2 +- pandas/core/generic.py | 4 +--- pandas/core/indexes/base.py | 11 ++++++----- pandas/core/nanops.py | 17 +++++++++-------- pandas/core/strings/base.py | 3 ++- pandas/core/strings/object_array.py | 19 ++++++++++++------- pandas/core/tools/datetimes.py | 4 +--- pandas/io/parsers/base_parser.py | 28 +++++++++++----------------- 12 files changed, 47 insertions(+), 54 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 4f771b3c80791..eac0b73d75ad8 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -50,7 +50,6 @@ is_integer_dtype, is_list_like, is_object_dtype, - is_scalar, is_signed_integer_dtype, needs_i8_conversion, ) @@ -1324,7 +1323,7 @@ def searchsorted( # Before searching below, we therefore try to give `value` the # same dtype as `arr`, while guarding against integer overflows. iinfo = np.iinfo(arr.dtype.type) - value_arr = np.array([value]) if is_scalar(value) else np.array(value) + value_arr = np.array([value]) if is_integer(value) else np.array(value) if (value_arr >= iinfo.min).all() and (value_arr <= iinfo.max).all(): # value within bounds, so no overflow, so can convert value dtype # to dtype of arr @@ -1332,7 +1331,7 @@ def searchsorted( else: dtype = value_arr.dtype - if is_scalar(value): + if is_integer(value): # We know that value is int value = cast(int, dtype.type(value)) else: diff --git a/pandas/core/array_algos/replace.py b/pandas/core/array_algos/replace.py index af01a692bc0ce..85d1f7ccf2e88 100644 --- a/pandas/core/array_algos/replace.py +++ b/pandas/core/array_algos/replace.py @@ -14,9 +14,9 @@ import numpy as np from pandas.core.dtypes.common import ( + is_bool, is_re, is_re_compilable, - is_scalar, ) from pandas.core.dtypes.missing import isna @@ -72,7 +72,7 @@ def _check_comparison_types( Raises an error if the two arrays (a,b) cannot be compared. Otherwise, returns the comparison result as expected. """ - if is_scalar(result) and isinstance(a, np.ndarray): + if is_bool(result) and isinstance(a, np.ndarray): type_names = [type(a).__name__, type(b).__name__] type_names[0] = f"ndarray(dtype={a.dtype})" diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 18a1fc248f7b5..4c32be4849706 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -1120,8 +1120,6 @@ def searchsorted( ) -> npt.NDArray[np.intp] | np.intp: msg = "searchsorted requires high memory usage." warnings.warn(msg, PerformanceWarning, stacklevel=find_stack_level()) - if not is_scalar(v): - v = np.asarray(v) v = np.asarray(v) return np.asarray(self, dtype=self.dtype.subtype).searchsorted(v, side, sorter) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index fd8c651fe73dc..5ceabf11b743c 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -455,8 +455,6 @@ def maybe_cast_pointwise_result( result maybe casted to the dtype. 
""" - assert not is_scalar(result) - if isinstance(dtype, ExtensionDtype): if not isinstance(dtype, (CategoricalDtype, DatetimeTZDtype)): # TODO: avoid this special-casing diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 04c1b18cb1af1..89be1c1da34f4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3914,7 +3914,7 @@ def isetitem(self, loc, value) -> None: ``frame[frame.columns[i]] = value``. """ if isinstance(value, DataFrame): - if is_scalar(loc): + if is_integer(loc): loc = [loc] if len(loc) != len(value.columns): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e01d3c44172c2..b6d862ba2180c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -8346,9 +8346,7 @@ def clip( lower, upper = min(lower, upper), max(lower, upper) # fast-path for scalars - if (lower is None or (is_scalar(lower) and is_number(lower))) and ( - upper is None or (is_scalar(upper) and is_number(upper)) - ): + if (lower is None or is_number(lower)) and (upper is None or is_number(upper)): return self._clip_with_scalar(lower, upper, inplace=inplace) result = self diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 75320a28eb16b..8e3fd72acd8e1 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -916,6 +916,9 @@ def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs): if ufunc.nout == 2: # i.e. np.divmod, np.modf, np.frexp return tuple(self.__array_wrap__(x) for x in result) + elif method == "reduce": + result = lib.item_from_zerodim(result) + return result if result.dtype == np.float16: result = result.astype(np.float32) @@ -928,11 +931,9 @@ def __array_wrap__(self, result, context=None): Gets called after a ufunc and other functions e.g. np.split. """ result = lib.item_from_zerodim(result) - if ( - (not isinstance(result, Index) and is_bool_dtype(result.dtype)) - or lib.is_scalar(result) - or np.ndim(result) > 1 - ): + if (not isinstance(result, Index) and is_bool_dtype(result.dtype)) or np.ndim( + result + ) > 1: # exclude Index to avoid warning from is_bool_dtype deprecation; # in the Index case it doesn't matter which path we go down. # reached in plotting tests with e.g. np.nonzero(index) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 8fddc8461dfbe..b405447239a7a 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -40,7 +40,6 @@ is_integer, is_numeric_dtype, is_object_dtype, - is_scalar, needs_i8_conversion, pandas_dtype, ) @@ -291,7 +290,6 @@ def _get_values( # In _get_values is only called from within nanops, and in all cases # with scalar fill_value. 
This guarantee is important for the # np.where call below - assert is_scalar(fill_value) mask = _maybe_get_mask(values, skipna, mask) @@ -876,12 +874,15 @@ def _get_counts_nanvar( d = count - dtype.type(ddof) # always return NaN, never inf - if is_scalar(count): + if is_float(count): if count <= ddof: - count = np.nan + # error: Incompatible types in assignment (expression has type + # "float", variable has type "Union[floating[Any], ndarray[Any, + # dtype[floating[Any]]]]") + count = np.nan # type: ignore[assignment] d = np.nan else: - # count is not narrowed by is_scalar check + # count is not narrowed by is_float check count = cast(np.ndarray, count) mask = count <= ddof if mask.any(): @@ -1444,8 +1445,8 @@ def _get_counts( values_shape: Shape, mask: npt.NDArray[np.bool_] | None, axis: AxisInt | None, - dtype: np.dtype = np.dtype(np.float64), -) -> float | np.ndarray: + dtype: np.dtype[np.floating] = np.dtype(np.float64), +) -> np.floating | npt.NDArray[np.floating]: """ Get the count of non-null values along an axis @@ -1476,7 +1477,7 @@ def _get_counts( else: count = values_shape[axis] - if is_scalar(count): + if is_integer(count): return dtype.type(count) return count.astype(dtype, copy=False) diff --git a/pandas/core/strings/base.py b/pandas/core/strings/base.py index 2672d22935d72..44b311ad96387 100644 --- a/pandas/core/strings/base.py +++ b/pandas/core/strings/base.py @@ -5,6 +5,7 @@ TYPE_CHECKING, Callable, Literal, + Sequence, ) import numpy as np @@ -79,7 +80,7 @@ def _str_replace( pass @abc.abstractmethod - def _str_repeat(self, repeats): + def _str_repeat(self, repeats: int | Sequence[int]): pass @abc.abstractmethod diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index 294bd0ebe356b..87cc6e71b8672 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -8,6 +8,8 @@ TYPE_CHECKING, Callable, Literal, + Sequence, + cast, ) import unicodedata @@ -17,7 +19,6 @@ import pandas._libs.missing as libmissing import pandas._libs.ops as libops -from pandas.core.dtypes.common import is_scalar from pandas.core.dtypes.missing import isna from pandas.core.strings.base import BaseStringArrayMethods @@ -177,14 +178,15 @@ def _str_replace( return self._str_map(f, dtype=str) - def _str_repeat(self, repeats): - if is_scalar(repeats): + def _str_repeat(self, repeats: int | Sequence[int]): + if lib.is_integer(repeats): + rint = cast(int, repeats) def scalar_rep(x): try: - return bytes.__mul__(x, repeats) + return bytes.__mul__(x, rint) except TypeError: - return str.__mul__(x, repeats) + return str.__mul__(x, rint) return self._str_map(scalar_rep, dtype=str) else: @@ -198,8 +200,11 @@ def rep(x, r): except TypeError: return str.__mul__(x, r) - repeats = np.asarray(repeats, dtype=object) - result = libops.vec_binop(np.asarray(self), repeats, rep) + result = libops.vec_binop( + np.asarray(self), + np.asarray(repeats, dtype=object), + rep, + ) if isinstance(self, BaseStringArray): # Not going through map, so we have to do this here. 
result = type(self)._from_sequence(result) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 3e1b6070ffc39..74210a1ce5ad8 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -56,7 +56,6 @@ is_integer_dtype, is_list_like, is_numeric_dtype, - is_scalar, ) from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.generic import ( @@ -599,8 +598,7 @@ def _adjust_to_origin(arg, origin, unit): else: # arg must be numeric if not ( - (is_scalar(arg) and (is_integer(arg) or is_float(arg))) - or is_numeric_dtype(np.asarray(arg)) + (is_integer(arg) or is_float(arg)) or is_numeric_dtype(np.asarray(arg)) ): raise ValueError( f"'{arg}' is not compatible with origin='{origin}'; " diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 0bac882756a91..93f9609fa64c1 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -250,8 +250,8 @@ def _has_complex_date_col(self) -> bool: @final def _should_parse_dates(self, i: int) -> bool: - if isinstance(self.parse_dates, bool): - return self.parse_dates + if lib.is_bool(self.parse_dates): + return bool(self.parse_dates) else: if self.index_names is not None: name = self.index_names[i] @@ -259,14 +259,9 @@ def _should_parse_dates(self, i: int) -> bool: name = None j = i if self.index_col is None else self.index_col[i] - if is_scalar(self.parse_dates): - return (j == self.parse_dates) or ( - name is not None and name == self.parse_dates - ) - else: - return (j in self.parse_dates) or ( - name is not None and name in self.parse_dates - ) + return (j in self.parse_dates) or ( + name is not None and name in self.parse_dates + ) @final def _extract_multi_indexer_columns( @@ -1370,13 +1365,12 @@ def _validate_parse_dates_arg(parse_dates): "for the 'parse_dates' parameter" ) - if parse_dates is not None: - if is_scalar(parse_dates): - if not lib.is_bool(parse_dates): - raise TypeError(msg) - - elif not isinstance(parse_dates, (list, dict)): - raise TypeError(msg) + if not ( + parse_dates is None + or lib.is_bool(parse_dates) + or isinstance(parse_dates, (list, dict)) + ): + raise TypeError(msg) return parse_dates From 1e008b1226572eb9cb1f434e3c8d35e5ca8266a5 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 27 Apr 2023 12:20:02 -0700 Subject: [PATCH 2/4] comment --- pandas/core/ops/missing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/ops/missing.py b/pandas/core/ops/missing.py index 5a2ec11ac2c15..9707f5df927fb 100644 --- a/pandas/core/ops/missing.py +++ b/pandas/core/ops/missing.py @@ -53,6 +53,7 @@ def _fill_zeros(result, x, y): is_scalar_type = is_scalar(y) if not is_variable_type and not is_scalar_type: + # e.g. 
test_series_ops_name_retention with mod we get here with list/tuple return result if is_scalar_type: From b6e76fd3a58940e6aac1fbf8dba064a5904cd96b Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 27 Apr 2023 15:33:33 -0700 Subject: [PATCH 3/4] infer_dtype->is_bool_array --- pandas/core/common.py | 3 +-- pandas/core/series.py | 5 +---- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index da99b72d60302..0f0d40ccdc4f3 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -44,7 +44,6 @@ ABCSeries, ) from pandas.core.dtypes.inference import iterable_not_string -from pandas.core.dtypes.missing import isna if TYPE_CHECKING: from pandas._typing import ( @@ -129,7 +128,7 @@ def is_bool_indexer(key: Any) -> bool: if not lib.is_bool_array(key_array): na_msg = "Cannot mask with non-boolean array containing NA / NaN values" - if lib.infer_dtype(key_array) == "boolean" and isna(key_array).any(): + if lib.is_bool_array(key_array, skipna=True): # Don't raise on e.g. ["A", "B", np.nan], see # test_loc_getitem_list_of_labels_categoricalindex_with_na raise ValueError(na_msg) diff --git a/pandas/core/series.py b/pandas/core/series.py index 53b56fc8686bf..2b71eb4a9480d 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1019,10 +1019,7 @@ def _get_with(self, key): if not isinstance(key, (list, np.ndarray, ExtensionArray, Series, Index)): key = list(key) - if isinstance(key, Index): - key_type = key.inferred_type - else: - key_type = lib.infer_dtype(key, skipna=False) + key_type = lib.infer_dtype(key, skipna=False) # Note: The key_type == "boolean" case should be caught by the # com.is_bool_indexer check in __getitem__ From 54719796e38c435dd0619648962cb1b6fea208b0 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 27 Apr 2023 18:53:17 -0700 Subject: [PATCH 4/4] fix invalid refs --- pandas/tests/frame/methods/test_reindex.py | 3 +-- pandas/tests/frame/test_arithmetic.py | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py index d481f25e9e0fe..a96dec5f34ce1 100644 --- a/pandas/tests/frame/methods/test_reindex.py +++ b/pandas/tests/frame/methods/test_reindex.py @@ -23,7 +23,6 @@ ) import pandas._testing as tm from pandas.api.types import CategoricalDtype as CDT -import pandas.core.common as com class TestReindexSetIndex: @@ -355,7 +354,7 @@ def test_reindex_frame_add_nat(self): result = df.reindex(range(15)) assert np.issubdtype(result["B"].dtype, np.dtype("M8[ns]")) - mask = com.isna(result)["B"] + mask = isna(result)["B"] assert mask[-5:].all() assert not mask[:-5].any() diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 090b3d64e7c41..c71ceae762e67 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -21,7 +21,6 @@ Series, ) import pandas._testing as tm -import pandas.core.common as com from pandas.core.computation import expressions as expr from pandas.core.computation.expressions import ( _MIN_ELEMENTS, @@ -1246,12 +1245,12 @@ def test_operators_none_as_na(self, op): filled = df.fillna(np.nan) result = op(df, 3) expected = op(filled, 3).astype(object) - expected[com.isna(expected)] = None + expected[pd.isna(expected)] = None tm.assert_frame_equal(result, expected) result = op(df, df) expected = op(filled, filled).astype(object) - expected[com.isna(expected)] = None + expected[pd.isna(expected)] = None tm.assert_frame_equal(result, 
expected) result = op(df, df.fillna(7))
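
Note: the short sketch below, assuming a pandas development checkout, illustrates why the narrowed type checks in these patches are safe on the code paths they touch. It relies only on helpers that already appear in the diffs (is_integer, is_scalar, lib.is_bool_array); the example values are illustrative and not taken from the test suite.

    import numpy as np

    from pandas._libs import lib
    from pandas.api.types import is_integer, is_scalar

    # algorithms.searchsorted: the dtype-narrowing branch is only entered when
    # `value` is an integer scalar or an integer-dtype array, so is_integer(value)
    # separates the scalar case exactly where is_scalar(value) used to.
    assert is_integer(5) and is_scalar(5)
    assert not is_integer(np.array([5, 6])) and not is_scalar(np.array([5, 6]))

    # common.is_bool_indexer: this branch is only reached after the strict
    # lib.is_bool_array(key_array) check has already failed, so
    # is_bool_array(..., skipna=True) flags exactly the boolean-with-NA case
    # that the old infer_dtype(...) == "boolean" and isna(...).any()
    # combination caught, while non-boolean arrays still fall through.
    bool_with_na = np.array([True, False, np.nan], dtype=object)
    strings_with_na = np.array(["A", "B", np.nan], dtype=object)
    assert lib.is_bool_array(bool_with_na, skipna=True)
    assert not lib.is_bool_array(strings_with_na, skipna=True)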