From 57bf94bfaeaaab7074dda04f66d0485d1057b67d Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Thu, 2 Feb 2023 21:50:28 -0500 Subject: [PATCH 1/3] CLN: Partially revert #29553 --- pandas/core/array_algos/replace.py | 13 +------ pandas/core/dtypes/common.py | 60 ------------------------------ pandas/core/dtypes/missing.py | 7 ---- pandas/core/missing.py | 23 +++++------- pandas/tests/dtypes/test_common.py | 17 --------- 5 files changed, 10 insertions(+), 110 deletions(-) diff --git a/pandas/core/array_algos/replace.py b/pandas/core/array_algos/replace.py index 466eeb768f5f9..14bf26f40ea0d 100644 --- a/pandas/core/array_algos/replace.py +++ b/pandas/core/array_algos/replace.py @@ -19,8 +19,6 @@ ) from pandas.core.dtypes.common import ( - is_datetimelike_v_numeric, - is_numeric_v_string_like, is_re, is_re_compilable, is_scalar, @@ -44,7 +42,7 @@ def should_use_regex(regex: bool, to_replace: Any) -> bool: def compare_or_regex_search( a: ArrayLike, b: Scalar | Pattern, regex: bool, mask: npt.NDArray[np.bool_] -) -> ArrayLike | bool: +) -> ArrayLike: """ Compare two array-like inputs of the same shape or two scalar values @@ -95,15 +93,6 @@ def _check_comparison_types( if isinstance(a, np.ndarray): a = a[mask] - if is_numeric_v_string_like(a, b): - # GH#29553 avoid deprecation warnings from numpy - return np.zeros(a.shape, dtype=bool) - - elif is_datetimelike_v_numeric(a, b): - # GH#29553 avoid deprecation warnings from numpy - _check_comparison_types(False, a, b) - return False - result = op(a) if isinstance(result, np.ndarray) and mask is not None: diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index fb9817de2b69b..3281c7fe859e5 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1067,64 +1067,6 @@ def is_numeric_v_string_like(a: ArrayLike, b) -> bool: ) -# This exists to silence numpy deprecation warnings, see GH#29553 -def is_datetimelike_v_numeric(a, b) -> bool: - """ - Check if we are comparing a datetime-like object to a numeric object. - By "numeric," we mean an object that is either of an int or float dtype. - - Parameters - ---------- - a : array-like, scalar - The first object to check. - b : array-like, scalar - The second object to check. - - Returns - ------- - boolean - Whether we return a comparing a datetime-like to a numeric object. - - Examples - -------- - >>> from datetime import datetime - >>> dt = np.datetime64(datetime(2017, 1, 1)) - >>> - >>> is_datetimelike_v_numeric(1, 1) - False - >>> is_datetimelike_v_numeric(dt, dt) - False - >>> is_datetimelike_v_numeric(1, dt) - True - >>> is_datetimelike_v_numeric(dt, 1) # symmetric check - True - >>> is_datetimelike_v_numeric(np.array([dt]), 1) - True - >>> is_datetimelike_v_numeric(np.array([1]), dt) - True - >>> is_datetimelike_v_numeric(np.array([dt]), np.array([1])) - True - >>> is_datetimelike_v_numeric(np.array([1]), np.array([2])) - False - >>> is_datetimelike_v_numeric(np.array([dt]), np.array([dt])) - False - """ - if not hasattr(a, "dtype"): - a = np.asarray(a) - if not hasattr(b, "dtype"): - b = np.asarray(b) - - def is_numeric(x): - """ - Check if an object has a numeric dtype (i.e. integer or float). - """ - return is_integer_dtype(x) or is_float_dtype(x) - - return (needs_i8_conversion(a) and is_numeric(b)) or ( - needs_i8_conversion(b) and is_numeric(a) - ) - - def needs_i8_conversion(arr_or_dtype) -> bool: """ Check whether the array or dtype should be converted to int64. @@ -1790,7 +1732,6 @@ def is_all_strings(value: ArrayLike) -> bool: "is_datetime64_dtype", "is_datetime64_ns_dtype", "is_datetime64tz_dtype", - "is_datetimelike_v_numeric", "is_datetime_or_timedelta_dtype", "is_decimal", "is_dict_like", @@ -1809,7 +1750,6 @@ def is_all_strings(value: ArrayLike) -> bool: "is_number", "is_numeric_dtype", "is_any_numeric_dtype", - "is_numeric_v_string_like", "is_object_dtype", "is_period_dtype", "is_re", diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 5b97cb879d663..211b67d3590ed 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -28,7 +28,6 @@ is_bool_dtype, is_categorical_dtype, is_complex_dtype, - is_datetimelike_v_numeric, is_dtype_equal, is_extension_array_dtype, is_float_dtype, @@ -505,8 +504,6 @@ def array_equivalent( # fastpath when we require that the dtypes match (Block.equals) if left.dtype.kind in ["f", "c"]: return _array_equivalent_float(left, right) - elif is_datetimelike_v_numeric(left.dtype, right.dtype): - return False elif needs_i8_conversion(left.dtype): return _array_equivalent_datetimelike(left, right) elif is_string_or_object_np_dtype(left.dtype): @@ -529,10 +526,6 @@ def array_equivalent( return True return ((left == right) | (isna(left) & isna(right))).all() - elif is_datetimelike_v_numeric(left, right): - # GH#29553 avoid numpy deprecation warning - return False - elif needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype): # datetime64, timedelta64, Period if not is_dtype_equal(left.dtype, right.dtype): diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 162186bc4186a..c1f513426f1b2 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -31,7 +31,6 @@ from pandas.core.dtypes.cast import infer_dtype_from from pandas.core.dtypes.common import ( is_array_like, - is_numeric_v_string_like, is_object_dtype, needs_i8_conversion, ) @@ -96,20 +95,16 @@ def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]: # GH 21977 mask = np.zeros(arr.shape, dtype=bool) for x in nonna: - if is_numeric_v_string_like(arr, x): - # GH#29553 prevent numpy deprecation warnings - pass + if potential_na: + new_mask = np.zeros(arr.shape, dtype=np.bool_) + new_mask[arr_mask] = arr[arr_mask] == x else: - if potential_na: - new_mask = np.zeros(arr.shape, dtype=np.bool_) - new_mask[arr_mask] = arr[arr_mask] == x - else: - new_mask = arr == x - - if not isinstance(new_mask, np.ndarray): - # usually BooleanArray - new_mask = new_mask.to_numpy(dtype=bool, na_value=False) - mask |= new_mask + new_mask = arr == x + + if not isinstance(new_mask, np.ndarray): + # usually BooleanArray + new_mask = new_mask.to_numpy(dtype=bool, na_value=False) + mask |= new_mask if na_mask.any(): mask |= isna(arr) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index ce900ff649eec..73abea30029b1 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -1,7 +1,5 @@ from __future__ import annotations -from datetime import datetime - import numpy as np import pytest @@ -517,21 +515,6 @@ def test_is_numeric_v_string_like(): assert com.is_numeric_v_string_like(np.array(["foo"]), np.array([1, 2])) -def test_is_datetimelike_v_numeric(): - dt = np.datetime64(datetime(2017, 1, 1)) - - assert not com.is_datetimelike_v_numeric(1, 1) - assert not com.is_datetimelike_v_numeric(dt, dt) - assert not com.is_datetimelike_v_numeric(np.array([1]), np.array([2])) - assert not com.is_datetimelike_v_numeric(np.array([dt]), np.array([dt])) - - assert com.is_datetimelike_v_numeric(1, dt) - assert com.is_datetimelike_v_numeric(1, dt) - assert com.is_datetimelike_v_numeric(np.array([dt]), 1) - assert com.is_datetimelike_v_numeric(np.array([1]), dt) - assert com.is_datetimelike_v_numeric(np.array([dt]), np.array([1])) - - def test_needs_i8_conversion(): assert not com.needs_i8_conversion(str) assert not com.needs_i8_conversion(np.int64) From ad406b53e07ad7a0afb0b32b8ce1c99a3ca8888b Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sat, 4 Feb 2023 13:09:04 -0500 Subject: [PATCH 2/3] fix tests --- pandas/core/internals/blocks.py | 5 +---- pandas/core/missing.py | 6 +++++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index bb555690b867a..35aaa83a69380 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -677,10 +677,7 @@ def replace_list( # GH#38086 faster if we know we dont need to check for regex masks = [missing.mask_missing(values, s[0]) for s in pairs] - # error: Argument 1 to "extract_bool_array" has incompatible type - # "Union[ExtensionArray, ndarray, bool]"; expected "Union[ExtensionArray, - # ndarray]" - masks = [extract_bool_array(x) for x in masks] # type: ignore[arg-type] + masks = [extract_bool_array(x) for x in masks] rb = [self if inplace else self.copy()] for i, (src, dest) in enumerate(pairs): diff --git a/pandas/core/missing.py b/pandas/core/missing.py index c1f513426f1b2..62318aa9ab4f4 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -31,6 +31,7 @@ from pandas.core.dtypes.cast import infer_dtype_from from pandas.core.dtypes.common import ( is_array_like, + is_numeric_v_string_like, is_object_dtype, needs_i8_conversion, ) @@ -95,7 +96,10 @@ def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]: # GH 21977 mask = np.zeros(arr.shape, dtype=bool) for x in nonna: - if potential_na: + if is_numeric_v_string_like(arr, x): + # GH#29553 prevent numpy deprecation warnings + pass + elif potential_na: new_mask = np.zeros(arr.shape, dtype=np.bool_) new_mask[arr_mask] = arr[arr_mask] == x else: From 8e414db6b3d63cc1700b967fac777aa57135c20d Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sat, 4 Feb 2023 17:13:40 -0500 Subject: [PATCH 3/3] Update missing.py --- pandas/core/missing.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 62318aa9ab4f4..162186bc4186a 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -99,16 +99,17 @@ def mask_missing(arr: ArrayLike, values_to_mask) -> npt.NDArray[np.bool_]: if is_numeric_v_string_like(arr, x): # GH#29553 prevent numpy deprecation warnings pass - elif potential_na: - new_mask = np.zeros(arr.shape, dtype=np.bool_) - new_mask[arr_mask] = arr[arr_mask] == x else: - new_mask = arr == x - - if not isinstance(new_mask, np.ndarray): - # usually BooleanArray - new_mask = new_mask.to_numpy(dtype=bool, na_value=False) - mask |= new_mask + if potential_na: + new_mask = np.zeros(arr.shape, dtype=np.bool_) + new_mask[arr_mask] = arr[arr_mask] == x + else: + new_mask = arr == x + + if not isinstance(new_mask, np.ndarray): + # usually BooleanArray + new_mask = new_mask.to_numpy(dtype=bool, na_value=False) + mask |= new_mask if na_mask.any(): mask |= isna(arr)