From f780345d7687733f2f64679fa3eb73603f038f02 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 26 Oct 2023 08:52:05 -0700 Subject: [PATCH 1/5] REF: Avoid np.can_cast for scalar inference for NEP 50 --- pandas/core/dtypes/cast.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 27625db766862..d6882fee7aec3 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -699,7 +699,9 @@ def _maybe_promote(dtype: np.dtype, fill_value=np.nan): dtype = np.dtype(np.object_) elif issubclass(dtype.type, np.integer): - if not np.can_cast(fill_value, dtype): + try: + np_can_hold_element(dtype, fill_value) + except (LossySetitemError, NotImplementedError): # upcast to prevent overflow mst = np.min_scalar_type(fill_value) dtype = np.promote_types(dtype, mst) @@ -1751,9 +1753,14 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: if dtype.kind in "iu": if isinstance(element, range): - if _dtype_can_hold_range(element, dtype): + if not len(element): + return True + try: + np_can_hold_element(dtype, element.start) + np_can_hold_element(dtype, element.stop) return element - raise LossySetitemError + except (LossySetitemError, NotImplementedError) as err: + raise LossySetitemError from err if is_integer(element) or (is_float(element) and element.is_integer()): # e.g. test_setitem_series_int8 if we have a python int 1 @@ -1906,14 +1913,3 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: raise LossySetitemError raise NotImplementedError(dtype) - - -def _dtype_can_hold_range(rng: range, dtype: np.dtype) -> bool: - """ - _maybe_infer_dtype_type infers to int64 (and float64 for very large endpoints), - but in many cases a range can be held by a smaller integer dtype. - Check if this is one of those cases. - """ - if not len(rng): - return True - return np.can_cast(rng[0], dtype) and np.can_cast(rng[-1], dtype) From 64b03f939c1bed6aebd44c09fb688fbf6299201a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 26 Oct 2023 09:19:04 -0700 Subject: [PATCH 2/5] Use helper function --- pandas/core/dtypes/cast.py | 45 +++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index d6882fee7aec3..2b146010b2e12 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -699,9 +699,7 @@ def _maybe_promote(dtype: np.dtype, fill_value=np.nan): dtype = np.dtype(np.object_) elif issubclass(dtype.type, np.integer): - try: - np_can_hold_element(dtype, fill_value) - except (LossySetitemError, NotImplementedError): + if not np_can_cast_scalar(fill_value, dtype): # upcast to prevent overflow mst = np.min_scalar_type(fill_value) dtype = np.promote_types(dtype, mst) @@ -1753,14 +1751,9 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: if dtype.kind in "iu": if isinstance(element, range): - if not len(element): - return True - try: - np_can_hold_element(dtype, element.start) - np_can_hold_element(dtype, element.stop) + if _dtype_can_hold_range(element, dtype): return element - except (LossySetitemError, NotImplementedError) as err: - raise LossySetitemError from err + raise LossySetitemError if is_integer(element) or (is_float(element) and element.is_integer()): # e.g. test_setitem_series_int8 if we have a python int 1 @@ -1913,3 +1906,35 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: raise LossySetitemError raise NotImplementedError(dtype) + + +def _dtype_can_hold_range(rng: range, dtype: np.dtype) -> bool: + """ + _maybe_infer_dtype_type infers to int64 (and float64 for very large endpoints), + but in many cases a range can be held by a smaller integer dtype. + Check if this is one of those cases. + """ + if not len(rng): + return True + return np_can_cast_scalar(rng.start, dtype) and np_can_cast_scalar(rng.end, dtype) + + +def np_can_cast_scalar(element: Scalar, dtype: np.dtype) -> bool: + """ + np.can_cast pandas-equivalent for pre 2-0 behavior that allowed scalar + inference + + Parameters + ---------- + element : Scalar + dtype : np.dtype + + Returns + ------- + bool + """ + try: + np_can_hold_element(dtype, element) + return True + except (LossySetitemError, NotImplementedError): + return False From 2b5c4b0a23ae35c026b9844554029f993ee7a5f4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 26 Oct 2023 09:56:59 -0700 Subject: [PATCH 3/5] end->stop --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 2b146010b2e12..8191476944d22 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1916,7 +1916,7 @@ def _dtype_can_hold_range(rng: range, dtype: np.dtype) -> bool: """ if not len(rng): return True - return np_can_cast_scalar(rng.start, dtype) and np_can_cast_scalar(rng.end, dtype) + return np_can_cast_scalar(rng.start, dtype) and np_can_cast_scalar(rng.stop, dtype) def np_can_cast_scalar(element: Scalar, dtype: np.dtype) -> bool: From c55e46ec5f41f242d94dd9936959ebd84e69f7a3 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 26 Oct 2023 11:18:53 -0700 Subject: [PATCH 4/5] ignore typing --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 8191476944d22..716d1a78f93c5 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -699,7 +699,7 @@ def _maybe_promote(dtype: np.dtype, fill_value=np.nan): dtype = np.dtype(np.object_) elif issubclass(dtype.type, np.integer): - if not np_can_cast_scalar(fill_value, dtype): + if not np_can_cast_scalar(fill_value, dtype): # type: ignore[arg-type] # upcast to prevent overflow mst = np.min_scalar_type(fill_value) dtype = np.promote_types(dtype, mst) From 6176b11e830fb72e2b32ea00b55645621dd3c0cd Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 26 Oct 2023 14:11:43 -0700 Subject: [PATCH 5/5] Address NEP 50 later --- ci/run_tests.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/run_tests.sh b/ci/run_tests.sh index 48ef21686a26f..6a70ea1df3e71 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -10,7 +10,8 @@ echo PYTHONHASHSEED=$PYTHONHASHSEED COVERAGE="-s --cov=pandas --cov-report=xml --cov-append --cov-config=pyproject.toml" -PYTEST_CMD="MESONPY_EDITABLE_VERBOSE=1 PYTHONDEVMODE=1 PYTHONWARNDEFAULTENCODING=1 pytest -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" +# TODO: Support NEP 50 and remove NPY_PROMOTION_STATE +PYTEST_CMD="NPY_PROMOTION_STATE=legacy MESONPY_EDITABLE_VERBOSE=1 PYTHONDEVMODE=1 PYTHONWARNDEFAULTENCODING=1 pytest -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" if [[ "$PATTERN" ]]; then PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\""