From ceffe6d701a27784d0037df200ae0bebec858020 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 16 Jun 2022 14:53:02 -0700 Subject: [PATCH 1/8] CI/TST: Don't require length for construct_1d_arraylike_from_scalar cast to float64 --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index ed3f9ee525c9e..bc218e6a84303 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1696,7 +1696,7 @@ def construct_1d_arraylike_from_scalar( else: - if length and is_integer_dtype(dtype) and isna(value): + if is_integer_dtype(dtype) and isna(value): # coerce if we have nan for an integer dtype dtype = np.dtype("float64") elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"): From d324332af8c15c05da88bd559c97839619f0dda4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 16 Jun 2022 15:42:32 -0700 Subject: [PATCH 2/8] Just short circuit --- pandas/core/dtypes/cast.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index bc218e6a84303..144bcb9bc9ea6 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1697,6 +1697,9 @@ def construct_1d_arraylike_from_scalar( else: if is_integer_dtype(dtype) and isna(value): + if not length: + # numpy > 1.24 will raise filling np.nan into int dtypes + return np.array([], dtype=dtype) # coerce if we have nan for an integer dtype dtype = np.dtype("float64") elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"): From cdcf033f14a179ec3841017e481baa36d9935929 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 16 Jun 2022 17:16:07 -0700 Subject: [PATCH 3/8] Add errstate --- pandas/core/construction.py | 4 +++- pandas/core/dtypes/cast.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 8d26284a5ce45..cd83f0444075a 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -567,7 +567,9 @@ def sanitize_array( FutureWarning, stacklevel=find_stack_level(), ) - subarr = np.array(data, copy=copy) + # GH 47391 numpy > 1.24 will raise a RuntimeError for this behavior too. + with np.errstate(invalid="ignore"): + subarr = np.array(data, copy=copy) except ValueError: if not raise_cast_failure: # i.e. called via DataFrame constructor diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 144bcb9bc9ea6..007c21cace8bd 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1698,7 +1698,7 @@ def construct_1d_arraylike_from_scalar( if is_integer_dtype(dtype) and isna(value): if not length: - # numpy > 1.24 will raise filling np.nan into int dtypes + # GH 47391: numpy > 1.24 will raise filling np.nan into int dtypes return np.array([], dtype=dtype) # coerce if we have nan for an integer dtype dtype = np.dtype("float64") From 1a82a00bfd2bfbd7ac24c98228e9c7d9c7bf1a70 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 17 Jun 2022 10:15:57 -0700 Subject: [PATCH 4/8] Move errstate --- pandas/core/construction.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index cd83f0444075a..4b63d492ec1dd 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -556,7 +556,10 @@ def sanitize_array( if dtype is not None and is_float_dtype(data.dtype) and is_integer_dtype(dtype): # possibility of nan -> garbage try: - subarr = _try_cast(data, dtype, copy, True) + # GH 47391 numpy > 1.24 will raise a RuntimeError for nan -> int + # casting aligning with IntCastingNaNError below + with np.errstate(invalid="ignore"): + subarr = _try_cast(data, dtype, copy, True) except IntCastingNaNError: warnings.warn( "In a future version, passing float-dtype values containing NaN " @@ -567,9 +570,7 @@ def sanitize_array( FutureWarning, stacklevel=find_stack_level(), ) - # GH 47391 numpy > 1.24 will raise a RuntimeError for this behavior too. - with np.errstate(invalid="ignore"): - subarr = np.array(data, copy=copy) + subarr = np.array(data, copy=copy) except ValueError: if not raise_cast_failure: # i.e. called via DataFrame constructor From 536786f658c646befb4dea430bd0f00c1a3b5af9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 17 Jun 2022 22:43:29 -0700 Subject: [PATCH 5/8] Add errstate to merge --- pandas/core/reshape/merge.py | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 4227d43c459d0..aeed0daf37f16 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1200,23 +1200,27 @@ def _maybe_coerce_merge_keys(self) -> None: # check whether ints and floats elif is_integer_dtype(rk.dtype) and is_float_dtype(lk.dtype): - if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all(): - warnings.warn( - "You are merging on int and float " - "columns where the float values " - "are not equal to their int representation.", - UserWarning, - ) + # GH 47391 numpy > 1.24 will raise a RuntimeError for nan -> int + with np.errstate(ignore="invalid"): + if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all(): + warnings.warn( + "You are merging on int and float " + "columns where the float values " + "are not equal to their int representation.", + UserWarning, + ) continue elif is_float_dtype(rk.dtype) and is_integer_dtype(lk.dtype): - if not (rk == rk.astype(lk.dtype))[~np.isnan(rk)].all(): - warnings.warn( - "You are merging on int and float " - "columns where the float values " - "are not equal to their int representation.", - UserWarning, - ) + # GH 47391 numpy > 1.24 will raise a RuntimeError for nan -> int + with np.errstate(ignore="invalid"): + if not (rk == rk.astype(lk.dtype))[~np.isnan(rk)].all(): + warnings.warn( + "You are merging on int and float " + "columns where the float values " + "are not equal to their int representation.", + UserWarning, + ) continue # let's infer and see if we are ok From 106449dcd4b6fcfdaabebeb2b04fbdef9dbc0b22 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 18 Jun 2022 16:30:03 -0700 Subject: [PATCH 6/8] Fix typo --- pandas/core/reshape/merge.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index aeed0daf37f16..6ce5ffac9de52 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1201,7 +1201,7 @@ def _maybe_coerce_merge_keys(self) -> None: # check whether ints and floats elif is_integer_dtype(rk.dtype) and is_float_dtype(lk.dtype): # GH 47391 numpy > 1.24 will raise a RuntimeError for nan -> int - with np.errstate(ignore="invalid"): + with np.errstate(invalid="ignore"): if not (lk == lk.astype(rk.dtype))[~np.isnan(lk)].all(): warnings.warn( "You are merging on int and float " @@ -1213,7 +1213,7 @@ def _maybe_coerce_merge_keys(self) -> None: elif is_float_dtype(rk.dtype) and is_integer_dtype(lk.dtype): # GH 47391 numpy > 1.24 will raise a RuntimeError for nan -> int - with np.errstate(ignore="invalid"): + with np.errstate(invalid="ignore"): if not (rk == rk.astype(lk.dtype))[~np.isnan(rk)].all(): warnings.warn( "You are merging on int and float " From ffd11717c51cb38867397f17ab9b950442737fa8 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 21 Jun 2022 10:36:55 -0700 Subject: [PATCH 7/8] Move length check --- pandas/core/dtypes/cast.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 007c21cace8bd..3dc9d5f60f163 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1697,9 +1697,6 @@ def construct_1d_arraylike_from_scalar( else: if is_integer_dtype(dtype) and isna(value): - if not length: - # GH 47391: numpy > 1.24 will raise filling np.nan into int dtypes - return np.array([], dtype=dtype) # coerce if we have nan for an integer dtype dtype = np.dtype("float64") elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"): @@ -1712,7 +1709,9 @@ def construct_1d_arraylike_from_scalar( value = _maybe_unbox_datetimelike_tz_deprecation(value, dtype) subarr = np.empty(length, dtype=dtype) - subarr.fill(value) + if length: + # GH 47391: numpy > 1.24 will raise filling np.nan into int dtypes + subarr.fill(value) return subarr From 57d43c2bd20c954afb2bfe32d57a9450d5dbac10 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 21 Jun 2022 11:06:42 -0700 Subject: [PATCH 8/8] Add back length check --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 3dc9d5f60f163..3f16632353a9d 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1696,7 +1696,7 @@ def construct_1d_arraylike_from_scalar( else: - if is_integer_dtype(dtype) and isna(value): + if length and is_integer_dtype(dtype) and isna(value): # coerce if we have nan for an integer dtype dtype = np.dtype("float64") elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"):