From bc9ed49445978574f1c4421738577e4199c61130 Mon Sep 17 00:00:00 2001 From: parthiban Date: Fri, 22 Dec 2023 15:27:30 +0530 Subject: [PATCH 1/5] BUG: Fix inconsistency when constructing a Series with large integers in an int64 masked array - Referred to code from PR#50757, a similar issue for non-masked ints --- pandas/core/construction.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index d41a9c80a10ec..5dbd533b5d853 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -47,6 +47,8 @@ maybe_promote, ) from pandas.core.dtypes.common import ( + is_float_dtype, + is_integer_dtype, is_list_like, is_object_dtype, is_string_dtype, @@ -503,11 +505,29 @@ def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray: Convert numpy MaskedArray to ensure mask is softened. """ mask = ma.getmaskarray(data) + original = data + original_dtype = data.dtype if mask.any(): dtype, fill_value = maybe_promote(data.dtype, np.nan) dtype = cast(np.dtype, dtype) data = ma.asarray(data.astype(dtype, copy=True)) data.soften_mask() # set hardmask False if it was True + if not mask.all(): + idx = np.unravel_index(np.nanargmax(data, axis=None), data.shape) + if not mask[idx] and int(data[idx]) != original[idx]: + if ( + is_integer_dtype(original_dtype) + and is_float_dtype(data.dtype) + and len(data) > 0 + ): + inferred_type = lib.infer_dtype(original, skipna=True) + if ( + inferred_type not in ["floating", "mixed-integer-float"] + and not mask.any() + ): + data = np.array(original, dtype=dtype, copy=False) + else: + data = np.array(original, dtype="object", copy=False) data[mask] = fill_value else: data = data.copy() From 5ae5ca9fc8d81d70146746ed91173c243cb02a18 Mon Sep 17 00:00:00 2001 From: parthiban Date: Fri, 22 Dec 2023 18:36:32 +0530 Subject: [PATCH 2/5] BUG: Fix mypy typing error --- pandas/core/construction.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/pandas/core/construction.py b/pandas/core/construction.py index 5dbd533b5d853..d4efb16425f66 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -525,9 +525,9 @@ def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray: inferred_type not in ["floating", "mixed-integer-float"] and not mask.any() ): - data = np.array(original, dtype=dtype, copy=False) + data = ma.asarray(original, dtype) else: - data = np.array(original, dtype="object", copy=False) + data = ma.asarray(original, "object") data[mask] = fill_value else: data = data.copy() From 5d31e3bbd0fb559b687a95a50ebdb6b863fc5680 Mon Sep 17 00:00:00 2001 From: parthiban Date: Fri, 22 Dec 2023 22:34:00 +0530 Subject: [PATCH 3/5] BUG: Remove inferred dtype block since it is redundant --- pandas/core/construction.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index d4efb16425f66..b3fbb75e330ae 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -520,14 +520,7 @@ def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray: and is_float_dtype(data.dtype) and len(data) > 0 ): - inferred_type = lib.infer_dtype(original, skipna=True) - if ( - inferred_type not in ["floating", "mixed-integer-float"] - and not mask.any() - ): - data = ma.asarray(original, dtype) - else: - data = ma.asarray(original, "object") + data = ma.asarray(original, "object") data[mask] = fill_value else: data = data.copy() From 7cb24d3c1d899ff5092644f4c56818f144f03f6b Mon Sep 17 00:00:00 2001 From: parthiban Date: Fri, 22 Dec 2023 22:35:38 +0530 Subject: [PATCH 4/5] TST: Add test case for integer precision inconsistency --- pandas/tests/series/test_constructors.py | 30 ++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 55ca1f98f6d6c..f7724ce0a306c 100644 --- 
a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -2157,6 +2157,36 @@ def test_inference_on_pandas_objects(self): result = Series(idx) assert result.dtype != np.object_ + def test_series_constructor_maskedarray_int_overflow(self): + # GH#56566 + mx = ma.masked_array( + [ + 4873214862074861312, + 4875446630161458944, + 4824652147895424384, + 0, + 3526420114272476800, + ], + mask=[0, 0, 0, 1, 0], + ) + result = Series(mx, dtype="Int64") + expected = Series( + IntegerArray( + np.array( + [ + 4873214862074861312, + 4875446630161458944, + 4824652147895424384, + 0, + 3526420114272476800, + ], + dtype="int64", + ), + np.array([0, 0, 0, 1, 0], dtype=np.bool_), + ) + ) + tm.assert_series_equal(result, expected) + class TestSeriesConstructorIndexCoercion: def test_series_constructor_datetimelike_index_coercion(self): From bb142c84aca293ee28800c8c48efc9e755587054 Mon Sep 17 00:00:00 2001 From: parthiban Date: Fri, 22 Dec 2023 22:46:29 +0530 Subject: [PATCH 5/5] DOC: Update whatsnew --- doc/source/whatsnew/v2.2.0.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 5de5bd58bd35f..2f6e97dd3db50 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -928,6 +928,17 @@ Reshaping Sparse ^^^^^^ - Bug in :meth:`SparseArray.take` when using a different fill value than the array's fill value (:issue:`55181`) +- + +ExtensionArray +^^^^^^^^^^^^^^ +- Bug in :class:`Series` constructor giving inconsistent precision for large integer (:issue:`56566`) +- + +Styler +^^^^^^ +- +- Other ^^^^^