diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
index 5de5bd58bd35f..2f6e97dd3db50 100644
--- a/doc/source/whatsnew/v2.2.0.rst
+++ b/doc/source/whatsnew/v2.2.0.rst
@@ -928,6 +928,10 @@ Reshaping
 Sparse
 ^^^^^^
 - Bug in :meth:`SparseArray.take` when using a different fill value than the array's fill value (:issue:`55181`)
+
+ExtensionArray
+^^^^^^^^^^^^^^
+- Bug in :class:`Series` constructor losing precision for large integers when given a NumPy masked array containing masked values (:issue:`56566`)
 
 Other
 ^^^^^
diff --git a/pandas/core/construction.py b/pandas/core/construction.py
index d41a9c80a10ec..b3fbb75e330ae 100644
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -47,6 +47,8 @@
     maybe_promote,
 )
 from pandas.core.dtypes.common import (
+    is_float_dtype,
+    is_integer_dtype,
     is_list_like,
     is_object_dtype,
     is_string_dtype,
@@ -503,11 +505,28 @@ def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray:
     Convert numpy MaskedArray to ensure mask is softened.
     """
     mask = ma.getmaskarray(data)
+    original = data
     if mask.any():
         dtype, fill_value = maybe_promote(data.dtype, np.nan)
         dtype = cast(np.dtype, dtype)
         data = ma.asarray(data.astype(dtype, copy=True))
         data.soften_mask()  # set hardmask False if it was True
+        if is_integer_dtype(original.dtype) and is_float_dtype(data.dtype):
+            # GH#56566: float64 cannot hold every integer of magnitude 2**53
+            # or more, so the promotion above may have altered values. Only
+            # those candidates get the exact (Python int) round-trip check.
+            promoted = data.compressed()
+            exact = original.compressed()
+            suspect = np.abs(promoted) >= 2.0**53
+            if any(
+                int(as_float) != int(as_int)
+                for as_float, as_int in zip(promoted[suspect], exact[suspect])
+            ):
+                # Fall back to object dtype so the integers stay exact.
+                data = ma.asarray(original, "object")
+                # ma.asarray carries over a hard mask; soften it so the
+                # fill below can overwrite the masked slots.
+                data.soften_mask()
         data[mask] = fill_value
     else:
         data = data.copy()
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index 55ca1f98f6d6c..f7724ce0a306c 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -2157,6 +2157,36 @@ def test_inference_on_pandas_objects(self):
         result = Series(idx)
         assert result.dtype != np.object_
 
+    def test_series_constructor_maskedarray_int_overflow(self):
+        # GH#56566
+        values = [
+            4873214862074861312,
+            4875446630161458944,
+            4824652147895424384,
+            0,
+            3526420114272476800,
+        ]
+        mask = [False, False, False, True, False]
+        expected = Series(
+            IntegerArray(np.array(values, dtype="int64"), np.array(mask))
+        )
+        # a hard mask must not prevent the masked slot from becoming NA
+        for hard_mask in [False, True]:
+            mx = ma.masked_array(values, mask=mask, hard_mask=hard_mask)
+            result = Series(mx, dtype="Int64")
+            tm.assert_series_equal(result, expected)
+
+    def test_series_constructor_maskedarray_int_inexact_below_max(self):
+        # GH#56566: the largest value is exact in float64, the others are not
+        values = [2**62, 2**53 + 1, -(2**53) - 1, 0]
+        mask = [False, False, False, True]
+        mx = ma.masked_array(values, mask=mask)
+        result = Series(mx, dtype="Int64")
+        expected = Series(
+            IntegerArray(np.array(values, dtype="int64"), np.array(mask))
+        )
+        tm.assert_series_equal(result, expected)
+
 
 class TestSeriesConstructorIndexCoercion:
     def test_series_constructor_datetimelike_index_coercion(self):