From 8340f09bc6e90d708011f1ab87cdb63c5a8904d8 Mon Sep 17 00:00:00 2001 From: lfffkh <1906921213@qq.com> Date: Wed, 6 Nov 2024 20:32:20 +0800 Subject: [PATCH 1/8] first --- pandas/core/construction.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 1e1292f8ef089..57fd3fe57e88e 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -32,7 +32,6 @@ maybe_cast_to_integer_array, maybe_convert_platform, maybe_infer_to_datetimelike, - maybe_promote, ) from pandas.core.dtypes.common import ( ensure_object, @@ -513,10 +512,17 @@ def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray: """ mask = ma.getmaskarray(data) if mask.any(): - dtype, fill_value = maybe_promote(data.dtype, np.nan) - dtype = cast(np.dtype, dtype) + dtype = cast(np.dtype, data.dtype) data = ma.asarray(data.astype(dtype, copy=True)) data.soften_mask() # set hardmask False if it was True + + if np.issubdtype(dtype, np.integer): + fill_value: int | float | None = np.iinfo(dtype).min + elif np.issubdtype(dtype, np.floating): + fill_value = np.nan + else: + fill_value = None + data[mask] = fill_value else: data = data.copy() From c4fb9dc4364e1ca662821b84b92a62ff7873d861 Mon Sep 17 00:00:00 2001 From: lfffkh <1906921213@qq.com> Date: Fri, 8 Nov 2024 13:42:14 +0800 Subject: [PATCH 2/8] fix bug --- pandas/core/construction.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 57fd3fe57e88e..facaa6aad6120 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -32,6 +32,7 @@ maybe_cast_to_integer_array, maybe_convert_platform, maybe_infer_to_datetimelike, + maybe_promote, ) from pandas.core.dtypes.common import ( ensure_object, @@ -508,22 +509,21 @@ def ensure_wrapped_if_datetimelike(arr): def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray: """ - Convert numpy MaskedArray to ensure mask is softened. + Convert numpy MaskedArray to ensure mask is softened, + """ mask = ma.getmaskarray(data) if mask.any(): dtype = cast(np.dtype, data.dtype) - data = ma.asarray(data.astype(dtype, copy=True)) - data.soften_mask() # set hardmask False if it was True - - if np.issubdtype(dtype, np.integer): - fill_value: int | float | None = np.iinfo(dtype).min - elif np.issubdtype(dtype, np.floating): - fill_value = np.nan + if isinstance(dtype, ExtensionDtype) and dtype.name.startswith("Masked"): + data = ma.asarray(data.astype(dtype, copy=True)) + data.soften_mask() # If the data is a Masked EA, directly soften the mask. else: - fill_value = None - - data[mask] = fill_value + dtype, fill_value = maybe_promote(data.dtype, np.nan) + dtype = cast(np.dtype, dtype) + data = ma.asarray(data.astype(dtype, copy=True)) + data.soften_mask() # set hardmask False if it was True + data[mask] = fill_value else: data = data.copy() return data From 3d6abaaa35695aa3604562df0a3622183d206773 Mon Sep 17 00:00:00 2001 From: lfffkh <1906921213@qq.com> Date: Fri, 8 Nov 2024 15:22:09 +0800 Subject: [PATCH 3/8] fix bug2 --- pandas/core/construction.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 1d1f8ba24c0bd..baf2e63918829 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -520,7 +520,6 @@ def sanitize_masked_array(data: ma.MaskedArray) -> np.ndarray: data.soften_mask() # If the data is a Masked EA, directly soften the mask. else: dtype, fill_value = maybe_promote(data.dtype, np.nan) - dtype = cast(np.dtype, dtype) data = ma.asarray(data.astype(dtype, copy=True)) data.soften_mask() # set hardmask False if it was True data[mask] = fill_value From e9ae3770a6aa73e96042d4b76836097c2eecbd6d Mon Sep 17 00:00:00 2001 From: lfffkh <1906921213@qq.com> Date: Mon, 11 Nov 2024 22:28:44 +0800 Subject: [PATCH 4/8] add test --- .../dtypes/cast/test_construct_ndarray.py | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/pandas/tests/dtypes/cast/test_construct_ndarray.py b/pandas/tests/dtypes/cast/test_construct_ndarray.py index 6b9b2dfda6e8b..d1140c73d09b4 100644 --- a/pandas/tests/dtypes/cast/test_construct_ndarray.py +++ b/pandas/tests/dtypes/cast/test_construct_ndarray.py @@ -14,6 +14,31 @@ (["1", "2", None], None, np.array(["1", "2", None])), (["1", "2", None], np.dtype("str"), np.array(["1", "2", None])), ([1, 2, None], np.dtype("str"), np.array(["1", "2", None])), + ( + np.ma.masked_array([1, 2, 3], mask=[False, True, False]), + "int64", + np.array([1, 2, 3], dtype=np.int64), + ), + ( + np.ma.masked_array([1, 2, 3], mask=[False, True, False]), + "float64", + np.array([1, 2, 3], dtype=np.float64), + ), + ( + np.ma.masked_array([1, 2, 3], mask=[False, True, False]), + "UInt64", + np.array([1, 2, 3], dtype=np.uint64), + ), + ( + np.ma.masked_array([1.0, 2.0, 3.0], mask=[False, True, False]), + "float64", + np.array([1.0, 2.0, 3.0], dtype=np.float64), + ), + ( + np.ma.masked_array([1.0, 2.0, 3.0], mask=[False, True, False]), + "Int64", + np.array([1, 2, 3], dtype=np.int64), + ), ], ) def test_construct_1d_ndarray_preserving_na( @@ -34,3 +59,8 @@ def test_construct_1d_ndarray_preserving_na_datetimelike(dtype): result = sanitize_array(arr, index=None, dtype=np.dtype(object)) tm.assert_numpy_array_equal(result, expected) + + +def test_sanitize_masked_array_with_masked_ea(values, dtype, expected): + result = sanitize_array(values, dtype=dtype) + tm.assert_numpy_array_equal(result, expected) From 8838773801e898a02d0b359d84116c4c16d18128 Mon Sep 17 00:00:00 2001 From: lfffkh <1906921213@qq.com> Date: Mon, 11 Nov 2024 22:48:10 +0800 Subject: [PATCH 5/8] add test2 --- .../dtypes/cast/test_construct_ndarray.py | 45 ++++++++++--------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/pandas/tests/dtypes/cast/test_construct_ndarray.py b/pandas/tests/dtypes/cast/test_construct_ndarray.py index d1140c73d09b4..e54d1c9e6c508 100644 --- a/pandas/tests/dtypes/cast/test_construct_ndarray.py +++ b/pandas/tests/dtypes/cast/test_construct_ndarray.py @@ -14,6 +14,31 @@ (["1", "2", None], None, np.array(["1", "2", None])), (["1", "2", None], np.dtype("str"), np.array(["1", "2", None])), ([1, 2, None], np.dtype("str"), np.array(["1", "2", None])), + ], +) +def test_construct_1d_ndarray_preserving_na( + values, dtype, expected, using_infer_string +): + result = sanitize_array(values, index=None, dtype=dtype) + if using_infer_string and expected.dtype == object and dtype is None: + tm.assert_extension_array_equal(result, pd.array(expected, dtype="str")) + else: + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["m8[ns]", "M8[ns]"]) +def test_construct_1d_ndarray_preserving_na_datetimelike(dtype): + arr = np.arange(5, dtype=np.int64).view(dtype) + expected = np.array(list(arr), dtype=object) + assert all(isinstance(x, type(arr[0])) for x in expected) + + result = sanitize_array(arr, index=None, dtype=np.dtype(object)) + tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize( + "values, dtype, expected", + [ ( np.ma.masked_array([1, 2, 3], mask=[False, True, False]), "int64", @@ -41,26 +66,6 @@ ), ], ) -def test_construct_1d_ndarray_preserving_na( - values, dtype, expected, using_infer_string -): - result = sanitize_array(values, index=None, dtype=dtype) - if using_infer_string and expected.dtype == object and dtype is None: - tm.assert_extension_array_equal(result, pd.array(expected, dtype="str")) - else: - tm.assert_numpy_array_equal(result, expected) - - -@pytest.mark.parametrize("dtype", ["m8[ns]", "M8[ns]"]) -def test_construct_1d_ndarray_preserving_na_datetimelike(dtype): - arr = np.arange(5, dtype=np.int64).view(dtype) - expected = np.array(list(arr), dtype=object) - assert all(isinstance(x, type(arr[0])) for x in expected) - - result = sanitize_array(arr, index=None, dtype=np.dtype(object)) - tm.assert_numpy_array_equal(result, expected) - - def test_sanitize_masked_array_with_masked_ea(values, dtype, expected): result = sanitize_array(values, dtype=dtype) tm.assert_numpy_array_equal(result, expected) From 1f72bc35ce269490bc94b2f1e8d7ce955f0d15ac Mon Sep 17 00:00:00 2001 From: lfffkh <1906921213@qq.com> Date: Mon, 11 Nov 2024 23:20:26 +0800 Subject: [PATCH 6/8] add test3 --- pandas/tests/dtypes/cast/test_construct_ndarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/dtypes/cast/test_construct_ndarray.py b/pandas/tests/dtypes/cast/test_construct_ndarray.py index e54d1c9e6c508..d4bb4e135170b 100644 --- a/pandas/tests/dtypes/cast/test_construct_ndarray.py +++ b/pandas/tests/dtypes/cast/test_construct_ndarray.py @@ -67,5 +67,5 @@ def test_construct_1d_ndarray_preserving_na_datetimelike(dtype): ], ) def test_sanitize_masked_array_with_masked_ea(values, dtype, expected): - result = sanitize_array(values, dtype=dtype) + result = sanitize_array(values, index=None, type=dtype) tm.assert_numpy_array_equal(result, expected) From d78be3bac743f212369dfcc518aa920161281858 Mon Sep 17 00:00:00 2001 From: lfffkh <1906921213@qq.com> Date: Mon, 11 Nov 2024 23:36:29 +0800 Subject: [PATCH 7/8] add test4 --- pandas/tests/dtypes/cast/test_construct_ndarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/dtypes/cast/test_construct_ndarray.py b/pandas/tests/dtypes/cast/test_construct_ndarray.py index d4bb4e135170b..d021c11f5a339 100644 --- a/pandas/tests/dtypes/cast/test_construct_ndarray.py +++ b/pandas/tests/dtypes/cast/test_construct_ndarray.py @@ -67,5 +67,5 @@ def test_construct_1d_ndarray_preserving_na_datetimelike(dtype): ], ) def test_sanitize_masked_array_with_masked_ea(values, dtype, expected): - result = sanitize_array(values, index=None, type=dtype) + result = sanitize_array(values, index=None, dtype=dtype) tm.assert_numpy_array_equal(result, expected) From 7521f65dafdf7c365b0ab30e80e2be8cbc7c8ce8 Mon Sep 17 00:00:00 2001 From: lfffkh <1906921213@qq.com> Date: Mon, 11 Nov 2024 23:57:18 +0800 Subject: [PATCH 8/8] add test5 --- pandas/tests/dtypes/cast/test_construct_ndarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/dtypes/cast/test_construct_ndarray.py b/pandas/tests/dtypes/cast/test_construct_ndarray.py index d021c11f5a339..609123f1d4642 100644 --- a/pandas/tests/dtypes/cast/test_construct_ndarray.py +++ b/pandas/tests/dtypes/cast/test_construct_ndarray.py @@ -68,4 +68,4 @@ def test_construct_1d_ndarray_preserving_na_datetimelike(dtype): ) def test_sanitize_masked_array_with_masked_ea(values, dtype, expected): result = sanitize_array(values, index=None, dtype=dtype) - tm.assert_numpy_array_equal(result, expected) + tm.assert_masked_array_equal(result, expected)