From ac09146878898d083603915807ac5c29ce40e878 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 18 Nov 2021 08:42:58 -0800 Subject: [PATCH 01/10] BUG: IntegerArray/FloatingArray constructors mismatched NAs --- pandas/_libs/missing.pyx | 24 +++++++ pandas/core/arrays/floating.py | 15 +++-- pandas/core/arrays/integer.py | 11 ++- pandas/core/internals/blocks.py | 9 +++ .../arrays/floating/test_construction.py | 12 ++-- pandas/tests/frame/indexing/test_indexing.py | 67 +++++++++++++++++++ pandas/tests/series/methods/test_clip.py | 11 ++- pandas/tests/series/test_constructors.py | 18 +++++ 8 files changed, 149 insertions(+), 18 deletions(-) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index b77db2aec4a08..7cd5f78a70914 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -368,6 +368,30 @@ cdef bint checknull_with_nat_and_na(object obj): return checknull_with_nat(obj) or obj is C_NA +@cython.wraparound(False) +@cython.boundscheck(False) +def is_numeric_na(ndarray values): + """ + Check for NA values consistent with IntegerArray/FloatingArray. + + Similar to a vectorized is_valid_na_for_dtype restricted to numeric dtypes. 
+ """ + cdef: + ndarray[uint8_t] result + Py_ssize_t i, N + object val + + N = len(values) + result = np.zeros(N, dtype=np.uint8) + + for i in range(N): + val = values[i] + if val is None or val is C_NA or util.is_nan(val) or is_decimal_na(val): + result[i] = True + + return result.view(bool) + + # ----------------------------------------------------------------------------- # Implementation of NA singleton diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index 6d6cc03a1c83e..e08d7fb92e912 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -135,8 +135,7 @@ def coerce_to_array( if is_object_dtype(values): inferred_type = lib.infer_dtype(values, skipna=True) if inferred_type == "empty": - values = np.empty(len(values)) - values.fill(np.nan) + pass elif inferred_type not in [ "floating", "integer", @@ -152,13 +151,19 @@ def coerce_to_array( elif not (is_integer_dtype(values) or is_float_dtype(values)): raise TypeError(f"{values.dtype} cannot be converted to a FloatingDtype") + if values.ndim != 1: + raise TypeError("values must be a 1D list-like") + if mask is None: - mask = isna(values) + mask = libmissing.is_numeric_na(values) + mask2 = isna(values) + if not (mask == mask2).all(): + # e.g. 
if we have a timedelta64("NaT") + raise TypeError(f"{values.dtype} cannot be converted to a FloatingDtype") + else: assert len(mask) == len(values) - if not values.ndim == 1: - raise TypeError("values must be a 1D list-like") if not mask.ndim == 1: raise TypeError("mask must be a 1D list-like") diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index d8b7bf2b86d2c..9723b040b4e13 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -35,7 +35,6 @@ is_string_dtype, pandas_dtype, ) -from pandas.core.dtypes.missing import isna from pandas.core.arrays import ExtensionArray from pandas.core.arrays.masked import ( @@ -190,8 +189,7 @@ def coerce_to_array( if is_object_dtype(values) or is_string_dtype(values): inferred_type = lib.infer_dtype(values, skipna=True) if inferred_type == "empty": - values = np.empty(len(values)) - values.fill(np.nan) + pass elif inferred_type not in [ "floating", "integer", @@ -209,13 +207,14 @@ def coerce_to_array( elif not (is_integer_dtype(values) or is_float_dtype(values)): raise TypeError(f"{values.dtype} cannot be converted to an IntegerDtype") + if values.ndim != 1: + raise TypeError("values must be a 1D list-like") + if mask is None: - mask = isna(values) + mask = libmissing.is_numeric_na(values) else: assert len(mask) == len(values) - if not values.ndim == 1: - raise TypeError("values must be a 1D list-like") if not mask.ndim == 1: raise TypeError("mask must be a 1D list-like") diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 7b6a76f0a5d10..1af59bb61e0c9 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1480,6 +1480,15 @@ def setitem(self, indexer, value): # we are always 1-D indexer = indexer[0] + # TODO(EA2D): not needed with 2D EAS + if isinstance(value, (np.ndarray, ExtensionArray)) and value.ndim == 2: + assert value.shape[1] == 1 + value = value[:, 0] + elif isinstance(value, ABCDataFrame): + # TODO: should 
we avoid getting here with DataFrame? + assert value.shape[1] == 1 + value = value._ixs(0, axis=1)._values + check_setitem_lengths(indexer, value, self.values) self.values[indexer] = value return self diff --git a/pandas/tests/arrays/floating/test_construction.py b/pandas/tests/arrays/floating/test_construction.py index 4ce3dd35b538b..484c269d52333 100644 --- a/pandas/tests/arrays/floating/test_construction.py +++ b/pandas/tests/arrays/floating/test_construction.py @@ -97,14 +97,18 @@ def test_to_array_mixed_integer_float(): np.array(["foo"]), [[1, 2], [3, 4]], [np.nan, {"a": 1}], + # all-NA case used to get quietly swapped out before checking ndim + np.array([pd.NA] * 6, dtype=object).reshape(3, 2), ], ) def test_to_array_error(values): # error in converting existing arrays to FloatingArray - msg = ( - r"(:?.* cannot be converted to a FloatingDtype)" - r"|(:?values must be a 1D list-like)" - r"|(:?Cannot pass scalar)" + msg = "|".join( + [ + "cannot be converted to a FloatingDtype", + "values must be a 1D list-like", + "Cannot pass scalar", + ] ) with pytest.raises((TypeError, ValueError), match=msg): pd.array(values, dtype="Float64") diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 942da38dc5a26..9ee18dcd87a3e 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1217,6 +1217,73 @@ def test_setitem_array_as_cell_value(self): expected = DataFrame({"a": [np.zeros((2,))], "b": [np.zeros((2, 2))]}) tm.assert_frame_equal(df, expected) + def test_iloc_setitem_nullable_2d_values(self): + + df = DataFrame({"A": [1, 2, 3]}, dtype="Int64") + orig = df.copy() + + df.loc[:] = df.values[:, ::-1] + tm.assert_frame_equal(df, orig) + + df.loc[:] = pd.core.arrays.PandasArray(df.values[:, ::-1]) + tm.assert_frame_equal(df, orig) + + df.iloc[:] = df.iloc[:, :] + tm.assert_frame_equal(df, orig) + + @pytest.mark.parametrize( + "null", [pd.NaT, 
pd.NaT.to_numpy("M8[ns]"), pd.NaT.to_numpy("m8[ns]")] + ) + def test_setting_mismatched_na_into_nullable_fails( + self, null, any_numeric_ea_dtype + ): + # don't cast mismatched nulls to pd.NA + df = DataFrame({"A": [1, 2, 3]}, dtype=any_numeric_ea_dtype) + ser = df["A"] + arr = ser._values + + msg = "|".join( + [ + r"int\(\) argument must be a string, a bytes-like object or a " + "number, not 'NaTType'", + r"timedelta64\[ns\] cannot be converted to an? (Floating|Integer)Dtype", + r"datetime64\[ns\] cannot be converted to an? (Floating|Integer)Dtype", + "object cannot be converted to a FloatingDtype", + ] + ) + with pytest.raises(TypeError, match=msg): + arr[0] = null + + with pytest.raises(TypeError, match=msg): + arr[:2] = [null, null] + + with pytest.raises(TypeError, match=msg): + ser[0] = null + + with pytest.raises(TypeError, match=msg): + ser[:2] = [null, null] + + with pytest.raises(TypeError, match=msg): + ser.iloc[0] = null + + with pytest.raises(TypeError, match=msg): + ser.iloc[:2] = [null, null] + + with pytest.raises(TypeError, match=msg): + df.iloc[0, 0] = null + + with pytest.raises(TypeError, match=msg): + df.iloc[:2, 0] = [null, null] + + # Multi-Block + df2 = df.copy() + df2["B"] = ser.copy() + with pytest.raises(TypeError, match=msg): + df2.iloc[0, 0] = null + + with pytest.raises(TypeError, match=msg): + df2.iloc[:2, 0] = [null, null] + class TestDataFrameIndexingUInt64: def test_setitem(self, uint64_frame): diff --git a/pandas/tests/series/methods/test_clip.py b/pandas/tests/series/methods/test_clip.py index 247f0d50772ce..bc6d5aeb0a581 100644 --- a/pandas/tests/series/methods/test_clip.py +++ b/pandas/tests/series/methods/test_clip.py @@ -46,9 +46,14 @@ def test_series_clipping_with_na_values(self, any_numeric_ea_dtype, nulls_fixtur # Ensure that clipping method can handle NA values with out failing # GH#40581 - s = Series([nulls_fixture, 1.0, 3.0], dtype=any_numeric_ea_dtype) - s_clipped_upper = s.clip(upper=2.0) - s_clipped_lower = 
s.clip(lower=2.0) + if nulls_fixture is pd.NaT: + # constructor will raise, see + # test_constructor_mismatched_null_nullable_dtype + return + + ser = Series([nulls_fixture, 1.0, 3.0], dtype=any_numeric_ea_dtype) + s_clipped_upper = ser.clip(upper=2.0) + s_clipped_lower = ser.clip(lower=2.0) expected_upper = Series([nulls_fixture, 1.0, 2.0], dtype=any_numeric_ea_dtype) expected_lower = Series([nulls_fixture, 2.0, 3.0], dtype=any_numeric_ea_dtype) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 692c040a33ff8..c83f57981e11d 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1817,6 +1817,24 @@ def test_constructor_bool_dtype_missing_values(self): expected = Series(True, index=[0], dtype="bool") tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("func", [Series, DataFrame, Index, pd.array]) + def test_constructor_mismatched_null_nullable_dtype( + self, func, any_numeric_ea_dtype + ): + msg = "|".join( + [ + "cannot safely cast non-equivalent object", + r"int\(\) argument must be a string, a bytes-like object or a number", + r"Cannot cast array data from dtype\('O'\) to dtype\('float64'\) " + "according to the rule 'safe'", + "object cannot be converted to a FloatingDtype", + ] + ) + + for null in tm.NP_NAT_OBJECTS + [NaT]: + with pytest.raises(TypeError, match=msg): + func([null, 1.0, 3.0], dtype=any_numeric_ea_dtype) + class TestSeriesConstructorIndexCoercion: def test_series_constructor_datetimelike_index_coercion(self): From 1166725f07644fefb2f94dddf4422beb92ecefb7 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 18 Nov 2021 08:52:27 -0800 Subject: [PATCH 02/10] Whatsnew, GH ref --- doc/source/whatsnew/v1.4.0.rst | 3 +++ .../tests/arrays/floating/test_construction.py | 2 +- pandas/tests/extension/base/setitem.py | 17 +++++++++++++++++ pandas/tests/frame/indexing/test_indexing.py | 2 +- pandas/tests/series/test_constructors.py | 1 + 5 files 
changed, 23 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 2456406f0eca3..5db79665354f6 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -610,6 +610,8 @@ Indexing - Bug in :meth:`Series.__setitem__` with a boolean mask indexer setting a listlike value of length 1 incorrectly broadcasting that value (:issue:`44265`) - Bug in :meth:`DataFrame.loc.__setitem__` and :meth:`DataFrame.iloc.__setitem__` with mixed dtypes sometimes failing to operate in-place (:issue:`44345`) - Bug in :meth:`DataFrame.loc.__getitem__` incorrectly raising ``KeyError`` when selecting a single column with a boolean key (:issue:`44322`). +- Bug in setting :meth:`DataFrame.iloc` with a single ``ExtensionDtype`` column and setting 2D values e.g. ``df.iloc[:] = df.values`` incorrectly raising (:issue:`44514`) +- Missing ^^^^^^^ @@ -709,6 +711,7 @@ ExtensionArray - Bug in :func:`array` failing to preserve :class:`PandasArray` (:issue:`43887`) - NumPy ufuncs ``np.abs``, ``np.positive``, ``np.negative`` now correctly preserve dtype when called on ExtensionArrays that implement ``__abs__, __pos__, __neg__``, respectively. In particular this is fixed for :class:`TimedeltaArray` (:issue:`43899`) - Avoid raising ``PerformanceWarning`` about fragmented DataFrame when using many columns with an extension dtype (:issue:`44098`) +- Bug in :class:`IntegerArray` and :class:`FloatingArray` construction incorrectly coercing mismatched NA values (e.g. 
``np.timedelta64("NaT")``) to numeric NA (:issue:`44514`) - Styler diff --git a/pandas/tests/arrays/floating/test_construction.py b/pandas/tests/arrays/floating/test_construction.py index 484c269d52333..4b7b237d2eb7c 100644 --- a/pandas/tests/arrays/floating/test_construction.py +++ b/pandas/tests/arrays/floating/test_construction.py @@ -97,7 +97,7 @@ def test_to_array_mixed_integer_float(): np.array(["foo"]), [[1, 2], [3, 4]], [np.nan, {"a": 1}], - # all-NA case used to get quietly swapped out before checking ndim + # GH#44514 all-NA case used to get quietly swapped out before checking ndim np.array([pd.NA] * 6, dtype=object).reshape(3, 2), ], ) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index a2d100db81a2c..221710fbffca1 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -357,6 +357,23 @@ def test_setitem_series(self, data, full_indexer): ) self.assert_series_equal(result, expected) + def test_setitem_frame_2d_values(self, data): + # GH#44514 + df = pd.DataFrame({"A": data}) + orig = df.copy() + + df.iloc[:] = df + self.assert_frame_equal(df, orig) + + df.iloc[:-1] = df.iloc[:-1] + self.assert_frame_equal(df, orig) + + df.iloc[:] = df.values + self.assert_frame_equal(df, orig) + + df.iloc[:-1] = df.values[:-1] + self.assert_frame_equal(df, orig) + def test_delitem_series(self, data): # GH#40763 ser = pd.Series(data, name="data") diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 9ee18dcd87a3e..1ab9e15ea889b 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1237,7 +1237,7 @@ def test_iloc_setitem_nullable_2d_values(self): def test_setting_mismatched_na_into_nullable_fails( self, null, any_numeric_ea_dtype ): - # don't cast mismatched nulls to pd.NA + # GH#44514 don't cast mismatched nulls to pd.NA df = DataFrame({"A": [1, 2, 3]}, 
dtype=any_numeric_ea_dtype) ser = df["A"] arr = ser._values diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index c83f57981e11d..defa1af21a60d 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1821,6 +1821,7 @@ def test_constructor_bool_dtype_missing_values(self): def test_constructor_mismatched_null_nullable_dtype( self, func, any_numeric_ea_dtype ): + # GH#44514 msg = "|".join( [ "cannot safely cast non-equivalent object", From 21b6977f8445edd300cad1491d78f811201bd566 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 18 Nov 2021 13:26:10 -0800 Subject: [PATCH 03/10] mypy fixup --- pandas/core/internals/blocks.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 1af59bb61e0c9..05a3a30acd21a 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1483,7 +1483,9 @@ def setitem(self, indexer, value): # TODO(EA2D): not needed with 2D EAS if isinstance(value, (np.ndarray, ExtensionArray)) and value.ndim == 2: assert value.shape[1] == 1 - value = value[:, 0] + # error: No overload variant of "__getitem__" of "ExtensionArray" + # matches argument type "Tuple[slice, int]" + value = value[:, 0] # type: ignore[call-overload] elif isinstance(value, ABCDataFrame): # TODO: should we avoid getting here with DataFrame? 
assert value.shape[1] == 1 From a4d89ce4e92bf825e789db491d188b21baf5f458 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 20 Nov 2021 13:40:07 -0800 Subject: [PATCH 04/10] xfail on old numpy --- pandas/tests/series/test_constructors.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index defa1af21a60d..3bde2e2a7cb01 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1817,6 +1817,9 @@ def test_constructor_bool_dtype_missing_values(self): expected = Series(True, index=[0], dtype="bool") tm.assert_series_equal(result, expected) + @pytest.mark.xfail( + np_version_under1p19, reason="np.array([td64nat, float, float]) raises" + ) @pytest.mark.parametrize("func", [Series, DataFrame, Index, pd.array]) def test_constructor_mismatched_null_nullable_dtype( self, func, any_numeric_ea_dtype From d322af3ee10be6cd8e06cd87a16bb30633e06b35 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 20 Nov 2021 13:55:59 -0800 Subject: [PATCH 05/10] xfail ArrayManager --- pandas/tests/extension/base/setitem.py | 17 ++++++++++++++++- pandas/tests/frame/indexing/test_indexing.py | 5 ++++- pandas/tests/series/test_constructors.py | 6 +++++- 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 221710fbffca1..68b356ec32c25 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -1,6 +1,13 @@ import numpy as np import pytest +from pandas.core.dtypes.dtypes import ( + DatetimeTZDtype, + IntervalDtype, + PandasDtype, + PeriodDtype, +) + import pandas as pd import pandas._testing as tm from pandas.tests.extension.base.base import BaseExtensionTests @@ -357,8 +364,16 @@ def test_setitem_series(self, data, full_indexer): ) self.assert_series_equal(result, expected) - def test_setitem_frame_2d_values(self, data): + def 
test_setitem_frame_2d_values(self, data, using_array_manager, request): # GH#44514 + if using_array_manager: + if not isinstance( + data.dtype, (PandasDtype, PeriodDtype, IntervalDtype, DatetimeTZDtype) + ): + # These dtypes have non-broken implementations of _can_hold_element + mark = pytest.mark.xfail(reason="Goes through split path, loses dtype") + request.node.add_marker(mark) + df = pd.DataFrame({"A": data}) orig = df.copy() diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 1ab9e15ea889b..8a1b17e4f9432 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1217,7 +1217,10 @@ def test_setitem_array_as_cell_value(self): expected = DataFrame({"a": [np.zeros((2,))], "b": [np.zeros((2, 2))]}) tm.assert_frame_equal(df, expected) - def test_iloc_setitem_nullable_2d_values(self): + def test_iloc_setitem_nullable_2d_values(self, using_array_manager, request): + if using_array_manager: + mark = pytest.mark.xfail(reason="Goes through split path, loses dtype") + request.node.add_marker(mark) df = DataFrame({"A": [1, 2, 3]}, dtype="Int64") orig = df.copy() diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 3bde2e2a7cb01..0322eb1973142 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1817,6 +1817,9 @@ def test_constructor_bool_dtype_missing_values(self): expected = Series(True, index=[0], dtype="bool") tm.assert_series_equal(result, expected) + @pytest.mark.filterwarnings( + "ignore:elementwise comparison failed:DeprecationWarning" + ) @pytest.mark.xfail( np_version_under1p19, reason="np.array([td64nat, float, float]) raises" ) @@ -1828,7 +1831,8 @@ def test_constructor_mismatched_null_nullable_dtype( msg = "|".join( [ "cannot safely cast non-equivalent object", - r"int\(\) argument must be a string, a bytes-like object or a number", + r"int\(\) 
argument must be a string, a bytes-like object " + "or a (real )?number", r"Cannot cast array data from dtype\('O'\) to dtype\('float64'\) " "according to the rule 'safe'", "object cannot be converted to a FloatingDtype", From 67d615da901226a9c9e2a84bd90a71ee5f319255 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 20 Nov 2021 14:47:12 -0800 Subject: [PATCH 06/10] update tested exception message for py310 --- pandas/tests/frame/indexing/test_indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 8a1b17e4f9432..1250d5c80e017 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1248,7 +1248,7 @@ def test_setting_mismatched_na_into_nullable_fails( msg = "|".join( [ r"int\(\) argument must be a string, a bytes-like object or a " - "number, not 'NaTType'", + "(real )?number, not 'NaTType'", r"timedelta64\[ns\] cannot be converted to an? (Floating|Integer)Dtype", r"datetime64\[ns\] cannot be converted to an?
(Floating|Integer)Dtype", "object cannot be converted to a FloatingDtype", From 117aef7b7d6ffcf2a506640dae875e28d4fddcd9 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 20 Nov 2021 16:20:46 -0800 Subject: [PATCH 07/10] xfail on later numpy --- pandas/tests/series/test_constructors.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 0322eb1973142..ebe47b88a24d4 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -13,7 +13,10 @@ iNaT, lib, ) -from pandas.compat.numpy import np_version_under1p19 +from pandas.compat.numpy import ( + np_version_under1p19, + np_version_under1p20, +) import pandas.util._test_decorators as td from pandas.core.dtypes.common import ( @@ -1821,7 +1824,7 @@ def test_constructor_bool_dtype_missing_values(self): "ignore:elementwise comparison failed:DeprecationWarning" ) @pytest.mark.xfail( - np_version_under1p19, reason="np.array([td64nat, float, float]) raises" + np_version_under1p20, reason="np.array([td64nat, float, float]) raises" ) @pytest.mark.parametrize("func", [Series, DataFrame, Index, pd.array]) def test_constructor_mismatched_null_nullable_dtype( From 2a2f8d2afbe28c71306f8efb13fc7db85caf45e3 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 22 Nov 2021 12:39:35 -0800 Subject: [PATCH 08/10] use decorator --- pandas/core/arrays/floating.py | 5 ++++- pandas/core/arrays/integer.py | 1 + pandas/tests/extension/base/setitem.py | 7 ++++++- pandas/tests/frame/indexing/test_indexing.py | 7 ++----- pandas/util/_test_decorators.py | 5 +++++ 5 files changed, 18 insertions(+), 7 deletions(-) diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index 8fe8ab28040b8..0561c0bfa1a07 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -4,7 +4,10 @@ import numpy as np -from pandas._libs import lib +from pandas._libs import ( + lib, + 
missing as libmissing, +) from pandas._typing import ( ArrayLike, AstypeArg, diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index d60c3de119466..0e82ef731bb63 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -7,6 +7,7 @@ from pandas._libs import ( iNaT, lib, + missing as libmissing, ) from pandas._typing import ( ArrayLike, diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 68b356ec32c25..208a1a1757be2 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -364,8 +364,13 @@ def test_setitem_series(self, data, full_indexer): ) self.assert_series_equal(result, expected) - def test_setitem_frame_2d_values(self, data, using_array_manager, request): + def test_setitem_frame_2d_values(self, data, request): # GH#44514 + df = pd.DataFrame({"A": data}) + + # Avoiding using_array_manager fixture + # https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410 + using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager) if using_array_manager: if not isinstance( data.dtype, (PandasDtype, PeriodDtype, IntervalDtype, DatetimeTZDtype) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 1250d5c80e017..418408324d6f2 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1217,11 +1217,8 @@ def test_setitem_array_as_cell_value(self): expected = DataFrame({"a": [np.zeros((2,))], "b": [np.zeros((2, 2))]}) tm.assert_frame_equal(df, expected) - def test_iloc_setitem_nullable_2d_values(self, using_array_manager, request): - if using_array_manager: - mark = pytest.mark.xfail(reason="Goes through split path, loses dtype") - request.node.add_marker(mark) - + @td.xfail_array_manager # with AM goes through split-path, loses dtype + def test_iloc_setitem_nullable_2d_values(self): df = DataFrame({"A": [1, 
2, 3]}, dtype="Int64") orig = df.copy() diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index d5ffca36d325f..4aee5b0fb18ef 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -285,6 +285,11 @@ def async_mark(): return async_mark +xfail_array_manager = pytest.mark.skipif( + get_option("mode.data_manager") == "array", + reason="Fails with ArrayManager", +) + skip_array_manager_not_yet_implemented = pytest.mark.skipif( get_option("mode.data_manager") == "array", reason="Not yet implemented for ArrayManager", From 48a453107cf1da170b6bfa5b279052a2cc13dd39 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 26 Nov 2021 19:06:12 -0800 Subject: [PATCH 09/10] raise in is_numeric_na --- pandas/_libs/missing.pyx | 8 +++++--- pandas/core/arrays/floating.py | 4 ---- pandas/tests/frame/indexing/test_indexing.py | 1 + pandas/tests/series/test_constructors.py | 1 + 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index c3a4a3bed739b..585b535775397 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -270,9 +270,11 @@ def is_numeric_na(values: ndarray) -> ndarray: for i in range(N): val = values[i] - if val is None or val is C_NA or util.is_nan(val) or is_decimal_na(val): - result[i] = True - + if checknull(val): + if val is None or val is C_NA or util.is_nan(val) or is_decimal_na(val): + result[i] = True + else: + raise TypeError(f"'values' contains non-numeric NA {val}") return result.view(bool) diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index 0561c0bfa1a07..1144e8907a8b1 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -153,10 +153,6 @@ def coerce_to_array( if mask is None: mask = libmissing.is_numeric_na(values) - mask2 = isna(values) - if not (mask == mask2).all(): - # e.g. 
if we have a timedelta64("NaT") - raise TypeError(f"{values.dtype} cannot be converted to a FloatingDtype") else: assert len(mask) == len(values) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 0a77862be1f73..b102bcdae57d9 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1250,6 +1250,7 @@ def test_setting_mismatched_na_into_nullable_fails( r"timedelta64\[ns\] cannot be converted to an? (Floating|Integer)Dtype", r"datetime64\[ns\] cannot be converted to an? (Floating|Integer)Dtype", "object cannot be converted to a FloatingDtype", + "'values' contains non-numeric NA", ] ) with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index ebe47b88a24d4..43e4c8364c06c 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1839,6 +1839,7 @@ def test_constructor_mismatched_null_nullable_dtype( r"Cannot cast array data from dtype\('O'\) to dtype\('float64'\) " "according to the rule 'safe'", "object cannot be converted to a FloatingDtype", + "'values' contains non-numeric NA", ] ) From 745d24f6e2cf076c63320e3395e2040e2d3bc9aa Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 26 Nov 2021 19:14:13 -0800 Subject: [PATCH 10/10] fixup unused import --- pandas/core/arrays/floating.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py index 1144e8907a8b1..5e55715ee0e97 100644 --- a/pandas/core/arrays/floating.py +++ b/pandas/core/arrays/floating.py @@ -30,7 +30,6 @@ ExtensionDtype, register_extension_dtype, ) -from pandas.core.dtypes.missing import isna from pandas.core.arrays import ExtensionArray from pandas.core.arrays.numeric import (