pandas-dev · jreback · Dec 1, 2021 · Nov 18, 2021 · Nov 18, 2021 · Nov 18, 2021
diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
@@ -616,6 +616,8 @@ Indexing
 - Bug in :meth:`Series.__setitem__` with a boolean mask indexer setting a listlike value of length 1 incorrectly broadcasting that value (:issue:`44265`)
 - Bug in :meth:`DataFrame.loc.__setitem__` and :meth:`DataFrame.iloc.__setitem__` with mixed dtypes sometimes failing to operate in-place (:issue:`44345`)
 - Bug in :meth:`DataFrame.loc.__getitem__` incorrectly raising ``KeyError`` when selecting a single column with a boolean key (:issue:`44322`).
+- Bug in setting :meth:`DataFrame.iloc` with a single ``ExtensionDtype`` column and setting 2D values e.g. ``df.iloc[:] = df.values`` incorrectly raising (:issue:`44514`)
+-
 
 Missing
 ^^^^^^^
@@ -717,6 +719,7 @@ ExtensionArray
 - Bug in :func:`array` failing to preserve :class:`PandasArray` (:issue:`43887`)
 - NumPy ufuncs ``np.abs``, ``np.positive``, ``np.negative`` now correctly preserve dtype when called on ExtensionArrays that implement ``__abs__, __pos__, __neg__``, respectively. In particular this is fixed for :class:`TimedeltaArray` (:issue:`43899`)
 - Avoid raising ``PerformanceWarning`` about fragmented DataFrame when using many columns with an extension dtype (:issue:`44098`)
+- Bug in :class:`IntegerArray` and :class:`FloatingArray` construction incorrectly coercing mismatched NA values (e.g. ``np.timedelta64("NaT")``) to numeric NA (:issue:`44514`)
 - Bug in :meth:`BooleanArray.__eq__` and :meth:`BooleanArray.__ne__` raising ``TypeError`` on comparison with an incompatible type (like a string). This caused :meth:`DataFrame.replace` to sometimes raise a ``TypeError`` if a nullable boolean column was included (:issue:`44499`)
 -
 

diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx
@@ -264,6 +264,34 @@ cdef bint checknull_with_nat_and_na(object obj):
     return checknull_with_nat(obj) or obj is C_NA
 
 
+@cython.wraparound(False)
+@cython.boundscheck(False)
+def is_numeric_na(values: ndarray) -> ndarray:
+    """
+    Check for NA values consistent with IntegerArray/FloatingArray.
+
+    Similar to a vectorized is_valid_na_for_dtype restricted to numeric dtypes.
+
+    Returns
+    -------
+    ndarray[bool]
+    """
+    cdef:
+        ndarray[uint8_t] result
+        Py_ssize_t i, N
+        object val
+
+    N = len(values)
+    result = np.zeros(N, dtype=np.uint8)
+
+    for i in range(N):
+        val = values[i]
+        if val is None or val is C_NA or util.is_nan(val) or is_decimal_na(val):
+            result[i] = True
+
+    return result.view(bool)
+
+
 # -----------------------------------------------------------------------------
 # Implementation of NA singleton
 

diff --git a/pandas/core/arrays/floating.py b/pandas/core/arrays/floating.py
@@ -135,8 +135,7 @@ def coerce_to_array(
     if is_object_dtype(values):
         inferred_type = lib.infer_dtype(values, skipna=True)
         if inferred_type == "empty":
-            values = np.empty(len(values))
-            values.fill(np.nan)
+            pass
         elif inferred_type not in [
             "floating",
             "integer",
@@ -152,13 +151,19 @@ def coerce_to_array(
     elif not (is_integer_dtype(values) or is_float_dtype(values)):
         raise TypeError(f"{values.dtype} cannot be converted to a FloatingDtype")
 
+    if values.ndim != 1:
+        raise TypeError("values must be a 1D list-like")
+
     if mask is None:
-        mask = isna(values)
+        mask = libmissing.is_numeric_na(values)
+        mask2 = isna(values)
+        if not (mask == mask2).all():
+            # e.g. if we have a timedelta64("NaT")
+            raise TypeError(f"{values.dtype} cannot be converted to a FloatingDtype")
+
     else:
         assert len(mask) == len(values)
 
-    if not values.ndim == 1:
-        raise TypeError("values must be a 1D list-like")
     if not mask.ndim == 1:
         raise TypeError("mask must be a 1D list-like")
 

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
@@ -35,7 +35,6 @@
     is_string_dtype,
     pandas_dtype,
 )
-from pandas.core.dtypes.missing import isna
 
 from pandas.core.arrays import ExtensionArray
 from pandas.core.arrays.masked import (
@@ -190,8 +189,7 @@ def coerce_to_array(
     if is_object_dtype(values) or is_string_dtype(values):
         inferred_type = lib.infer_dtype(values, skipna=True)
         if inferred_type == "empty":
-            values = np.empty(len(values))
-            values.fill(np.nan)
+            pass
         elif inferred_type not in [
             "floating",
             "integer",
@@ -209,13 +207,14 @@ def coerce_to_array(
     elif not (is_integer_dtype(values) or is_float_dtype(values)):
         raise TypeError(f"{values.dtype} cannot be converted to an IntegerDtype")
 
+    if values.ndim != 1:
+        raise TypeError("values must be a 1D list-like")
+
     if mask is None:
-        mask = isna(values)
+        mask = libmissing.is_numeric_na(values)
     else:
         assert len(mask) == len(values)
 
-    if values.ndim != 1:
-        raise TypeError("values must be a 1D list-like")
     if mask.ndim != 1:
         raise TypeError("mask must be a 1D list-like")
 

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -1508,6 +1508,17 @@ def setitem(self, indexer, value):
             # we are always 1-D
             indexer = indexer[0]
 
+        # TODO(EA2D): not needed with 2D EAS
+        if isinstance(value, (np.ndarray, ExtensionArray)) and value.ndim == 2:
+            assert value.shape[1] == 1
+            # error: No overload variant of "__getitem__" of "ExtensionArray"
+            # matches argument type "Tuple[slice, int]"
+            value = value[:, 0]  # type: ignore[call-overload]
+        elif isinstance(value, ABCDataFrame):
+            # TODO: should we avoid getting here with DataFrame?
+            assert value.shape[1] == 1
+            value = value._ixs(0, axis=1)._values
+
         check_setitem_lengths(indexer, value, self.values)
         self.values[indexer] = value
         return self

diff --git a/pandas/tests/arrays/floating/test_construction.py b/pandas/tests/arrays/floating/test_construction.py
@@ -97,14 +97,18 @@ def test_to_array_mixed_integer_float():
         np.array(["foo"]),
         [[1, 2], [3, 4]],
         [np.nan, {"a": 1}],
+        # GH#44514 all-NA case used to get quietly swapped out before checking ndim
+        np.array([pd.NA] * 6, dtype=object).reshape(3, 2),
     ],
 )
 def test_to_array_error(values):
     # error in converting existing arrays to FloatingArray
-    msg = (
-        r"(:?.* cannot be converted to a FloatingDtype)"
-        r"|(:?values must be a 1D list-like)"
-        r"|(:?Cannot pass scalar)"
+    msg = "|".join(
+        [
+            "cannot be converted to a FloatingDtype",
+            "values must be a 1D list-like",
+            "Cannot pass scalar",
+        ]
     )
     with pytest.raises((TypeError, ValueError), match=msg):
         pd.array(values, dtype="Float64")

diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py
@@ -1,6 +1,13 @@
 import numpy as np
 import pytest
 
+from pandas.core.dtypes.dtypes import (
+    DatetimeTZDtype,
+    IntervalDtype,
+    PandasDtype,
+    PeriodDtype,
+)
+
 import pandas as pd
 import pandas._testing as tm
 from pandas.tests.extension.base.base import BaseExtensionTests
@@ -357,6 +364,31 @@ def test_setitem_series(self, data, full_indexer):
         )
         self.assert_series_equal(result, expected)
 
+    def test_setitem_frame_2d_values(self, data, using_array_manager, request):
+        # GH#44514
+        if using_array_manager:
+            if not isinstance(
+                data.dtype, (PandasDtype, PeriodDtype, IntervalDtype, DatetimeTZDtype)
+            ):
+                # These dtypes have non-broken implementations of _can_hold_element
+                mark = pytest.mark.xfail(reason="Goes through split path, loses dtype")
+                request.node.add_marker(mark)
+
+        df = pd.DataFrame({"A": data})
+        orig = df.copy()
+
+        df.iloc[:] = df
+        self.assert_frame_equal(df, orig)
+
+        df.iloc[:-1] = df.iloc[:-1]
+        self.assert_frame_equal(df, orig)
+
+        df.iloc[:] = df.values
+        self.assert_frame_equal(df, orig)
+
+        df.iloc[:-1] = df.values[:-1]
+        self.assert_frame_equal(df, orig)
+
     def test_delitem_series(self, data):
         # GH#40763
         ser = pd.Series(data, name="data")

diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
@@ -1217,6 +1217,76 @@ def test_setitem_array_as_cell_value(self):
         expected = DataFrame({"a": [np.zeros((2,))], "b": [np.zeros((2, 2))]})
         tm.assert_frame_equal(df, expected)
 
+    def test_iloc_setitem_nullable_2d_values(self, using_array_manager, request):
+        if using_array_manager:
+            mark = pytest.mark.xfail(reason="Goes through split path, loses dtype")
+            request.node.add_marker(mark)
+
+        df = DataFrame({"A": [1, 2, 3]}, dtype="Int64")
+        orig = df.copy()
+
+        df.loc[:] = df.values[:, ::-1]
+        tm.assert_frame_equal(df, orig)
+
+        df.loc[:] = pd.core.arrays.PandasArray(df.values[:, ::-1])
+        tm.assert_frame_equal(df, orig)
+
+        df.iloc[:] = df.iloc[:, :]
+        tm.assert_frame_equal(df, orig)
+
+    @pytest.mark.parametrize(
+        "null", [pd.NaT, pd.NaT.to_numpy("M8[ns]"), pd.NaT.to_numpy("m8[ns]")]
+    )
+    def test_setting_mismatched_na_into_nullable_fails(
+        self, null, any_numeric_ea_dtype
+    ):
+        # GH#44514 don't cast mismatched nulls to pd.NA
+        df = DataFrame({"A": [1, 2, 3]}, dtype=any_numeric_ea_dtype)
+        ser = df["A"]
+        arr = ser._values
+
+        msg = "|".join(
+            [
+                r"int\(\) argument must be a string, a bytes-like object or a "
+                "(real )?number, not 'NaTType'",
+                r"timedelta64\[ns\] cannot be converted to an? (Floating|Integer)Dtype",
+                r"datetime64\[ns\] cannot be converted to an? (Floating|Integer)Dtype",
+                "object cannot be converted to a FloatingDtype",
+            ]
+        )
+        with pytest.raises(TypeError, match=msg):
+            arr[0] = null
+
+        with pytest.raises(TypeError, match=msg):
+            arr[:2] = [null, null]
+
+        with pytest.raises(TypeError, match=msg):
+            ser[0] = null
+
+        with pytest.raises(TypeError, match=msg):
+            ser[:2] = [null, null]
+
+        with pytest.raises(TypeError, match=msg):
+            ser.iloc[0] = null
+
+        with pytest.raises(TypeError, match=msg):
+            ser.iloc[:2] = [null, null]
+
+        with pytest.raises(TypeError, match=msg):
+            df.iloc[0, 0] = null
+
+        with pytest.raises(TypeError, match=msg):
+            df.iloc[:2, 0] = [null, null]
+
+        # Multi-Block
+        df2 = df.copy()
+        df2["B"] = ser.copy()
+        with pytest.raises(TypeError, match=msg):
+            df2.iloc[0, 0] = null
+
+        with pytest.raises(TypeError, match=msg):
+            df2.iloc[:2, 0] = [null, null]
+
 
 class TestDataFrameIndexingUInt64:
     def test_setitem(self, uint64_frame):

diff --git a/pandas/tests/series/methods/test_clip.py b/pandas/tests/series/methods/test_clip.py
@@ -46,9 +46,14 @@ def test_series_clipping_with_na_values(self, any_numeric_ea_dtype, nulls_fixtur
         # Ensure that clipping method can handle NA values with out failing
         # GH#40581
 
-        s = Series([nulls_fixture, 1.0, 3.0], dtype=any_numeric_ea_dtype)
-        s_clipped_upper = s.clip(upper=2.0)
-        s_clipped_lower = s.clip(lower=2.0)
+        if nulls_fixture is pd.NaT:
+            # constructor will raise, see
+            #  test_constructor_mismatched_null_nullable_dtype
+            return
+
+        ser = Series([nulls_fixture, 1.0, 3.0], dtype=any_numeric_ea_dtype)
+        s_clipped_upper = ser.clip(upper=2.0)
+        s_clipped_lower = ser.clip(lower=2.0)
 
         expected_upper = Series([nulls_fixture, 1.0, 2.0], dtype=any_numeric_ea_dtype)
         expected_lower = Series([nulls_fixture, 2.0, 3.0], dtype=any_numeric_ea_dtype)

diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
@@ -13,7 +13,10 @@
     iNaT,
     lib,
 )
-from pandas.compat.numpy import np_version_under1p19
+from pandas.compat.numpy import (
+    np_version_under1p19,
+    np_version_under1p20,
+)
 import pandas.util._test_decorators as td
 
 from pandas.core.dtypes.common import (
@@ -1817,6 +1820,32 @@ def test_constructor_bool_dtype_missing_values(self):
         expected = Series(True, index=[0], dtype="bool")
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.filterwarnings(
+        "ignore:elementwise comparison failed:DeprecationWarning"
+    )
+    @pytest.mark.xfail(
+        np_version_under1p20, reason="np.array([td64nat, float, float]) raises"
+    )
+    @pytest.mark.parametrize("func", [Series, DataFrame, Index, pd.array])
+    def test_constructor_mismatched_null_nullable_dtype(
+        self, func, any_numeric_ea_dtype
+    ):
+        # GH#44514
+        msg = "|".join(
+            [
+                "cannot safely cast non-equivalent object",
+                r"int\(\) argument must be a string, a bytes-like object "
+                "or a (real )?number",
+                r"Cannot cast array data from dtype\('O'\) to dtype\('float64'\) "
+                "according to the rule 'safe'",
+                "object cannot be converted to a FloatingDtype",
+            ]
+        )
+
+        for null in tm.NP_NAT_OBJECTS + [NaT]:
+            with pytest.raises(TypeError, match=msg):
+                func([null, 1.0, 3.0], dtype=any_numeric_ea_dtype)
+
 
 class TestSeriesConstructorIndexCoercion:
     def test_series_constructor_datetimelike_index_coercion(self):