BUG: ArrayManager indexing mismatched behavior (#45639)

jbrockmendel · web-flow · commit 0f5d934be7d3 · 2022-01-27T21:04:36.000-05:00
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -20,6 +20,7 @@
 from pandas.util._decorators import doc
 from pandas.util._exceptions import find_stack_level
 
+from pandas.core.dtypes.cast import can_hold_element
 from pandas.core.dtypes.common import (
     is_array_like,
     is_bool_dtype,
@@ -1584,15 +1585,13 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"):
 
         # if there is only one block/type, still have to take split path
         # unless the block is one-dimensional or it can hold the value
-        if (
-            not take_split_path
-            and getattr(self.obj._mgr, "blocks", False)
-            and self.ndim > 1
-        ):
+        if not take_split_path and len(self.obj._mgr.arrays) and self.ndim > 1:
             # in case of dict, keys are indices
             val = list(value.values()) if isinstance(value, dict) else value
-            blk = self.obj._mgr.blocks[0]
-            take_split_path = not blk._can_hold_element(val)
+            arr = self.obj._mgr.arrays[0]
+            take_split_path = not can_hold_element(
+                arr, extract_array(val, extract_numpy=True)
+            )
 
         # if we have any multi-indexes that have non-trivial slices
         # (not null slices) then we must take the split path, xref
diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
@@ -341,9 +341,8 @@ def where(self: T, other, cond, align: bool) -> T:
             cond=cond,
         )
 
-    # TODO what is this used for?
-    # def setitem(self, indexer, value) -> ArrayManager:
-    #     return self.apply_with_block("setitem", indexer=indexer, value=value)
+    def setitem(self: T, indexer, value) -> T:
+        return self.apply_with_block("setitem", indexer=indexer, value=value)
 
     def putmask(self, mask, new, align: bool = True):
         if align:
@@ -467,7 +466,7 @@ def is_view(self) -> bool:
 
     @property
     def is_single_block(self) -> bool:
-        return False
+        return len(self.arrays) == 1
 
     def _get_data_subset(self: T, predicate: Callable) -> T:
         indices = [i for i, arr in enumerate(self.arrays) if predicate(arr)]
diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py
@@ -1,13 +1,6 @@
 import numpy as np
 import pytest
 
-from pandas.core.dtypes.dtypes import (
-    DatetimeTZDtype,
-    IntervalDtype,
-    PandasDtype,
-    PeriodDtype,
-)
-
 import pandas as pd
 import pandas._testing as tm
 from pandas.tests.extension.base.base import BaseExtensionTests
@@ -367,19 +360,6 @@ def test_setitem_series(self, data, full_indexer):
     def test_setitem_frame_2d_values(self, data, request):
         # GH#44514
         df = pd.DataFrame({"A": data})
-
-        # Avoiding using_array_manager fixture
-        #  https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410
-        using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager)
-        if using_array_manager:
-            if not isinstance(
-                data.dtype, (PandasDtype, PeriodDtype, IntervalDtype, DatetimeTZDtype)
-            ):
-                # These dtypes have non-broken implementations of _can_hold_element
-                mark = pytest.mark.xfail(reason="Goes through split path, loses dtype")
-                request.node.add_marker(mark)
-
-        df = pd.DataFrame({"A": data})
         orig = df.copy()
 
         df.iloc[:] = df
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
@@ -1212,8 +1212,6 @@ def test_setitem_array_as_cell_value(self):
         expected = DataFrame({"a": [np.zeros((2,))], "b": [np.zeros((2, 2))]})
         tm.assert_frame_equal(df, expected)
 
-    # with AM goes through split-path, loses dtype
-    @td.skip_array_manager_not_yet_implemented
     def test_iloc_setitem_nullable_2d_values(self):
         df = DataFrame({"A": [1, 2, 3]}, dtype="Int64")
         orig = df.copy()
diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py
@@ -673,19 +673,7 @@ def test_quantile_ea_with_na(self, obj, index):
 
     # TODO(GH#39763): filtering can be removed after GH#39763 is fixed
     @pytest.mark.filterwarnings("ignore:Using .astype to convert:FutureWarning")
-    def test_quantile_ea_all_na(
-        self, obj, index, frame_or_series, using_array_manager, request
-    ):
-        if (
-            using_array_manager
-            and frame_or_series is DataFrame
-            and index.dtype == "m8[ns]"
-        ):
-            mark = pytest.mark.xfail(
-                reason="obj.astype fails bc obj is incorrectly dt64 at this point"
-            )
-            request.node.add_marker(mark)
-
+    def test_quantile_ea_all_na(self, obj, index, frame_or_series, request):
         obj.iloc[:] = index._na_value
 
         # TODO(ArrayManager): this casting should be unnecessary after GH#39763 is fixed
diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
@@ -82,9 +82,7 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manage
 
         overwrite = isinstance(key, slice) and key == slice(None)
 
-        if overwrite or using_array_manager:
-            # TODO(ArrayManager) we always overwrite because ArrayManager takes
-            #  the "split" path, which still overwrites
+        if overwrite:
             # TODO: GH#39986 this probably shouldn't behave differently
             expected = DataFrame({0: cat})
             assert not np.shares_memory(df.values, orig_vals)
@@ -108,13 +106,13 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manage
         tm.assert_frame_equal(df, expected)
 
     @pytest.mark.parametrize("box", [array, Series])
-    def test_iloc_setitem_ea_inplace(self, frame_or_series, box, using_array_manager):
+    def test_iloc_setitem_ea_inplace(self, frame_or_series, box):
         # GH#38952 Case with not setting a full column
         #  IntegerArray without NAs
         arr = array([1, 2, 3, 4])
         obj = frame_or_series(arr.to_numpy("i8"))
 
-        if frame_or_series is Series or not using_array_manager:
+        if frame_or_series is Series:
             values = obj.values
         else:
             values = obj[0].values
@@ -131,10 +129,7 @@ def test_iloc_setitem_ea_inplace(self, frame_or_series, box, using_array_manager
         if frame_or_series is Series:
             assert obj.values is values
         else:
-            if using_array_manager:
-                assert obj[0].values is values
-            else:
-                assert obj.values.base is values.base and values.base is not None
+            assert np.shares_memory(obj[0].values, values)
 
     def test_is_scalar_access(self):
         # GH#32085 index with duplicates doesn't matter for _is_scalar_access
@@ -999,9 +994,6 @@ def test_iloc_getitem_readonly_key(self):
         expected = df["data"].loc[[1, 3, 6]]
         tm.assert_series_equal(result, expected)
 
-    # TODO(ArrayManager) setting single item with an iterable doesn't work yet
-    # in the "split" path
-    @td.skip_array_manager_not_yet_implemented
     def test_iloc_assign_series_to_df_cell(self):
         # GH 37593
         df = DataFrame(columns=["a"], index=[0])
@@ -1224,8 +1216,6 @@ def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame):
             # GH#32257 we let numpy do validation, get their exception
             float_frame.iloc[:, :, :] = 1
 
-    # TODO(ArrayManager) "split" path doesn't properly implement DataFrame indexer
-    @td.skip_array_manager_not_yet_implemented
     def test_iloc_frame_indexer(self):
         # GH#39004
         df = DataFrame({"a": [1, 2, 3]})
diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py
@@ -7,8 +7,6 @@
 import numpy as np
 import pytest
 
-import pandas.util._test_decorators as td
-
 from pandas.core.dtypes.common import (
     is_float_dtype,
     is_integer_dtype,
@@ -504,9 +502,6 @@ def test_multi_assign_broadcasting_rhs(self):
         df.loc[df["A"] == 0, ["A", "B"]] = df["D"]
         tm.assert_frame_equal(df, expected)
 
-    # TODO(ArrayManager) setting single item with an iterable doesn't work yet
-    # in the "split" path
-    @td.skip_array_manager_not_yet_implemented
     def test_setitem_list(self):
 
         # GH 6043
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
@@ -676,18 +676,14 @@ def test_loc_modify_datetime(self):
 
         tm.assert_frame_equal(df, expected)
 
-    def test_loc_setitem_frame_with_reindex(self, using_array_manager):
+    def test_loc_setitem_frame_with_reindex(self):
         # GH#6254 setting issue
         df = DataFrame(index=[3, 5, 4], columns=["A"], dtype=float)
         df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64")
 
         # setting integer values into a float dataframe with loc is inplace,
         #  so we retain float dtype
         ser = Series([2, 3, 1], index=[3, 5, 4], dtype=float)
-        if using_array_manager:
-            # TODO(ArrayManager) with "split" path, we still overwrite the column
-            # and therefore don't take the dtype of the underlying object into account
-            ser = Series([2, 3, 1], index=[3, 5, 4], dtype="int64")
         expected = DataFrame({"A": ser})
         tm.assert_frame_equal(df, expected)
 
@@ -709,9 +705,6 @@ def test_loc_setitem_frame_with_inverted_slice(self):
         expected = DataFrame({"A": [3, 2, 1], "B": "string"}, index=[1, 2, 3])
         tm.assert_frame_equal(df, expected)
 
-    # TODO(ArrayManager) "split" path overwrites column and therefore don't take
-    # the dtype of the underlying object into account
-    @td.skip_array_manager_not_yet_implemented
     def test_loc_setitem_empty_frame(self):
         # GH#6252 setting with an empty frame
         keys1 = ["@" + str(i) for i in range(5)]
@@ -1232,6 +1225,10 @@ def test_loc_getitem_time_object(self, frame_or_series):
     @pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"])
     @pytest.mark.parametrize("dtype", [np.int64, np.float64, complex])
     @td.skip_if_no_scipy
+    @pytest.mark.filterwarnings(
+        # TODO(2.0): remove filtering; note only needed for using_array_manager
+        "ignore:The behavior of .astype from SparseDtype.*FutureWarning"
+    )
     def test_loc_getitem_range_from_spmatrix(self, spmatrix_t, dtype):
         import scipy.sparse