Skip to content

Commit 0f5d934

Browse files
authored
BUG: ArrayManager indexing mismatched behavior (#45639)
1 parent 49bddad commit 0f5d934

File tree

8 files changed

+19
-73
lines changed

8 files changed

+19
-73
lines changed

pandas/core/indexing.py

+6 -7
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
from pandas.util._decorators import doc
2121
from pandas.util._exceptions import find_stack_level
2222

23+
from pandas.core.dtypes.cast import can_hold_element
2324
from pandas.core.dtypes.common import (
2425
is_array_like,
2526
is_bool_dtype,
@@ -1584,15 +1585,13 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"):
15841585

15851586
# if there is only one block/type, still have to take split path
15861587
# unless the block is one-dimensional or it can hold the value
1587-
if (
1588-
not take_split_path
1589-
and getattr(self.obj._mgr, "blocks", False)
1590-
and self.ndim > 1
1591-
):
1588+
if not take_split_path and len(self.obj._mgr.arrays) and self.ndim > 1:
15921589
# in case of dict, keys are indices
15931590
val = list(value.values()) if isinstance(value, dict) else value
1594-
blk = self.obj._mgr.blocks[0]
1595-
take_split_path = not blk._can_hold_element(val)
1591+
arr = self.obj._mgr.arrays[0]
1592+
take_split_path = not can_hold_element(
1593+
arr, extract_array(val, extract_numpy=True)
1594+
)
15961595

15971596
# if we have any multi-indexes that have non-trivial slices
15981597
# (not null slices) then we must take the split path, xref

pandas/core/internals/array_manager.py

+3 -4
Original file line number | Diff line number | Diff line change
@@ -341,9 +341,8 @@ def where(self: T, other, cond, align: bool) -> T:
341341
cond=cond,
342342
)
343343

344-
# TODO what is this used for?
345-
# def setitem(self, indexer, value) -> ArrayManager:
346-
# return self.apply_with_block("setitem", indexer=indexer, value=value)
344+
def setitem(self: T, indexer, value) -> T:
345+
return self.apply_with_block("setitem", indexer=indexer, value=value)
347346

348347
def putmask(self, mask, new, align: bool = True):
349348
if align:
@@ -467,7 +466,7 @@ def is_view(self) -> bool:
467466

468467
@property
469468
def is_single_block(self) -> bool:
470-
return False
469+
return len(self.arrays) == 1
471470

472471
def _get_data_subset(self: T, predicate: Callable) -> T:
473472
indices = [i for i, arr in enumerate(self.arrays) if predicate(arr)]

pandas/tests/extension/base/setitem.py

-20
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,6 @@
11
import numpy as np
22
import pytest
33

4-
from pandas.core.dtypes.dtypes import (
5-
DatetimeTZDtype,
6-
IntervalDtype,
7-
PandasDtype,
8-
PeriodDtype,
9-
)
10-
114
import pandas as pd
125
import pandas._testing as tm
136
from pandas.tests.extension.base.base import BaseExtensionTests
@@ -367,19 +360,6 @@ def test_setitem_series(self, data, full_indexer):
367360
def test_setitem_frame_2d_values(self, data, request):
368361
# GH#44514
369362
df = pd.DataFrame({"A": data})
370-
371-
# Avoiding using_array_manager fixture
372-
# https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410
373-
using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager)
374-
if using_array_manager:
375-
if not isinstance(
376-
data.dtype, (PandasDtype, PeriodDtype, IntervalDtype, DatetimeTZDtype)
377-
):
378-
# These dtypes have non-broken implementations of _can_hold_element
379-
mark = pytest.mark.xfail(reason="Goes through split path, loses dtype")
380-
request.node.add_marker(mark)
381-
382-
df = pd.DataFrame({"A": data})
383363
orig = df.copy()
384364

385365
df.iloc[:] = df

pandas/tests/frame/indexing/test_indexing.py

-2
Original file line number | Diff line number | Diff line change
@@ -1212,8 +1212,6 @@ def test_setitem_array_as_cell_value(self):
12121212
expected = DataFrame({"a": [np.zeros((2,))], "b": [np.zeros((2, 2))]})
12131213
tm.assert_frame_equal(df, expected)
12141214

1215-
# with AM goes through split-path, loses dtype
1216-
@td.skip_array_manager_not_yet_implemented
12171215
def test_iloc_setitem_nullable_2d_values(self):
12181216
df = DataFrame({"A": [1, 2, 3]}, dtype="Int64")
12191217
orig = df.copy()

pandas/tests/frame/methods/test_quantile.py

+1 -13
Original file line number | Diff line number | Diff line change
@@ -673,19 +673,7 @@ def test_quantile_ea_with_na(self, obj, index):
673673

674674
# TODO(GH#39763): filtering can be removed after GH#39763 is fixed
675675
@pytest.mark.filterwarnings("ignore:Using .astype to convert:FutureWarning")
676-
def test_quantile_ea_all_na(
677-
self, obj, index, frame_or_series, using_array_manager, request
678-
):
679-
if (
680-
using_array_manager
681-
and frame_or_series is DataFrame
682-
and index.dtype == "m8[ns]"
683-
):
684-
mark = pytest.mark.xfail(
685-
reason="obj.astype fails bc obj is incorrectly dt64 at this point"
686-
)
687-
request.node.add_marker(mark)
688-
676+
def test_quantile_ea_all_na(self, obj, index, frame_or_series, request):
689677
obj.iloc[:] = index._na_value
690678

691679
# TODO(ArrayManager): this casting should be unnecessary after GH#39763 is fixed

pandas/tests/indexing/test_iloc.py

+4 -14
Original file line number | Diff line number | Diff line change
@@ -82,9 +82,7 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manage
8282

8383
overwrite = isinstance(key, slice) and key == slice(None)
8484

85-
if overwrite or using_array_manager:
86-
# TODO(ArrayManager) we always overwrite because ArrayManager takes
87-
# the "split" path, which still overwrites
85+
if overwrite:
8886
# TODO: GH#39986 this probably shouldn't behave differently
8987
expected = DataFrame({0: cat})
9088
assert not np.shares_memory(df.values, orig_vals)
@@ -108,13 +106,13 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manage
108106
tm.assert_frame_equal(df, expected)
109107

110108
@pytest.mark.parametrize("box", [array, Series])
111-
def test_iloc_setitem_ea_inplace(self, frame_or_series, box, using_array_manager):
109+
def test_iloc_setitem_ea_inplace(self, frame_or_series, box):
112110
# GH#38952 Case with not setting a full column
113111
# IntegerArray without NAs
114112
arr = array([1, 2, 3, 4])
115113
obj = frame_or_series(arr.to_numpy("i8"))
116114

117-
if frame_or_series is Series or not using_array_manager:
115+
if frame_or_series is Series:
118116
values = obj.values
119117
else:
120118
values = obj[0].values
@@ -131,10 +129,7 @@ def test_iloc_setitem_ea_inplace(self, frame_or_series, box, using_array_manager
131129
if frame_or_series is Series:
132130
assert obj.values is values
133131
else:
134-
if using_array_manager:
135-
assert obj[0].values is values
136-
else:
137-
assert obj.values.base is values.base and values.base is not None
132+
assert np.shares_memory(obj[0].values, values)
138133

139134
def test_is_scalar_access(self):
140135
# GH#32085 index with duplicates doesn't matter for _is_scalar_access
@@ -999,9 +994,6 @@ def test_iloc_getitem_readonly_key(self):
999994
expected = df["data"].loc[[1, 3, 6]]
1000995
tm.assert_series_equal(result, expected)
1001996

1002-
# TODO(ArrayManager) setting single item with an iterable doesn't work yet
1003-
# in the "split" path
1004-
@td.skip_array_manager_not_yet_implemented
1005997
def test_iloc_assign_series_to_df_cell(self):
1006998
# GH 37593
1007999
df = DataFrame(columns=["a"], index=[0])
@@ -1224,8 +1216,6 @@ def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame):
12241216
# GH#32257 we let numpy do validation, get their exception
12251217
float_frame.iloc[:, :, :] = 1
12261218

1227-
# TODO(ArrayManager) "split" path doesn't properly implement DataFrame indexer
1228-
@td.skip_array_manager_not_yet_implemented
12291219
def test_iloc_frame_indexer(self):
12301220
# GH#39004
12311221
df = DataFrame({"a": [1, 2, 3]})

pandas/tests/indexing/test_indexing.py

-5
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,6 @@
77
import numpy as np
88
import pytest
99

10-
import pandas.util._test_decorators as td
11-
1210
from pandas.core.dtypes.common import (
1311
is_float_dtype,
1412
is_integer_dtype,
@@ -504,9 +502,6 @@ def test_multi_assign_broadcasting_rhs(self):
504502
df.loc[df["A"] == 0, ["A", "B"]] = df["D"]
505503
tm.assert_frame_equal(df, expected)
506504

507-
# TODO(ArrayManager) setting single item with an iterable doesn't work yet
508-
# in the "split" path
509-
@td.skip_array_manager_not_yet_implemented
510505
def test_setitem_list(self):
511506

512507
# GH 6043

pandas/tests/indexing/test_loc.py

+5 -8
Original file line number | Diff line number | Diff line change
@@ -676,18 +676,14 @@ def test_loc_modify_datetime(self):
676676

677677
tm.assert_frame_equal(df, expected)
678678

679-
def test_loc_setitem_frame_with_reindex(self, using_array_manager):
679+
def test_loc_setitem_frame_with_reindex(self):
680680
# GH#6254 setting issue
681681
df = DataFrame(index=[3, 5, 4], columns=["A"], dtype=float)
682682
df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64")
683683

684684
# setting integer values into a float dataframe with loc is inplace,
685685
# so we retain float dtype
686686
ser = Series([2, 3, 1], index=[3, 5, 4], dtype=float)
687-
if using_array_manager:
688-
# TODO(ArrayManager) with "split" path, we still overwrite the column
689-
# and therefore don't take the dtype of the underlying object into account
690-
ser = Series([2, 3, 1], index=[3, 5, 4], dtype="int64")
691687
expected = DataFrame({"A": ser})
692688
tm.assert_frame_equal(df, expected)
693689

@@ -709,9 +705,6 @@ def test_loc_setitem_frame_with_inverted_slice(self):
709705
expected = DataFrame({"A": [3, 2, 1], "B": "string"}, index=[1, 2, 3])
710706
tm.assert_frame_equal(df, expected)
711707

712-
# TODO(ArrayManager) "split" path overwrites column and therefore don't take
713-
# the dtype of the underlying object into account
714-
@td.skip_array_manager_not_yet_implemented
715708
def test_loc_setitem_empty_frame(self):
716709
# GH#6252 setting with an empty frame
717710
keys1 = ["@" + str(i) for i in range(5)]
@@ -1232,6 +1225,10 @@ def test_loc_getitem_time_object(self, frame_or_series):
12321225
@pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"])
12331226
@pytest.mark.parametrize("dtype", [np.int64, np.float64, complex])
12341227
@td.skip_if_no_scipy
1228+
@pytest.mark.filterwarnings(
1229+
# TODO(2.0): remove filtering; note only needed for using_array_manager
1230+
"ignore:The behavior of .astype from SparseDtype.*FutureWarning"
1231+
)
12351232
def test_loc_getitem_range_from_spmatrix(self, spmatrix_t, dtype):
12361233
import scipy.sparse
12371234

0 commit comments

Comments
 (0)