Skip to content

BUG: ArrayManager indexing mismatched behavior #45639

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jan 28, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 6 additions & 7 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from pandas.util._decorators import doc
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.cast import can_hold_element
from pandas.core.dtypes.common import (
is_array_like,
is_bool_dtype,
Expand Down Expand Up @@ -1583,15 +1584,13 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"):

# if there is only one block/type, still have to take split path
# unless the block is one-dimensional or it can hold the value
if (
not take_split_path
and getattr(self.obj._mgr, "blocks", False)
and self.ndim > 1
):
if not take_split_path and len(self.obj._mgr.arrays) and self.ndim > 1:
# in case of dict, keys are indices
val = list(value.values()) if isinstance(value, dict) else value
blk = self.obj._mgr.blocks[0]
take_split_path = not blk._can_hold_element(val)
arr = self.obj._mgr.arrays[0]
take_split_path = not can_hold_element(
arr, extract_array(val, extract_numpy=True)
)

# if we have any multi-indexes that have non-trivial slices
# (not null slices) then we must take the split path, xref
Expand Down
7 changes: 3 additions & 4 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,9 +341,8 @@ def where(self: T, other, cond, align: bool) -> T:
cond=cond,
)

# TODO what is this used for?
# def setitem(self, indexer, value) -> ArrayManager:
# return self.apply_with_block("setitem", indexer=indexer, value=value)
def setitem(self: T, indexer, value) -> T:
return self.apply_with_block("setitem", indexer=indexer, value=value)

def putmask(self, mask, new, align: bool = True):
if align:
Expand Down Expand Up @@ -467,7 +466,7 @@ def is_view(self) -> bool:

@property
def is_single_block(self) -> bool:
return False
return len(self.arrays) == 1

def _get_data_subset(self: T, predicate: Callable) -> T:
indices = [i for i, arr in enumerate(self.arrays) if predicate(arr)]
Expand Down
20 changes: 0 additions & 20 deletions pandas/tests/extension/base/setitem.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,6 @@
import numpy as np
import pytest

from pandas.core.dtypes.dtypes import (
DatetimeTZDtype,
IntervalDtype,
PandasDtype,
PeriodDtype,
)

import pandas as pd
import pandas._testing as tm
from pandas.tests.extension.base.base import BaseExtensionTests
Expand Down Expand Up @@ -367,19 +360,6 @@ def test_setitem_series(self, data, full_indexer):
def test_setitem_frame_2d_values(self, data, request):
# GH#44514
df = pd.DataFrame({"A": data})

# Avoiding using_array_manager fixture
# https://github.com/pandas-dev/pandas/pull/44514#discussion_r754002410
using_array_manager = isinstance(df._mgr, pd.core.internals.ArrayManager)
if using_array_manager:
if not isinstance(
data.dtype, (PandasDtype, PeriodDtype, IntervalDtype, DatetimeTZDtype)
):
# These dtypes have non-broken implementations of _can_hold_element
mark = pytest.mark.xfail(reason="Goes through split path, loses dtype")
request.node.add_marker(mark)

df = pd.DataFrame({"A": data})
orig = df.copy()

df.iloc[:] = df
Expand Down
2 changes: 0 additions & 2 deletions pandas/tests/frame/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1212,8 +1212,6 @@ def test_setitem_array_as_cell_value(self):
expected = DataFrame({"a": [np.zeros((2,))], "b": [np.zeros((2, 2))]})
tm.assert_frame_equal(df, expected)

# with AM goes through split-path, loses dtype
@td.skip_array_manager_not_yet_implemented
def test_iloc_setitem_nullable_2d_values(self):
df = DataFrame({"A": [1, 2, 3]}, dtype="Int64")
orig = df.copy()
Expand Down
14 changes: 1 addition & 13 deletions pandas/tests/frame/methods/test_quantile.py
Original file line number Diff line number Diff line change
Expand Up @@ -673,19 +673,7 @@ def test_quantile_ea_with_na(self, obj, index):

# TODO(GH#39763): filtering can be removed after GH#39763 is fixed
@pytest.mark.filterwarnings("ignore:Using .astype to convert:FutureWarning")
def test_quantile_ea_all_na(
self, obj, index, frame_or_series, using_array_manager, request
):
if (
using_array_manager
and frame_or_series is DataFrame
and index.dtype == "m8[ns]"
):
mark = pytest.mark.xfail(
reason="obj.astype fails bc obj is incorrectly dt64 at this point"
)
request.node.add_marker(mark)

def test_quantile_ea_all_na(self, obj, index, frame_or_series, request):
obj.iloc[:] = index._na_value

# TODO(ArrayManager): this casting should be unnecessary after GH#39763 is fixed
Expand Down
18 changes: 4 additions & 14 deletions pandas/tests/indexing/test_iloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,7 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manage

overwrite = isinstance(key, slice) and key == slice(None)

if overwrite or using_array_manager:
# TODO(ArrayManager) we always overwrite because ArrayManager takes
# the "split" path, which still overwrites
if overwrite:
# TODO: GH#39986 this probably shouldn't behave differently
expected = DataFrame({0: cat})
assert not np.shares_memory(df.values, orig_vals)
Expand All @@ -108,13 +106,13 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manage
tm.assert_frame_equal(df, expected)

@pytest.mark.parametrize("box", [array, Series])
def test_iloc_setitem_ea_inplace(self, frame_or_series, box, using_array_manager):
def test_iloc_setitem_ea_inplace(self, frame_or_series, box):
# GH#38952 Case with not setting a full column
# IntegerArray without NAs
arr = array([1, 2, 3, 4])
obj = frame_or_series(arr.to_numpy("i8"))

if frame_or_series is Series or not using_array_manager:
if frame_or_series is Series:
values = obj.values
else:
values = obj[0].values
Expand All @@ -131,10 +129,7 @@ def test_iloc_setitem_ea_inplace(self, frame_or_series, box, using_array_manager
if frame_or_series is Series:
assert obj.values is values
else:
if using_array_manager:
assert obj[0].values is values
else:
assert obj.values.base is values.base and values.base is not None
assert np.shares_memory(obj[0].values, values)

def test_is_scalar_access(self):
# GH#32085 index with duplicates doesn't matter for _is_scalar_access
Expand Down Expand Up @@ -999,9 +994,6 @@ def test_iloc_getitem_readonly_key(self):
expected = df["data"].loc[[1, 3, 6]]
tm.assert_series_equal(result, expected)

# TODO(ArrayManager) setting single item with an iterable doesn't work yet
# in the "split" path
@td.skip_array_manager_not_yet_implemented
def test_iloc_assign_series_to_df_cell(self):
# GH 37593
df = DataFrame(columns=["a"], index=[0])
Expand Down Expand Up @@ -1224,8 +1216,6 @@ def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame):
# GH#32257 we let numpy do validation, get their exception
float_frame.iloc[:, :, :] = 1

# TODO(ArrayManager) "split" path doesn't properly implement DataFrame indexer
@td.skip_array_manager_not_yet_implemented
def test_iloc_frame_indexer(self):
# GH#39004
df = DataFrame({"a": [1, 2, 3]})
Expand Down
5 changes: 0 additions & 5 deletions pandas/tests/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas.core.dtypes.common import (
is_float_dtype,
is_integer_dtype,
Expand Down Expand Up @@ -504,9 +502,6 @@ def test_multi_assign_broadcasting_rhs(self):
df.loc[df["A"] == 0, ["A", "B"]] = df["D"]
tm.assert_frame_equal(df, expected)

# TODO(ArrayManager) setting single item with an iterable doesn't work yet
# in the "split" path
@td.skip_array_manager_not_yet_implemented
def test_setitem_list(self):

# GH 6043
Expand Down
13 changes: 5 additions & 8 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -669,18 +669,14 @@ def test_loc_modify_datetime(self):

tm.assert_frame_equal(df, expected)

def test_loc_setitem_frame_with_reindex(self, using_array_manager):
def test_loc_setitem_frame_with_reindex(self):
# GH#6254 setting issue
df = DataFrame(index=[3, 5, 4], columns=["A"], dtype=float)
df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64")

# setting integer values into a float dataframe with loc is inplace,
# so we retain float dtype
ser = Series([2, 3, 1], index=[3, 5, 4], dtype=float)
if using_array_manager:
# TODO(ArrayManager) with "split" path, we still overwrite the column
# and therefore don't take the dtype of the underlying object into account
ser = Series([2, 3, 1], index=[3, 5, 4], dtype="int64")
expected = DataFrame({"A": ser})
tm.assert_frame_equal(df, expected)

Expand All @@ -702,9 +698,6 @@ def test_loc_setitem_frame_with_inverted_slice(self):
expected = DataFrame({"A": [3, 2, 1], "B": "string"}, index=[1, 2, 3])
tm.assert_frame_equal(df, expected)

# TODO(ArrayManager) "split" path overwrites column and therefore don't take
# the dtype of the underlying object into account
@td.skip_array_manager_not_yet_implemented
def test_loc_setitem_empty_frame(self):
# GH#6252 setting with an empty frame
keys1 = ["@" + str(i) for i in range(5)]
Expand Down Expand Up @@ -1225,6 +1218,10 @@ def test_loc_getitem_time_object(self, frame_or_series):
@pytest.mark.parametrize("spmatrix_t", ["coo_matrix", "csc_matrix", "csr_matrix"])
@pytest.mark.parametrize("dtype", [np.int64, np.float64, complex])
@td.skip_if_no_scipy
@pytest.mark.filterwarnings(
# TODO(2.0): remove filtering; note only needed for using_array_manager
"ignore:The behavior of .astype from SparseDtype.*FutureWarning"
)
def test_loc_getitem_range_from_spmatrix(self, spmatrix_t, dtype):
import scipy.sparse

Expand Down