Skip to content

Commit c331ba8

Browse files
[ArrayManager] TST: run (+fix/skip) pandas/tests/frame/indexing tests (#40323)
1 parent 2b2e9b6 commit c331ba8

File tree

7 files changed

+111
-46
lines changed

7 files changed

+111
-46
lines changed

.github/workflows/ci.yml

+1 -8
Original file line number | Diff line number | Diff line change
@@ -155,10 +155,7 @@ jobs:
155155
run: |
156156
source activate pandas-dev
157157
158-
pytest pandas/tests/frame/methods
159-
pytest pandas/tests/frame/test_constructors.py
160-
pytest pandas/tests/frame/test_*
161-
pytest pandas/tests/frame/test_reductions.py
158+
pytest pandas/tests/frame/
162159
pytest pandas/tests/reductions/
163160
pytest pandas/tests/generic/test_generic.py
164161
pytest pandas/tests/arithmetic/
@@ -170,10 +167,6 @@ jobs:
170167
pytest pandas/tests/series/test_*
171168
172169
# indexing subset (temporary since other tests don't pass yet)
173-
pytest pandas/tests/frame/indexing/test_indexing.py::TestDataFrameIndexing::test_setitem_boolean
174-
pytest pandas/tests/frame/indexing/test_where.py
175-
pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_multi_index
176-
pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_listlike_indexer_duplicate_columns
177170
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_astype_assignment_with_dups
178171
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_frame_setitem_multi_column
179172

pandas/core/internals/array_manager.py

+1 -5
Original file line number | Diff line number | Diff line change
@@ -861,7 +861,7 @@ def iset(self, loc: Union[int, slice, np.ndarray], value):
861861
# DataFrame into 1D array when loc is an integer
862862
if isinstance(value, np.ndarray) and value.ndim == 2:
863863
assert value.shape[1] == 1
864-
value = value[0, :]
864+
value = value[:, 0]
865865

866866
# TODO we receive a datetime/timedelta64 ndarray from DataFrame._iset_item
867867
# but we should avoid that and pass directly the proper array
@@ -1163,10 +1163,6 @@ def axes(self):
11631163
def index(self) -> Index:
11641164
return self._axes[0]
11651165

1166-
@property
1167-
def array(self):
1168-
return self.arrays[0]
1169-
11701166
@property
11711167
def dtype(self):
11721168
return self.array.dtype

pandas/core/internals/base.py

+7
Original file line number | Diff line number | Diff line change
@@ -126,6 +126,13 @@ def isna(self: T, func) -> T:
126126
class SingleDataManager(DataManager):
127127
ndim = 1
128128

129+
@property
130+
def array(self):
131+
"""
132+
Quick access to the backing array of the Block or SingleArrayManager.
133+
"""
134+
return self.arrays[0] # type: ignore[attr-defined]
135+
129136

130137
def interleaved_dtype(dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
131138
"""

pandas/tests/frame/indexing/test_indexing.py

+46 -17
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
import pytest
99

1010
from pandas._libs import iNaT
11+
import pandas.util._test_decorators as td
1112

1213
from pandas.core.dtypes.common import is_integer
1314

@@ -534,6 +535,7 @@ def test_getitem_setitem_integer_slice_keyerrors(self):
534535
with pytest.raises(KeyError, match=r"^3$"):
535536
df2.loc[3:11] = 0
536537

538+
@td.skip_array_manager_invalid_test # already covered in test_iloc_col_slice_view
537539
def test_fancy_getitem_slice_mixed(self, float_frame, float_string_frame):
538540
sliced = float_string_frame.iloc[:, -3:]
539541
assert sliced["D"].dtype == np.float64
@@ -592,6 +594,7 @@ def test_getitem_fancy_scalar(self, float_frame):
592594
for idx in f.index[::5]:
593595
assert ix[idx, col] == ts[idx]
594596

597+
@td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values
595598
def test_setitem_fancy_scalar(self, float_frame):
596599
f = float_frame
597600
expected = float_frame.copy()
@@ -631,6 +634,7 @@ def test_getitem_fancy_boolean(self, float_frame):
631634
expected = f.reindex(index=f.index[boolvec], columns=["C", "D"])
632635
tm.assert_frame_equal(result, expected)
633636

637+
@td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values
634638
def test_setitem_fancy_boolean(self, float_frame):
635639
# from 2d, set with booleans
636640
frame = float_frame.copy()
@@ -990,21 +994,29 @@ def test_iloc_row(self):
990994
expected = df.loc[8:14]
991995
tm.assert_frame_equal(result, expected)
992996

997+
# list of integers
998+
result = df.iloc[[1, 2, 4, 6]]
999+
expected = df.reindex(df.index[[1, 2, 4, 6]])
1000+
tm.assert_frame_equal(result, expected)
1001+
1002+
def test_iloc_row_slice_view(self, using_array_manager):
1003+
df = DataFrame(np.random.randn(10, 4), index=range(0, 20, 2))
1004+
original = df.copy()
1005+
9931006
# verify slice is view
9941007
# setting it makes it raise/warn
1008+
subset = df.iloc[slice(4, 8)]
1009+
9951010
msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame"
9961011
with pytest.raises(com.SettingWithCopyError, match=msg):
997-
result[2] = 0.0
1012+
subset[2] = 0.0
9981013

999-
exp_col = df[2].copy()
1000-
exp_col[4:8] = 0.0
1014+
exp_col = original[2].copy()
1015+
# TODO(ArrayManager) verify it is expected that the original didn't change
1016+
if not using_array_manager:
1017+
exp_col[4:8] = 0.0
10011018
tm.assert_series_equal(df[2], exp_col)
10021019

1003-
# list of integers
1004-
result = df.iloc[[1, 2, 4, 6]]
1005-
expected = df.reindex(df.index[[1, 2, 4, 6]])
1006-
tm.assert_frame_equal(result, expected)
1007-
10081020
def test_iloc_col(self):
10091021

10101022
df = DataFrame(np.random.randn(4, 10), columns=range(0, 20, 2))
@@ -1022,19 +1034,32 @@ def test_iloc_col(self):
10221034
expected = df.loc[:, 8:14]
10231035
tm.assert_frame_equal(result, expected)
10241036

1025-
# verify slice is view
1026-
# and that we are setting a copy
1027-
msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame"
1028-
with pytest.raises(com.SettingWithCopyError, match=msg):
1029-
result[8] = 0.0
1030-
1031-
assert (df[8] == 0).all()
1032-
10331037
# list of integers
10341038
result = df.iloc[:, [1, 2, 4, 6]]
10351039
expected = df.reindex(columns=df.columns[[1, 2, 4, 6]])
10361040
tm.assert_frame_equal(result, expected)
10371041

1042+
def test_iloc_col_slice_view(self, using_array_manager):
1043+
df = DataFrame(np.random.randn(4, 10), columns=range(0, 20, 2))
1044+
original = df.copy()
1045+
subset = df.iloc[:, slice(4, 8)]
1046+
1047+
if not using_array_manager:
1048+
# verify slice is view
1049+
# and that we are setting a copy
1050+
msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame"
1051+
with pytest.raises(com.SettingWithCopyError, match=msg):
1052+
subset[8] = 0.0
1053+
1054+
assert (df[8] == 0).all()
1055+
else:
1056+
# TODO(ArrayManager) verify this is the desired behaviour
1057+
subset[8] = 0.0
1058+
# subset changed
1059+
assert (subset[8] == 0).all()
1060+
# but df itself did not change (setitem replaces full column)
1061+
tm.assert_frame_equal(df, original)
1062+
10381063
def test_loc_duplicates(self):
10391064
# gh-17105
10401065

@@ -1218,7 +1243,7 @@ def test_setitem(self, uint64_frame):
12181243
)
12191244

12201245

1221-
def test_object_casting_indexing_wraps_datetimelike():
1246+
def test_object_casting_indexing_wraps_datetimelike(using_array_manager):
12221247
# GH#31649, check the indexing methods all the way down the stack
12231248
df = DataFrame(
12241249
{
@@ -1240,6 +1265,10 @@ def test_object_casting_indexing_wraps_datetimelike():
12401265
assert isinstance(ser.values[1], Timestamp)
12411266
assert isinstance(ser.values[2], pd.Timedelta)
12421267

1268+
if using_array_manager:
1269+
# remainder of the test checking BlockManager internals
1270+
return
1271+
12431272
mgr = df._mgr
12441273
mgr._rebuild_blknos_and_blklocs()
12451274
arr = mgr.fast_xs(0)

pandas/tests/frame/indexing/test_insert.py

+8 -2
Original file line number | Diff line number | Diff line change
@@ -72,11 +72,17 @@ def test_insert_with_columns_dups(self):
7272
)
7373
tm.assert_frame_equal(df, exp)
7474

75-
def test_insert_item_cache(self):
75+
def test_insert_item_cache(self, using_array_manager):
7676
df = DataFrame(np.random.randn(4, 3))
7777
ser = df[0]
7878

79-
with tm.assert_produces_warning(PerformanceWarning):
79+
if using_array_manager:
80+
expected_warning = None
81+
else:
82+
# with BlockManager warn about high fragmentation of single dtype
83+
expected_warning = PerformanceWarning
84+
85+
with tm.assert_produces_warning(expected_warning):
8086
for n in range(100):
8187
df[n + 3] = df[1] * n
8288

pandas/tests/frame/indexing/test_setitem.py

+18 -8
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,8 @@
33
import numpy as np
44
import pytest
55

6+
import pandas.util._test_decorators as td
7+
68
from pandas.core.dtypes.base import registry as ea_registry
79
from pandas.core.dtypes.common import (
810
is_categorical_dtype,
@@ -298,12 +300,12 @@ def test_setitem_dt64tz(self, timezone_frame):
298300

299301
# assert that A & C are not sharing the same base (e.g. they
300302
# are copies)
301-
b1 = df._mgr.blocks[1]
302-
b2 = df._mgr.blocks[2]
303-
tm.assert_extension_array_equal(b1.values, b2.values)
304-
b1base = b1.values._data.base
305-
b2base = b2.values._data.base
306-
assert b1base is None or (id(b1base) != id(b2base))
303+
v1 = df._mgr.arrays[1]
304+
v2 = df._mgr.arrays[2]
305+
tm.assert_extension_array_equal(v1, v2)
306+
v1base = v1._data.base
307+
v2base = v2._data.base
308+
assert v1base is None or (id(v1base) != id(v2base))
307309

308310
# with nan
309311
df2 = df.copy()
@@ -366,7 +368,7 @@ def test_setitem_frame_length_0_str_key(self, indexer):
366368
expected["A"] = expected["A"].astype("object")
367369
tm.assert_frame_equal(df, expected)
368370

369-
def test_setitem_frame_duplicate_columns(self):
371+
def test_setitem_frame_duplicate_columns(self, using_array_manager):
370372
# GH#15695
371373
cols = ["A", "B", "C"] * 2
372374
df = DataFrame(index=range(3), columns=cols)
@@ -382,6 +384,11 @@ def test_setitem_frame_duplicate_columns(self):
382384
columns=cols,
383385
dtype="object",
384386
)
387+
if using_array_manager:
388+
# setitem replaces column so changes dtype
389+
expected["C"] = expected["C"].astype("int64")
390+
# TODO(ArrayManager) .loc still overwrites
391+
expected["B"] = expected["B"].astype("int64")
385392
tm.assert_frame_equal(df, expected)
386393

387394
@pytest.mark.parametrize("cols", [["a", "b", "c"], ["a", "a", "a"]])
@@ -628,6 +635,8 @@ def test_setitem_object_array_of_tzaware_datetimes(self, idx, expected):
628635

629636

630637
class TestDataFrameSetItemWithExpansion:
638+
# TODO(ArrayManager) update parent (_maybe_update_cacher)
639+
@td.skip_array_manager_not_yet_implemented
631640
def test_setitem_listlike_views(self):
632641
# GH#38148
633642
df = DataFrame({"a": [1, 2, 3], "b": [4, 4, 6]})
@@ -699,7 +708,7 @@ def test_setitem_with_expansion_categorical_dtype(self):
699708

700709
result1 = df["D"]
701710
result2 = df["E"]
702-
tm.assert_categorical_equal(result1._mgr._block.values, cat)
711+
tm.assert_categorical_equal(result1._mgr.array, cat)
703712

704713
# sorting
705714
ser.name = "E"
@@ -767,6 +776,7 @@ def inc(x):
767776

768777

769778
class TestDataFrameSetItemBooleanMask:
779+
@td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values
770780
@pytest.mark.parametrize(
771781
"mask_type",
772782
[lambda df: df > np.abs(df) / 2, lambda df: (df > np.abs(df) / 2).values],

pandas/tests/frame/indexing/test_xs.py

+30 -6
Original file line number | Diff line number | Diff line change
@@ -109,14 +109,22 @@ def test_xs_keep_level(self):
109109
result = df.xs([2008, "sat"], level=["year", "day"], drop_level=False)
110110
tm.assert_frame_equal(result, expected)
111111

112-
def test_xs_view(self):
112+
def test_xs_view(self, using_array_manager):
113113
# in 0.14 this will return a view if possible a copy otherwise, but
114114
# this is numpy dependent
115115

116116
dm = DataFrame(np.arange(20.0).reshape(4, 5), index=range(4), columns=range(5))
117117

118-
dm.xs(2)[:] = 10
119-
assert (dm.xs(2) == 10).all()
118+
if using_array_manager:
119+
# INFO(ArrayManager) with ArrayManager getting a row as a view is
120+
# not possible
121+
msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame"
122+
with pytest.raises(com.SettingWithCopyError, match=msg):
123+
dm.xs(2)[:] = 20
124+
assert not (dm.xs(2) == 20).any()
125+
else:
126+
dm.xs(2)[:] = 20
127+
assert (dm.xs(2) == 20).all()
120128

121129

122130
class TestXSWithMultiIndex:
@@ -327,10 +335,26 @@ def test_xs_droplevel_false(self):
327335
expected = DataFrame({"a": [1]})
328336
tm.assert_frame_equal(result, expected)
329337

330-
def test_xs_droplevel_false_view(self):
338+
def test_xs_droplevel_false_view(self, using_array_manager):
331339
# GH#37832
332340
df = DataFrame([[1, 2, 3]], columns=Index(["a", "b", "c"]))
333341
result = df.xs("a", axis=1, drop_level=False)
334-
df.values[0, 0] = 2
335-
expected = DataFrame({"a": [2]})
342+
# check that result still views the same data as df
343+
assert np.shares_memory(result.iloc[:, 0]._values, df.iloc[:, 0]._values)
344+
# modifying original df also modifies result when having a single block
345+
df.iloc[0, 0] = 2
346+
if not using_array_manager:
347+
expected = DataFrame({"a": [2]})
348+
else:
349+
# TODO(ArrayManager) iloc does not update the array inplace using
350+
# "split" path
351+
expected = DataFrame({"a": [1]})
352+
tm.assert_frame_equal(result, expected)
353+
354+
# with mixed dataframe, modifying the parent doesn't modify result
355+
# TODO the "split" path behaves differently here as with single block
356+
df = DataFrame([[1, 2.5, "a"]], columns=Index(["a", "b", "c"]))
357+
result = df.xs("a", axis=1, drop_level=False)
358+
df.iloc[0, 0] = 2
359+
expected = DataFrame({"a": [1]})
336360
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)