Skip to content

Commit 8945a42

Browse files
[ArrayManager] TST: run (+fix/skip) pandas/tests/indexing tests (#40325)
1 parent b4c554f commit 8945a42

File tree

11 files changed

+174
-37
lines changed

11 files changed

+174
-37
lines changed

.github/workflows/ci.yml

+1 -4
Original file line number | Diff line number | Diff line change
@@ -163,10 +163,7 @@ jobs:
163163
pytest pandas/tests/resample/
164164
pytest pandas/tests/reshape/merge
165165
pytest pandas/tests/series/
166-
167-
# indexing subset (temporary since other tests don't pass yet)
168-
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_astype_assignment_with_dups
169-
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_frame_setitem_multi_column
166+
pytest pandas/tests/indexing/
170167
171168
pytest pandas/tests/api/
172169
pytest pandas/tests/apply/

pandas/core/indexers.py

+2
Original file line number | Diff line number | Diff line change
@@ -342,6 +342,8 @@ def length_of_indexer(indexer, target=None) -> int:
342342
# GH#25774
343343
return indexer.sum()
344344
return len(indexer)
345+
elif isinstance(indexer, range):
346+
return (indexer.stop - indexer.start) // indexer.step
345347
elif not is_list_like_indexer(indexer):
346348
return 1
347349
raise AssertionError("cannot find the length of the indexer")

pandas/core/internals/array_manager.py

+8
Original file line number | Diff line number | Diff line change
@@ -228,6 +228,11 @@ def _verify_integrity(self) -> None:
228228
"Passed arrays should be np.ndarray or ExtensionArray instances, "
229229
f"got {type(arr)} instead"
230230
)
231+
if not arr.ndim == 1:
232+
raise ValueError(
233+
"Passed arrays should be 1-dimensional, got array with "
234+
f"{arr.ndim} dimensions instead."
235+
)
231236

232237
def reduce(
233238
self: T, func: Callable, ignore_failures: bool = False
@@ -1040,6 +1045,9 @@ def take(self: T, indexer, axis: int = 1, verify: bool = True) -> T:
10401045
else np.asanyarray(indexer, dtype="int64")
10411046
)
10421047

1048+
if not indexer.ndim == 1:
1049+
raise ValueError("indexer should be 1-dimensional")
1050+
10431051
n = self.shape_proper[axis]
10441052
indexer = maybe_convert_indices(indexer, n, verify=verify)
10451053

pandas/tests/indexing/multiindex/test_chaining_and_caching.py

+3
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
import pandas.util._test_decorators as td
5+
46
from pandas import (
57
DataFrame,
68
MultiIndex,
@@ -32,6 +34,7 @@ def test_detect_chained_assignment():
3234
zed["eyes"]["right"].fillna(value=555, inplace=True)
3335

3436

37+
@td.skip_array_manager_invalid_test # with ArrayManager df.loc[0] is not a view
3538
def test_cache_updating():
3639
# 5216
3740
# make sure that we don't try to set a dead cache

pandas/tests/indexing/multiindex/test_partial.py

+5
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
import pandas.util._test_decorators as td
5+
46
from pandas import (
57
DataFrame,
68
Float64Index,
@@ -114,6 +116,9 @@ def test_getitem_partial_column_select(self):
114116
with pytest.raises(KeyError, match=r"\('a', 'foo'\)"):
115117
df.loc[("a", "foo"), :]
116118

119+
# TODO(ArrayManager) rewrite test to not use .values
120+
# exp.loc[2000, 4].values[:] select multiple columns -> .values is not a view
121+
@td.skip_array_manager_invalid_test
117122
def test_partial_set(self, multiindex_year_month_day_dataframe_random_data):
118123
# GH #397
119124
ymd = multiindex_year_month_day_dataframe_random_data

pandas/tests/indexing/multiindex/test_setitem.py

+7
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
import pandas.util._test_decorators as td
5+
46
import pandas as pd
57
from pandas import (
68
DataFrame,
@@ -119,6 +121,9 @@ def test_setitem_multiindex3(self):
119121
expected=copy,
120122
)
121123

124+
# TODO(ArrayManager) df.loc["bar"] *= 2 doesn't raise an error but results in
125+
# all NaNs -> doesn't work in the "split" path (also for BlockManager actually)
126+
@td.skip_array_manager_not_yet_implemented
122127
def test_multiindex_setitem(self):
123128

124129
# GH 3738
@@ -457,6 +462,8 @@ def test_setitem_new_column_all_na(self):
457462
assert df["new"].isna().all()
458463

459464

465+
@td.skip_array_manager_invalid_test # df["foo"] select multiple columns -> .values
466+
# is not a view
460467
def test_frame_setitem_view_direct(multiindex_dataframe_random_data):
461468
# this works because we are modifying the underlying array
462469
# really a no-no

pandas/tests/indexing/test_chaining_and_caching.py

+39 -13
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,8 @@
33
import numpy as np
44
import pytest
55

6+
import pandas.util._test_decorators as td
7+
68
import pandas as pd
79
from pandas import (
810
DataFrame,
@@ -169,7 +171,7 @@ def test_detect_chained_assignment(self):
169171
tm.assert_frame_equal(df, expected)
170172

171173
@pytest.mark.arm_slow
172-
def test_detect_chained_assignment_raises(self):
174+
def test_detect_chained_assignment_raises(self, using_array_manager):
173175

174176
# test with the chaining
175177
df = DataFrame(
@@ -180,13 +182,23 @@ def test_detect_chained_assignment_raises(self):
180182
)
181183
assert df._is_copy is None
182184

183-
with pytest.raises(com.SettingWithCopyError, match=msg):
184-
df["A"][0] = -5
185+
if not using_array_manager:
186+
with pytest.raises(com.SettingWithCopyError, match=msg):
187+
df["A"][0] = -5
185188

186-
with pytest.raises(com.SettingWithCopyError, match=msg):
187-
df["A"][1] = np.nan
189+
with pytest.raises(com.SettingWithCopyError, match=msg):
190+
df["A"][1] = np.nan
191+
192+
assert df["A"]._is_copy is None
188193

189-
assert df["A"]._is_copy is None
194+
else:
195+
# INFO(ArrayManager) for ArrayManager it doesn't matter that it's
196+
# a mixed dataframe
197+
df["A"][0] = -5
198+
df["A"][1] = -6
199+
expected = DataFrame([[-5, 2], [-6, 3]], columns=list("AB"))
200+
expected["B"] = expected["B"].astype("float64")
201+
tm.assert_frame_equal(df, expected)
190202

191203
@pytest.mark.arm_slow
192204
def test_detect_chained_assignment_fails(self):
@@ -219,18 +231,24 @@ def test_detect_chained_assignment_doc_example(self):
219231
df[indexer]["c"] = 42
220232

221233
@pytest.mark.arm_slow
222-
def test_detect_chained_assignment_object_dtype(self):
234+
def test_detect_chained_assignment_object_dtype(self, using_array_manager):
223235

224236
expected = DataFrame({"A": [111, "bbb", "ccc"], "B": [1, 2, 3]})
225237
df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]})
226238

227-
with pytest.raises(com.SettingWithCopyError, match=msg):
228-
df["A"][0] = 111
229-
230239
with pytest.raises(com.SettingWithCopyError, match=msg):
231240
df.loc[0]["A"] = 111
232241

233-
df.loc[0, "A"] = 111
242+
if not using_array_manager:
243+
with pytest.raises(com.SettingWithCopyError, match=msg):
244+
df["A"][0] = 111
245+
246+
df.loc[0, "A"] = 111
247+
else:
248+
# INFO(ArrayManager) for ArrayManager it doesn't matter that it's
249+
# a mixed dataframe
250+
df["A"][0] = 111
251+
234252
tm.assert_frame_equal(df, expected)
235253

236254
@pytest.mark.arm_slow
@@ -347,7 +365,7 @@ def test_detect_chained_assignment_undefined_column(self):
347365
df.iloc[0:5]["group"] = "a"
348366

349367
@pytest.mark.arm_slow
350-
def test_detect_chained_assignment_changing_dtype(self):
368+
def test_detect_chained_assignment_changing_dtype(self, using_array_manager):
351369

352370
# Mixed type setting but same dtype & changing dtype
353371
df = DataFrame(
@@ -365,8 +383,14 @@ def test_detect_chained_assignment_changing_dtype(self):
365383
with pytest.raises(com.SettingWithCopyError, match=msg):
366384
df.loc[2]["C"] = "foo"
367385

368-
with pytest.raises(com.SettingWithCopyError, match=msg):
386+
if not using_array_manager:
387+
with pytest.raises(com.SettingWithCopyError, match=msg):
388+
df["C"][2] = "foo"
389+
else:
390+
# INFO(ArrayManager) for ArrayManager it doesn't matter if it's
391+
# changing the dtype or not
369392
df["C"][2] = "foo"
393+
assert df.loc[2, "C"] == "foo"
370394

371395
def test_setting_with_copy_bug(self):
372396

@@ -411,6 +435,8 @@ def test_detect_chained_assignment_warnings_filter_and_dupe_cols(self):
411435
)
412436
tm.assert_frame_equal(df, expected)
413437

438+
# TODO(ArrayManager) fast_xs with array-like scalars is not yet working
439+
@td.skip_array_manager_not_yet_implemented
414440
def test_chained_getitem_with_lists(self):
415441

416442
# GH6394

pandas/tests/indexing/test_iloc.py

+50 -13
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,8 @@
1010
import numpy as np
1111
import pytest
1212

13+
import pandas.util._test_decorators as td
14+
1315
from pandas import (
1416
Categorical,
1517
CategoricalDtype,
@@ -63,26 +65,30 @@ class TestiLocBaseIndependent:
6365
],
6466
)
6567
@pytest.mark.parametrize("indexer", [tm.loc, tm.iloc])
66-
def test_iloc_setitem_fullcol_categorical(self, indexer, key):
68+
def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manager):
6769
frame = DataFrame({0: range(3)}, dtype=object)
6870

6971
cat = Categorical(["alpha", "beta", "gamma"])
7072

71-
assert frame._mgr.blocks[0]._can_hold_element(cat)
73+
if not using_array_manager:
74+
assert frame._mgr.blocks[0]._can_hold_element(cat)
7275

7376
df = frame.copy()
7477
orig_vals = df.values
7578
indexer(df)[key, 0] = cat
7679

7780
overwrite = isinstance(key, slice) and key == slice(None)
7881

79-
if overwrite:
82+
if overwrite or using_array_manager:
83+
# TODO(ArrayManager) we always overwrite because ArrayManager takes
84+
# the "split" path, which still overwrites
8085
# TODO: GH#39986 this probably shouldn't behave differently
8186
expected = DataFrame({0: cat})
8287
assert not np.shares_memory(df.values, orig_vals)
8388
else:
8489
expected = DataFrame({0: cat}).astype(object)
85-
assert np.shares_memory(df.values, orig_vals)
90+
if not using_array_manager:
91+
assert np.shares_memory(df[0].values, orig_vals)
8692

8793
tm.assert_frame_equal(df, expected)
8894

@@ -93,13 +99,27 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key):
9399
else:
94100
assert cat[0] != "gamma"
95101

102+
# TODO with mixed dataframe ("split" path), we always overwrite the column
103+
frame = DataFrame({0: np.array([0, 1, 2], dtype=object), 1: range(3)})
104+
df = frame.copy()
105+
orig_vals = df.values
106+
indexer(df)[key, 0] = cat
107+
expected = DataFrame({0: cat, 1: range(3)})
108+
tm.assert_frame_equal(df, expected)
109+
110+
# TODO(ArrayManager) does not yet update parent
111+
@td.skip_array_manager_not_yet_implemented
96112
@pytest.mark.parametrize("box", [array, Series])
97-
def test_iloc_setitem_ea_inplace(self, frame_or_series, box):
113+
def test_iloc_setitem_ea_inplace(self, frame_or_series, box, using_array_manager):
98114
# GH#38952 Case with not setting a full column
99115
# IntegerArray without NAs
100116
arr = array([1, 2, 3, 4])
101117
obj = frame_or_series(arr.to_numpy("i8"))
102-
values = obj.values
118+
119+
if frame_or_series is Series or not using_array_manager:
120+
values = obj.values
121+
else:
122+
values = obj[0].values
103123

104124
obj.iloc[:2] = box(arr[2:])
105125
expected = frame_or_series(np.array([3, 4, 3, 4], dtype="i8"))
@@ -109,7 +129,10 @@ def test_iloc_setitem_ea_inplace(self, frame_or_series, box):
109129
if frame_or_series is Series:
110130
assert obj.values is values
111131
else:
112-
assert obj.values.base is values.base and values.base is not None
132+
if using_array_manager:
133+
assert obj[0].values is values
134+
else:
135+
assert obj.values.base is values.base and values.base is not None
113136

114137
def test_is_scalar_access(self):
115138
# GH#32085 index with duplicates doesn't matter for _is_scalar_access
@@ -481,13 +504,16 @@ def test_iloc_setitem_dups(self):
481504
df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(drop=True)
482505
tm.assert_frame_equal(df, expected)
483506

484-
def test_iloc_setitem_frame_duplicate_columns_multiple_blocks(self):
507+
def test_iloc_setitem_frame_duplicate_columns_multiple_blocks(
508+
self, using_array_manager
509+
):
485510
# Same as the "assign back to self" check in test_iloc_setitem_dups
486511
# but on a DataFrame with multiple blocks
487512
df = DataFrame([[0, 1], [2, 3]], columns=["B", "B"])
488513

489514
df.iloc[:, 0] = df.iloc[:, 0].astype("f8")
490-
assert len(df._mgr.blocks) == 2
515+
if not using_array_manager:
516+
assert len(df._mgr.blocks) == 2
491517
expected = df.copy()
492518

493519
# assign back to self
@@ -577,7 +603,7 @@ def test_iloc_getitem_labelled_frame(self):
577603
with pytest.raises(ValueError, match=msg):
578604
df.iloc["j", "D"]
579605

580-
def test_iloc_getitem_doc_issue(self):
606+
def test_iloc_getitem_doc_issue(self, using_array_manager):
581607

582608
# multi axis slicing issue with single block
583609
# surfaced in GH 6059
@@ -612,7 +638,8 @@ def test_iloc_getitem_doc_issue(self):
612638
columns = list(range(0, 8, 2))
613639
df = DataFrame(arr, index=index, columns=columns)
614640

615-
df._mgr.blocks[0].mgr_locs
641+
if not using_array_manager:
642+
df._mgr.blocks[0].mgr_locs
616643
result = df.iloc[1:5, 2:4]
617644
str(result)
618645
result.dtypes
@@ -793,15 +820,20 @@ def test_iloc_empty_list_indexer_is_ok(self):
793820
df.iloc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True
794821
)
795822

796-
def test_identity_slice_returns_new_object(self):
823+
def test_identity_slice_returns_new_object(self, using_array_manager):
797824
# GH13873
798825
original_df = DataFrame({"a": [1, 2, 3]})
799826
sliced_df = original_df.iloc[:]
800827
assert sliced_df is not original_df
801828

802829
# should be a shallow copy
803830
original_df["a"] = [4, 4, 4]
804-
assert (sliced_df["a"] == 4).all()
831+
if using_array_manager:
832+
# TODO(ArrayManager) verify it is expected that the original didn't change
833+
# setitem is replacing full column, so doesn't update "viewing" dataframe
834+
assert not (sliced_df["a"] == 4).all()
835+
else:
836+
assert (sliced_df["a"] == 4).all()
805837

806838
original_series = Series([1, 2, 3, 4, 5, 6])
807839
sliced_series = original_series.iloc[:]
@@ -932,6 +964,9 @@ def test_iloc_getitem_readonly_key(self):
932964
expected = df["data"].loc[[1, 3, 6]]
933965
tm.assert_series_equal(result, expected)
934966

967+
# TODO(ArrayManager) setting single item with an iterable doesn't work yet
968+
# in the "split" path
969+
@td.skip_array_manager_not_yet_implemented
935970
def test_iloc_assign_series_to_df_cell(self):
936971
# GH 37593
937972
df = DataFrame(columns=["a"], index=[0])
@@ -1088,6 +1123,8 @@ def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame):
10881123
# GH#32257 we let numpy do validation, get their exception
10891124
float_frame.iloc[:, :, :] = 1
10901125

1126+
# TODO(ArrayManager) "split" path doesn't properly implement DataFrame indexer
1127+
@td.skip_array_manager_not_yet_implemented
10911128
def test_iloc_frame_indexer(self):
10921129
# GH#39004
10931130
df = DataFrame({"a": [1, 2, 3]})

0 commit comments

Comments
 (0)