Skip to content

Commit a9cacd9

Browse files
[ArrayManager] Indexing - implement iset (#39734)
1 parent 52d9bb9 commit a9cacd9

File tree

4 files changed

+43
-18
lines changed

4 files changed

+43
-18
lines changed

.github/workflows/ci.yml

+6
Original file line numberDiff line numberDiff line change
@@ -153,3 +153,9 @@ jobs:
153153
run: |
154154
source activate pandas-dev
155155
pytest pandas/tests/frame/methods --array-manager
156+
157+
# indexing iset related (temporary since other tests don't pass yet)
158+
pytest pandas/tests/frame/indexing/test_indexing.py::TestDataFrameIndexing::test_setitem_multi_index --array-manager
159+
pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_listlike_indexer_duplicate_columns --array-manager
160+
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_astype_assignment_with_dups --array-manager
161+
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_frame_setitem_multi_column --array-manager

pandas/core/frame.py

-9
Original file line numberDiff line numberDiff line change
@@ -9723,12 +9723,3 @@ def _reindex_for_setitem(value: FrameOrSeriesUnion, index: Index) -> ArrayLike:
97239723
"incompatible index of inserted column with frame index"
97249724
) from err
97259725
return reindexed_value
9726-
9727-
9728-
def _maybe_atleast_2d(value):
9729-
# TODO(EA2D): not needed with 2D EAs
9730-
9731-
if is_extension_array_dtype(value):
9732-
return value
9733-
9734-
return np.atleast_2d(np.asarray(value))

pandas/core/internals/array_manager.py

+36 -7
Original file line numberDiff line numberDiff line change
@@ -659,24 +659,53 @@ def idelete(self, indexer):
659659

660660
def iset(self, loc: Union[int, slice, np.ndarray], value):
661661
"""
662-
Set new item in-place. Does not consolidate. Adds new Block if not
663-
contained in the current set of items
662+
Set new column(s).
663+
664+
This changes the ArrayManager in-place, but replaces (an) existing
665+
column(s) rather than changing column values in-place.
666+
667+
Parameters
668+
----------
669+
loc : integer, slice or boolean mask
670+
Positional location (already bounds checked)
671+
value : array-like
664672
"""
673+
# single column -> single integer index
665674
if lib.is_integer(loc):
666-
# TODO normalize array -> this should in theory not be needed?
675+
# TODO the extract_array call should in theory not be needed?
667676
value = extract_array(value, extract_numpy=True)
677+
678+
# TODO can we avoid needing to unpack this here? That means converting
679+
# DataFrame into 1D array when loc is an integer
668680
if isinstance(value, np.ndarray) and value.ndim == 2:
681+
assert value.shape[1] == 1
669682
value = value[0, :]
670683

671684
assert isinstance(value, (np.ndarray, ExtensionArray))
672-
# value = np.asarray(value)
673-
# assert isinstance(value, np.ndarray)
685+
assert value.ndim == 1
674686
assert len(value) == len(self._axes[0])
675687
self.arrays[loc] = value
676688
return
677689

678-
# TODO
679-
raise Exception
690+
# multiple columns -> convert slice or array to integer indices
691+
elif isinstance(loc, slice):
692+
indices = range(
693+
loc.start if loc.start is not None else 0,
694+
loc.stop if loc.stop is not None else self.shape_proper[1],
695+
loc.step if loc.step is not None else 1,
696+
)
697+
else:
698+
assert isinstance(loc, np.ndarray)
699+
assert loc.dtype == "bool"
700+
indices = np.nonzero(loc)[0]
701+
702+
assert value.ndim == 2
703+
assert value.shape[0] == len(self._axes[0])
704+
705+
for value_idx, mgr_idx in enumerate(indices):
706+
value_arr = value[:, value_idx]
707+
self.arrays[mgr_idx] = value_arr
708+
return
680709

681710
def insert(self, loc: int, item: Hashable, value, allow_duplicates: bool = False):
682711
"""

pandas/core/internals/managers.py

+1 -2
Original file line numberDiff line numberDiff line change
@@ -1140,8 +1140,7 @@ def insert(self, loc: int, item: Hashable, value, allow_duplicates: bool = False
11401140

11411141
if value.ndim == 2:
11421142
value = value.T
1143-
1144-
if value.ndim == self.ndim - 1 and not is_extension_array_dtype(value.dtype):
1143+
elif value.ndim == self.ndim - 1 and not is_extension_array_dtype(value.dtype):
11451144
# TODO(EA2D): special case not needed with 2D EAs
11461145
value = safe_reshape(value, (1,) + value.shape)
11471146

0 commit comments

Comments
 (0)