Skip to content

[ArrayManager] Indexing - implement iset #39734

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Feb 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -153,3 +153,9 @@ jobs:
run: |
source activate pandas-dev
pytest pandas/tests/frame/methods --array-manager

# indexing iset related (temporary since other tests don't pass yet)
pytest pandas/tests/frame/indexing/test_indexing.py::TestDataFrameIndexing::test_setitem_multi_index --array-manager
pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_listlike_indexer_duplicate_columns --array-manager
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_astype_assignment_with_dups --array-manager
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_frame_setitem_multi_column --array-manager
9 changes: 0 additions & 9 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -9723,12 +9723,3 @@ def _reindex_for_setitem(value: FrameOrSeriesUnion, index: Index) -> ArrayLike:
"incompatible index of inserted column with frame index"
) from err
return reindexed_value


def _maybe_atleast_2d(value):
# TODO(EA2D): not needed with 2D EAs

if is_extension_array_dtype(value):
return value

return np.atleast_2d(np.asarray(value))
43 changes: 36 additions & 7 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -659,24 +659,53 @@ def idelete(self, indexer):

def iset(self, loc: Union[int, slice, np.ndarray], value):
    """
    Set new column(s).

    This changes the ArrayManager in-place, but replaces (an) existing
    column(s) rather than changing the column values in-place.

    Parameters
    ----------
    loc : integer, slice or boolean mask
        Positional location (already bounds checked)
    value : array-like
        For an integer ``loc``: a 1D array, or a 2D ndarray with exactly one
        column, whose length equals ``len(self._axes[0])``.
        For a slice or boolean mask ``loc``: a 2D array with
        ``len(self._axes[0])`` rows; its i-th column is stored at the i-th
        selected position.
    """
    # single column -> single integer index
    if lib.is_integer(loc):
        # TODO the extract array should in theory not be needed?
        value = extract_array(value, extract_numpy=True)

        # TODO can we avoid needing to unpack this here? That means converting
        # DataFrame into 1D array when loc is an integer
        if isinstance(value, np.ndarray) and value.ndim == 2:
            assert value.shape[1] == 1
            # Take the single *column*; ``value[0, :]`` would be the first
            # row (length 1) and fail the length check below.
            value = value[:, 0]

        assert isinstance(value, (np.ndarray, ExtensionArray))
        assert value.ndim == 1
        assert len(value) == len(self._axes[0])
        self.arrays[loc] = value
        return

    # multiple columns -> convert slice or array to integer indices
    if isinstance(loc, slice):
        # ``slice.indices`` fills in missing start/stop/step and resolves
        # negative values against the upper bound.
        # NOTE(review): ``self.shape_proper[1]`` is presumably the number of
        # columns (it was the default slice stop) -- confirm.
        indices = range(*loc.indices(self.shape_proper[1]))
    else:
        assert isinstance(loc, np.ndarray)
        assert loc.dtype == "bool"
        indices = np.nonzero(loc)[0]

    assert value.ndim == 2
    assert value.shape[0] == len(self._axes[0])

    # Replace each targeted array with the matching column of ``value``.
    for value_idx, mgr_idx in enumerate(indices):
        self.arrays[mgr_idx] = value[:, value_idx]

def insert(self, loc: int, item: Hashable, value, allow_duplicates: bool = False):
"""
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1140,8 +1140,7 @@ def insert(self, loc: int, item: Hashable, value, allow_duplicates: bool = False

if value.ndim == 2:
value = value.T

if value.ndim == self.ndim - 1 and not is_extension_array_dtype(value.dtype):
elif value.ndim == self.ndim - 1 and not is_extension_array_dtype(value.dtype):
# TODO(EA2D): special case not needed with 2D EAs
value = safe_reshape(value, (1,) + value.shape)

Expand Down