Skip to content

REF: preserve Index dtype in BlockManager._combine #41354

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged 2 commits on May 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,9 +274,6 @@ def apply(
else:
new_axes = self._axes

if len(result_arrays) == 0:
return self.make_empty(new_axes)

# error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]";
# expected "List[Union[ndarray, ExtensionArray]]"
return type(self)(result_arrays, new_axes) # type: ignore[arg-type]
Expand Down Expand Up @@ -487,7 +484,7 @@ def _get_data_subset(self: T, predicate: Callable) -> T:
indices = [i for i, arr in enumerate(self.arrays) if predicate(arr)]
arrays = [self.arrays[i] for i in indices]
# TODO copy?
new_axes = [self._axes[0], self._axes[1][np.array(indices, dtype="int64")]]
new_axes = [self._axes[0], self._axes[1][np.array(indices, dtype="intp")]]
return type(self)(arrays, new_axes, verify_integrity=False)

def get_bool_data(self: T, copy: bool = False) -> T:
Expand Down Expand Up @@ -696,7 +693,6 @@ def _equal_values(self, other) -> bool:
return True

# TODO
# equals
# to_dict


Expand Down
14 changes: 9 additions & 5 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,9 +345,6 @@ def apply(
if ignore_failures:
return self._combine(result_blocks)

if len(result_blocks) == 0:
return self.make_empty(self.axes)

return type(self).from_blocks(result_blocks, self.axes)

def where(self: T, other, cond, align: bool, errors: str) -> T:
Expand Down Expand Up @@ -532,6 +529,13 @@ def _combine(
) -> T:
""" return a new manager with the blocks """
if len(blocks) == 0:
if self.ndim == 2:
# retain our own Index dtype
if index is not None:
axes = [self.items[:0], index]
else:
axes = [self.items[:0]] + self.axes[1:]
return self.make_empty(axes)
return self.make_empty()

# FIXME: optimization potential
Expand Down Expand Up @@ -1233,7 +1237,7 @@ def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T:
index = Index(range(result_blocks[0].values.shape[-1]))

if ignore_failures:
return self._combine(result_blocks, index=index)
return self._combine(result_blocks, copy=False, index=index)

return type(self).from_blocks(result_blocks, [self.axes[0], index])

Expand Down Expand Up @@ -1270,7 +1274,7 @@ def reduce(
new_mgr = self._combine(res_blocks, copy=False, index=index)
else:
indexer = []
new_mgr = type(self).from_blocks([], [Index([]), index])
new_mgr = type(self).from_blocks([], [self.items[:0], index])
else:
indexer = np.arange(self.shape[0])
new_mgr = type(self).from_blocks(res_blocks, [self.items, index])
Expand Down
8 changes: 4 additions & 4 deletions pandas/tests/generic/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def test_rename(self):

# multiple axes at once

def test_get_numeric_data(self, using_array_manager):
def test_get_numeric_data(self):

n = 4
kwargs = {
Expand All @@ -100,9 +100,9 @@ def test_get_numeric_data(self, using_array_manager):
# non-inclusion
result = o._get_bool_data()
expected = self._construct(n, value="empty", **kwargs)
if using_array_manager and isinstance(o, DataFrame):
# INFO(ArrayManager) preserve the dtype of the columns Index
expected.columns = expected.columns.astype("int64")
if isinstance(o, DataFrame):
# preserve columns dtype
expected.columns = o.columns[:0]
self._compare(result, expected)

# get the bool data
Expand Down