Skip to content

CLN: use ._data less in reshape #33159

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 31, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 13 additions & 11 deletions pandas/core/reshape/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ def __init__(
# Need to flip BlockManager axis in the DataFrame special case
self._is_frame = isinstance(sample, ABCDataFrame)
if self._is_frame:
axis = 1 if axis == 0 else 0
axis = DataFrame._get_block_manager_axis(axis)

self._is_series = isinstance(sample, ABCSeries)
if not 0 <= axis <= sample.ndim:
Expand Down Expand Up @@ -436,7 +436,8 @@ def __init__(
self.objs.append(obj)

# note: this is the BlockManager axis (since DataFrame is transposed)
self.axis = axis
self.bm_axis = axis
self.axis = 1 - self.bm_axis if self._is_frame else 0
self.keys = keys
self.names = names or getattr(keys, "names", None)
self.levels = levels
Expand All @@ -454,7 +455,7 @@ def get_result(self):
if self._is_series:

# stack blocks
if self.axis == 0:
if self.bm_axis == 0:
name = com.consensus_name_attr(self.objs)

mgr = self.objs[0]._data.concat(
Expand All @@ -477,21 +478,22 @@ def get_result(self):
else:
mgrs_indexers = []
for obj in self.objs:
mgr = obj._data
indexers = {}
for ax, new_labels in enumerate(self.new_axes):
if ax == self.axis:
# ::-1 to convert BlockManager ax to DataFrame ax
if ax == self.bm_axis:
# Suppress reindexing on concat axis
continue

obj_labels = mgr.axes[ax]
# 1-ax to convert BlockManager axis to DataFrame axis
obj_labels = obj.axes[1 - ax]
if not new_labels.equals(obj_labels):
indexers[ax] = obj_labels.reindex(new_labels)[1]

mgrs_indexers.append((obj._data, indexers))

new_data = concatenate_block_managers(
mgrs_indexers, self.new_axes, concat_axis=self.axis, copy=self.copy
mgrs_indexers, self.new_axes, concat_axis=self.bm_axis, copy=self.copy
)
if not self.copy:
new_data._consolidate_inplace()
Expand All @@ -500,15 +502,15 @@ def get_result(self):
return cons(new_data).__finalize__(self, method="concat")

def _get_result_dim(self) -> int:
if self._is_series and self.axis == 1:
if self._is_series and self.bm_axis == 1:
return 2
else:
return self.objs[0].ndim

def _get_new_axes(self) -> List[Index]:
ndim = self._get_result_dim()
return [
self._get_concat_axis() if i == self.axis else self._get_comb_axis(i)
self._get_concat_axis() if i == self.bm_axis else self._get_comb_axis(i)
for i in range(ndim)
]

Expand All @@ -527,7 +529,7 @@ def _get_concat_axis(self) -> Index:
Return index to be used along concatenation axis.
"""
if self._is_series:
if self.axis == 0:
if self.bm_axis == 0:
indexes = [x.index for x in self.objs]
elif self.ignore_index:
idx = ibase.default_index(len(self.objs))
Expand Down Expand Up @@ -555,7 +557,7 @@ def _get_concat_axis(self) -> Index:
else:
return ensure_index(self.keys).set_names(self.names)
else:
indexes = [x._data.axes[self.axis] for x in self.objs]
indexes = [x.axes[self.axis] for x in self.objs]

if self.ignore_index:
idx = ibase.default_index(sum(len(i) for i in indexes))
Expand Down
21 changes: 11 additions & 10 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,7 +596,11 @@ def __init__(
self.left = self.orig_left = _left
self.right = self.orig_right = _right
self.how = how
self.axis = axis

# bm_axis -> the axis on the BlockManager
self.bm_axis = axis
# axis -> the axis on the Series/DataFrame
self.axis = 1 - axis if self.left.ndim == 2 else 0

self.on = com.maybe_make_list(on)
self.left_on = com.maybe_make_list(left_on)
Expand Down Expand Up @@ -664,18 +668,17 @@ def get_result(self):

join_index, left_indexer, right_indexer = self._get_join_info()

ldata, rdata = self.left._data, self.right._data
lsuf, rsuf = self.suffixes

llabels, rlabels = _items_overlap_with_suffix(
ldata.items, lsuf, rdata.items, rsuf
self.left._info_axis, lsuf, self.right._info_axis, rsuf
)

lindexers = {1: left_indexer} if left_indexer is not None else {}
rindexers = {1: right_indexer} if right_indexer is not None else {}

result_data = concatenate_block_managers(
[(ldata, lindexers), (rdata, rindexers)],
[(self.left._data, lindexers), (self.right._data, rindexers)],
axes=[llabels.append(rlabels), join_index],
concat_axis=0,
copy=self.copy,
Expand Down Expand Up @@ -864,8 +867,8 @@ def _get_join_indexers(self):
)

def _get_join_info(self):
left_ax = self.left._data.axes[self.axis]
right_ax = self.right._data.axes[self.axis]
left_ax = self.left.axes[self.axis]
right_ax = self.right.axes[self.axis]

if self.left_index and self.right_index and self.how != "asof":
join_index, left_indexer, right_indexer = left_ax.join(
Expand Down Expand Up @@ -1478,12 +1481,10 @@ def __init__(
def get_result(self):
join_index, left_indexer, right_indexer = self._get_join_info()

# this is a bit kludgy
ldata, rdata = self.left._data, self.right._data
lsuf, rsuf = self.suffixes

llabels, rlabels = _items_overlap_with_suffix(
ldata.items, lsuf, rdata.items, rsuf
self.left._info_axis, lsuf, self.right._info_axis, rsuf
)

if self.fill_method == "ffill":
Expand All @@ -1497,7 +1498,7 @@ def get_result(self):
rindexers = {1: right_join_indexer} if right_join_indexer is not None else {}

result_data = concatenate_block_managers(
[(ldata, lindexers), (rdata, rindexers)],
[(self.left._data, lindexers), (self.right._data, rindexers)],
axes=[llabels.append(rlabels), join_index],
concat_axis=0,
copy=self.copy,
Expand Down