Skip to content

PERF: fastpath DataFrame constructor from BlockManager #33357

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Apr 9, 2020
5 changes: 2 additions & 3 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,9 +166,8 @@ def get_result(self):
elif isinstance(self.f, np.ufunc):
with np.errstate(all="ignore"):
results = self.obj._mgr.apply("apply", func=self.f)
return self.obj._constructor(
data=results, index=self.index, columns=self.columns, copy=False
)
# _constructor will retain self.index and self.columns
return self.obj._constructor(data=results)

# broadcasting
if self.result_type == "broadcast":
Expand Down
5 changes: 5 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,11 @@ def __init__(
data = data._mgr

if isinstance(data, BlockManager):
if index is None and columns is None and dtype is None and copy is False:
# GH#33357 fastpath
NDFrame.__init__(self, data)
return

mgr = self._init_mgr(
data, axes=dict(index=index, columns=columns), dtype=dtype, copy=copy
)
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,13 +213,13 @@ def __init__(
object.__setattr__(self, "_attrs", attrs)

@classmethod
def _init_mgr(cls, mgr, axes=None, dtype=None, copy=False):
def _init_mgr(cls, mgr, axes, dtype=None, copy: bool = False) -> BlockManager:
""" passed a manager and a axes dict """
for a, axe in axes.items():
if axe is not None:
mgr = mgr.reindex_axis(
axe, axis=cls._get_block_manager_axis(a), copy=False
)
axe = ensure_index(axe)
bm_axis = cls._get_block_manager_axis(a)
mgr = mgr.reindex_axis(axe, axis=bm_axis, copy=False)

# make a copy if explicitly requested
if copy:
Expand Down
16 changes: 13 additions & 3 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,17 @@ def __init__(
self, data=None, index=None, dtype=None, name=None, copy=False, fastpath=False
):

if (
isinstance(data, SingleBlockManager)
and index is None
and dtype is None
and copy is False
):
# GH#33357 called with just the SingleBlockManager
NDFrame.__init__(self, data)
self.name = name
return

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there any places where we are actually setting fastpath=True now? We should deprecate this.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Series.take, DataFrame._box_col_values. I generally agree it would be nice to be rid of it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And several more places as well.
But that's an independent discussion, I would say.

# we are called internally, so short-circuit
if fastpath:

Expand Down Expand Up @@ -827,9 +838,8 @@ def take(self, indices, axis=0, is_copy=None, **kwargs) -> "Series":
new_index = self.index.take(indices)
new_values = self._values.take(indices)

return self._constructor(
new_values, index=new_index, fastpath=True
).__finalize__(self, method="take")
result = self._constructor(new_values, index=new_index, fastpath=True)
return result.__finalize__(self, method="take")

def _take_with_is_copy(self, indices, axis=0):
"""
Expand Down