Skip to content

PERF: Implement round on the block level #51498

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Mar 1, 2023
18 changes: 18 additions & 0 deletions asv_bench/benchmarks/frame_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -739,4 +739,22 @@ def time_memory_usage_object_dtype(self):
self.df2.memory_usage(deep=True)


class Round:
    """Benchmarks for ``DataFrame.round`` on a 10000 x 10 frame and its transpose."""

    def setup(self):
        # Build the random frame once, then keep an explicitly copied
        # transpose (10 x 10000) alongside it.
        frame = DataFrame(np.random.randn(10000, 10))
        self.df = frame
        self.df_t = frame.transpose(copy=True)

    # -- timing benchmarks ---------------------------------------------

    def time_round(self):
        self.df.round()

    def time_round_transposed(self):
        self.df_t.round()

    # -- peak-memory benchmarks ----------------------------------------

    def peakmem_round(self):
        self.df.round()

    def peakmem_round_transposed(self):
        self.df_t.round()


from .pandas_vb_common import setup # noqa: F401 isort:skip
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1153,7 +1153,7 @@ Performance improvements
- Fixed a reference leak in :func:`read_hdf` (:issue:`37441`)
- Fixed a memory leak in :meth:`DataFrame.to_json` and :meth:`Series.to_json` when serializing datetimes and timedeltas (:issue:`40443`)
- Decreased memory usage in many :class:`DataFrameGroupBy` methods (:issue:`51090`)
-
- Performance improvement in :meth:`DataFrame.round` for an integer ``decimals`` parameter (:issue:`17254`)

.. ---------------------------------------------------------------------------
.. _whatsnew_200.bug_fixes:
Expand Down
10 changes: 7 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -9932,12 +9932,16 @@ def _series_round(ser: Series, decimals: int) -> Series:
raise TypeError("Values in decimals must be integers")
new_cols = list(_dict_round(self, decimals))
elif is_integer(decimals):
# Dispatch to Series.round
new_cols = [_series_round(v, decimals) for _, v in self.items()]
# Dispatch to Block.round
return self._constructor(
self._mgr.round(decimals=decimals, using_cow=using_copy_on_write()),
index=self.index,
columns=self.columns,
).__finalize__(self, method="round")
else:
raise TypeError("decimals must be an integer, a dict-like or a Series")

if len(new_cols) > 0:
if new_cols is not None and len(new_cols) > 0:
return self._constructor(
concat(new_cols, axis=1), index=self.index, columns=self.columns
).__finalize__(self, method="round")
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/internals/array_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,9 @@ def where(self: T, other, cond, align: bool) -> T:
cond=cond,
)

def round(self: T, decimals: int, using_cow: bool = False) -> T:
    """Forward ``decimals`` and ``using_cow`` to the block-level ``round``."""
    return self.apply_with_block(
        "round",
        decimals=decimals,
        using_cow=using_cow,
    )

def setitem(self: T, indexer, value) -> T:
    """Forward ``indexer`` and ``value`` to the block-level ``setitem``."""
    return self.apply_with_block(
        "setitem",
        indexer=indexer,
        value=value,
    )

Expand Down
17 changes: 17 additions & 0 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1397,6 +1397,23 @@ def quantile(
result = ensure_block_shape(result, ndim=2)
return new_block_2d(result, placement=self._mgr_locs)

def round(self, decimals: int, using_cow: bool = False) -> Block:
    """
    Round the values of this block to the given number of decimals.

    Blocks that are not numeric, as well as boolean blocks, are not
    rounded: a copy of the block is returned instead and no error is
    raised.

    Parameters
    ----------
    decimals : int
        Number of decimal places to round to.
        Caller is responsible for validating this.
    using_cow : bool, default False
        Whether Copy on Write is enabled right now. On the non-rounding
        path this selects ``copy(deep=None)`` when True and
        ``copy(deep=True)`` otherwise.

    Returns
    -------
    Block
    """
    # TODO(review): this path intentionally does not raise. DataFrame.round
    # is believed never to raise for non-numeric columns (Series.round
    # does), and this method is meant to preserve that -- confirm, and
    # consider opening an issue about whether raising would be preferable.
    if not self.is_numeric or self.is_bool:
        return self.copy(deep=None if using_cow else True)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

possibly raise?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe that DataFrame.round() never raises even when there are no numeric columns, but Series.round() does.

Do you want me to add a comment that this doesn't raise?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is just a perf PR, then this should maintain behavior, but it might merit an issue or a TODO comment. It looks like some of our EAs have round methods that are not documented as part of the interface.

return new_block_2d(self.values.round(decimals), placement=self._mgr_locs)

# ---------------------------------------------------------------------
# Abstract Methods Overridden By EABackedBlock and NumpyBlock

Expand Down
7 changes: 7 additions & 0 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,13 @@ def where(self: T, other, cond, align: bool) -> T:
using_cow=using_copy_on_write(),
)

def round(self: T, decimals: int, using_cow: bool = False) -> T:
    """Run the block-level ``round`` through ``self.apply`` with both arguments."""
    return self.apply("round", decimals=decimals, using_cow=using_cow)

def setitem(self: T, indexer, value) -> T:
"""
Set values with indexer.
Expand Down