Skip to content

PERF: Implement round on the block level #51498

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Mar 1, 2023
18 changes: 18 additions & 0 deletions asv_bench/benchmarks/frame_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -739,4 +739,22 @@ def time_memory_usage_object_dtype(self):
self.df2.memory_usage(deep=True)


class Round:
    """Timing and peak-memory benchmarks for ``DataFrame.round``.

    Each benchmark is run against a 1000 x 100 frame of random floats and
    against a copied transpose of that frame (100 x 1000).
    """

    def setup(self):
        """Build the frame and its copied transpose used by every benchmark."""
        values = np.random.randn(1000, 100)
        frame = DataFrame(values)
        self.df = frame
        # copy=True so the transposed frame owns its data rather than
        # being a view on ``frame``.
        self.df_t = frame.transpose(copy=True)

    # -- timing benchmarks ---------------------------------------------

    def time_round(self):
        """Time ``round()`` on the original frame."""
        self.df.round()

    def time_round_transposed(self):
        """Time ``round()`` on the transposed frame."""
        self.df_t.round()

    # -- peak-memory benchmarks ----------------------------------------

    def peakmem_round(self):
        """Peak-memory benchmark for ``round()`` on the original frame."""
        self.df.round()

    def peakmem_round_transposed(self):
        """Peak-memory benchmark for ``round()`` on the transposed frame."""
        self.df_t.round()


from .pandas_vb_common import setup # noqa: F401 isort:skip
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1153,7 +1153,7 @@ Performance improvements
- Fixed a reference leak in :func:`read_hdf` (:issue:`37441`)
- Fixed a memory leak in :meth:`DataFrame.to_json` and :meth:`Series.to_json` when serializing datetimes and timedeltas (:issue:`40443`)
- Decreased memory usage in many :class:`DataFrameGroupBy` methods (:issue:`51090`)
-
- Performance improvement in :meth:`DataFrame.round` for an integer ``decimals`` parameter (:issue:`17254`)

.. ---------------------------------------------------------------------------
.. _whatsnew_200.bug_fixes:
Expand Down
14 changes: 11 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -9915,6 +9915,7 @@ def _dict_round(df: DataFrame, decimals):
yield _series_round(vals, decimals[col])
except KeyError:
yield vals
df._clear_item_cache()

def _series_round(ser: Series, decimals: int) -> Series:
if is_integer_dtype(ser.dtype) or is_float_dtype(ser.dtype):
Expand All @@ -9923,6 +9924,9 @@ def _series_round(ser: Series, decimals: int) -> Series:

nv.validate_round(args, kwargs)

new_cols = None
new_mgr = None

if isinstance(decimals, (dict, Series)):
if isinstance(decimals, Series) and not decimals.index.is_unique:
raise ValueError("Index of decimals must be unique")
Expand All @@ -9932,15 +9936,19 @@ def _series_round(ser: Series, decimals: int) -> Series:
raise TypeError("Values in decimals must be integers")
new_cols = list(_dict_round(self, decimals))
elif is_integer(decimals):
# Dispatch to Series.round
new_cols = [_series_round(v, decimals) for _, v in self.items()]
# Dispatch to Block.round
new_mgr = self._mgr.apply("round", decimals=decimals)
else:
raise TypeError("decimals must be an integer, a dict-like or a Series")

if len(new_cols) > 0:
if new_cols is not None and len(new_cols) > 0:
return self._constructor(
concat(new_cols, axis=1), index=self.index, columns=self.columns
).__finalize__(self, method="round")
elif new_mgr is not None:
return self._constructor(
new_mgr, index=self.index, columns=self.columns
).__finalize__(self, method="round")
else:
return self.copy(deep=False)

Expand Down
16 changes: 16 additions & 0 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1397,6 +1397,22 @@ def quantile(
result = ensure_block_shape(result, ndim=2)
return new_block_2d(result, placement=self._mgr_locs)

def round(self, decimals: int) -> Block:
    """
    Round the values of this block to the given number of decimals.

    If the block is not numeric, or is boolean, nothing is rounded and
    ``self.copy(deep=None)`` is returned instead.

    Parameters
    ----------
    decimals : int
        Number of decimal places to round to.
        Caller is responsible for validating this.

    Returns
    -------
    Block
        For numeric, non-boolean blocks: a new block holding the rounded
        values at the same placement as this block.
    """
    # TODO: EAs?
    # Maybe fallback to numpy
    if not self.is_numeric or self.is_bool:
        return self.copy(deep=None)

    values = self.values.round(decimals)
    if values is self.values:
        # numpy can hand back the very same array object when rounding is
        # a no-op (e.g. integer data with non-negative decimals). Copy so
        # the new block does not silently share its buffer with this one.
        values = values.copy()
    return new_block_2d(values, placement=self._mgr_locs)

# ---------------------------------------------------------------------
# Abstract Methods Overridden By EABackedBlock and NumpyBlock

Expand Down