Skip to content

Commit 7e61e3a

Browse files
authored
Backport PR #51498: PERF: Implement round on the block level (#51729)
1 parent e9d2ae2 commit 7e61e3a

File tree

7 files changed

+78
-6
lines changed

7 files changed

+78
-6
lines changed

asv_bench/benchmarks/frame_methods.py

+18
Original file line numberDiff line numberDiff line change
@@ -739,4 +739,22 @@ def time_memory_usage_object_dtype(self):
739739
self.df2.memory_usage(deep=True)
740740

741741

742+
class Round:
743+
def setup(self):
744+
self.df = DataFrame(np.random.randn(10000, 10))
745+
self.df_t = self.df.transpose(copy=True)
746+
747+
def time_round(self):
748+
self.df.round()
749+
750+
def time_round_transposed(self):
751+
self.df_t.round()
752+
753+
def peakmem_round(self):
754+
self.df.round()
755+
756+
def peakmem_round_transposed(self):
757+
self.df_t.round()
758+
759+
742760
from .pandas_vb_common import setup # noqa: F401 isort:skip

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1169,6 +1169,7 @@ Performance improvements
11691169
- Fixed a reference leak in :func:`read_hdf` (:issue:`37441`)
11701170
- Fixed a memory leak in :meth:`DataFrame.to_json` and :meth:`Series.to_json` when serializing datetimes and timedeltas (:issue:`40443`)
11711171
- Decreased memory usage in many :class:`DataFrameGroupBy` methods (:issue:`51090`)
1172+
- Performance improvement in :meth:`DataFrame.round` for an integer ``decimals`` parameter (:issue:`17254`)
11721173
- Performance improvement in :meth:`DataFrame.replace` and :meth:`Series.replace` when using a large dict for ``to_replace`` (:issue:`6697`)
11731174
- Memory improvement in :class:`StataReader` when reading seekable files (:issue:`48922`)
11741175

pandas/core/frame.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -9927,12 +9927,14 @@ def _series_round(ser: Series, decimals: int) -> Series:
99279927
raise TypeError("Values in decimals must be integers")
99289928
new_cols = list(_dict_round(self, decimals))
99299929
elif is_integer(decimals):
9930-
# Dispatch to Series.round
9931-
new_cols = [_series_round(v, decimals) for _, v in self.items()]
9930+
# Dispatch to Block.round
9931+
return self._constructor(
9932+
self._mgr.round(decimals=decimals, using_cow=using_copy_on_write()),
9933+
).__finalize__(self, method="round")
99329934
else:
99339935
raise TypeError("decimals must be an integer, a dict-like or a Series")
99349936

9935-
if len(new_cols) > 0:
9937+
if new_cols is not None and len(new_cols) > 0:
99369938
return self._constructor(
99379939
concat(new_cols, axis=1), index=self.index, columns=self.columns
99389940
).__finalize__(self, method="round")

pandas/core/internals/array_manager.py

+3
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,9 @@ def where(self: T, other, cond, align: bool) -> T:
323323
cond=cond,
324324
)
325325

326+
def round(self: T, decimals: int, using_cow: bool = False) -> T:
327+
return self.apply_with_block("round", decimals=decimals, using_cow=using_cow)
328+
326329
def setitem(self: T, indexer, value) -> T:
327330
return self.apply_with_block("setitem", indexer=indexer, value=value)
328331

pandas/core/internals/blocks.py

+33
Original file line numberDiff line numberDiff line change
@@ -1462,6 +1462,39 @@ def quantile(
14621462
result = ensure_block_shape(result, ndim=2)
14631463
return new_block_2d(result, placement=self._mgr_locs)
14641464

1465+
def round(self, decimals: int, using_cow: bool = False) -> Block:
1466+
"""
1467+
Rounds the values.
1468+
If the block is not of an integer or float dtype, nothing happens.
1469+
This is consistent with DataFrame.round behavior.
1470+
(Note: Series.round would raise)
1471+
1472+
Parameters
1473+
----------
1474+
decimals : int
1475+
Number of decimal places to round to.
1476+
Caller is responsible for validating this
1477+
using_cow : bool
1478+
Whether Copy on Write is enabled right now
1479+
"""
1480+
if not self.is_numeric or self.is_bool:
1481+
return self.copy(deep=not using_cow)
1482+
refs = None
1483+
# TODO: round only defined on BaseMaskedArray
1484+
# Series also does this, so would need to fix both places
1485+
# error: Item "ExtensionArray" of "Union[ndarray[Any, Any], ExtensionArray]"
1486+
# has no attribute "round"
1487+
values = self.values.round(decimals) # type: ignore[union-attr]
1488+
if values is self.values:
1489+
refs = self.refs
1490+
if not using_cow:
1491+
# Normally would need to do this before, but
1492+
# numpy only returns same array when round operation
1493+
# is no-op
1494+
# https://github.com/numpy/numpy/blob/486878b37fc7439a3b2b87747f50db9b62fea8eb/numpy/core/src/multiarray/calculation.c#L625-L636
1495+
values = values.copy()
1496+
return self.make_block_same_class(values, refs=refs)
1497+
14651498
# ---------------------------------------------------------------------
14661499
# Abstract Methods Overridden By EABackedBlock and NumpyBlock
14671500

pandas/core/internals/managers.py

+7
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,13 @@ def where(self: T, other, cond, align: bool) -> T:
367367
using_cow=using_copy_on_write(),
368368
)
369369

370+
def round(self: T, decimals: int, using_cow: bool = False) -> T:
371+
return self.apply(
372+
"round",
373+
decimals=decimals,
374+
using_cow=using_cow,
375+
)
376+
370377
def setitem(self: T, indexer, value) -> T:
371378
"""
372379
Set values with indexer.

pandas/tests/copy_view/test_methods.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -986,20 +986,28 @@ def test_sort_values_inplace(using_copy_on_write, obj, kwargs, using_array_manag
986986
assert np.shares_memory(get_array(obj, "a"), get_array(view, "a"))
987987

988988

989-
def test_round(using_copy_on_write):
989+
@pytest.mark.parametrize("decimals", [-1, 0, 1])
990+
def test_round(using_copy_on_write, decimals):
990991
df = DataFrame({"a": [1, 2], "b": "c"})
991-
df2 = df.round()
992992
df_orig = df.copy()
993+
df2 = df.round(decimals=decimals)
993994

994995
if using_copy_on_write:
995996
assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
996-
assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
997+
# TODO: Make inplace by using out parameter of ndarray.round?
998+
if decimals >= 0:
999+
# Ensure lazy copy if no-op
1000+
assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
1001+
else:
1002+
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
9971003
else:
9981004
assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
9991005

10001006
df2.iloc[0, 1] = "d"
1007+
df2.iloc[0, 0] = 4
10011008
if using_copy_on_write:
10021009
assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
1010+
assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
10031011
tm.assert_frame_equal(df, df_orig)
10041012

10051013

0 commit comments

Comments
 (0)