pandas-dev · phofl · Mar 2, 2023 · Mar 1, 2023
diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py
@@ -739,4 +739,22 @@ def time_memory_usage_object_dtype(self):
         self.df2.memory_usage(deep=True)
 
 
+class Round:
+    def setup(self):
+        self.df = DataFrame(np.random.randn(10000, 10))
+        self.df_t = self.df.transpose(copy=True)
+
+    def time_round(self):
+        self.df.round()
+
+    def time_round_transposed(self):
+        self.df_t.round()
+
+    def peakmem_round(self):
+        self.df.round()
+
+    def peakmem_round_transposed(self):
+        self.df_t.round()
+
+
 from .pandas_vb_common import setup  # noqa: F401 isort:skip
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -1169,6 +1169,7 @@ Performance improvements
 - Fixed a reference leak in :func:`read_hdf` (:issue:`37441`)
 - Fixed a memory leak in :meth:`DataFrame.to_json` and :meth:`Series.to_json` when serializing datetimes and timedeltas (:issue:`40443`)
 - Decreased memory usage in many :class:`DataFrameGroupBy` methods (:issue:`51090`)
+- Performance improvement in :meth:`DataFrame.round` for an integer ``decimal`` parameter (:issue:`17254`)
 - Performance improvement in :meth:`DataFrame.replace` and :meth:`Series.replace` when using a large dict for ``to_replace`` (:issue:`6697`)
 - Memory improvement in :class:`StataReader` when reading seekable files (:issue:`48922`)
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -9927,12 +9927,14 @@ def _series_round(ser: Series, decimals: int) -> Series:
                 raise TypeError("Values in decimals must be integers")
             new_cols = list(_dict_round(self, decimals))
         elif is_integer(decimals):
-            # Dispatch to Series.round
-            new_cols = [_series_round(v, decimals) for _, v in self.items()]
+            # Dispatch to Block.round
+            return self._constructor(
+                self._mgr.round(decimals=decimals, using_cow=using_copy_on_write()),
+            ).__finalize__(self, method="round")
         else:
             raise TypeError("decimals must be an integer, a dict-like or a Series")
 
-        if len(new_cols) > 0:
+        if new_cols is not None and len(new_cols) > 0:
             return self._constructor(
                 concat(new_cols, axis=1), index=self.index, columns=self.columns
             ).__finalize__(self, method="round")

diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py
@@ -323,6 +323,9 @@ def where(self: T, other, cond, align: bool) -> T:
             cond=cond,
         )
 
+    def round(self: T, decimals: int, using_cow: bool = False) -> T:
+        return self.apply_with_block("round", decimals=decimals, using_cow=using_cow)
+
     def setitem(self: T, indexer, value) -> T:
         return self.apply_with_block("setitem", indexer=indexer, value=value)
 

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -1462,6 +1462,39 @@ def quantile(
         result = ensure_block_shape(result, ndim=2)
         return new_block_2d(result, placement=self._mgr_locs)
 
+    def round(self, decimals: int, using_cow: bool = False) -> Block:
+        """
+        Rounds the values.
+        If the block is not of an integer or float dtype, nothing happens.
+        This is consistent with DataFrame.round behavivor.
+        (Note: Series.round would raise)
+
+        Parameters
+        ----------
+        decimals: int,
+            Number of decimal places to round to.
+            Caller is responsible for validating this
+        using_cow: bool,
+            Whether Copy on Write is enabled right now
+        """
+        if not self.is_numeric or self.is_bool:
+            return self.copy(deep=not using_cow)
+        refs = None
+        # TODO: round only defined on BaseMaskedArray
+        # Series also does this, so would need to fix both places
+        # error: Item "ExtensionArray" of "Union[ndarray[Any, Any], ExtensionArray]"
+        # has no attribute "round"
+        values = self.values.round(decimals)  # type: ignore[union-attr]
+        if values is self.values:
+            refs = self.refs
+            if not using_cow:
+                # Normally would need to do this before, but
+                # numpy only returns same array when round operation
+                # is no-op
+                # https://github.com/numpy/numpy/blob/486878b37fc7439a3b2b87747f50db9b62fea8eb/numpy/core/src/multiarray/calculation.c#L625-L636
+                values = values.copy()
+        return self.make_block_same_class(values, refs=refs)
+
     # ---------------------------------------------------------------------
     # Abstract Methods Overridden By EABackedBlock and NumpyBlock
 

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -367,6 +367,13 @@ def where(self: T, other, cond, align: bool) -> T:
             using_cow=using_copy_on_write(),
         )
 
+    def round(self: T, decimals: int, using_cow: bool = False) -> T:
+        return self.apply(
+            "round",
+            decimals=decimals,
+            using_cow=using_cow,
+        )
+
     def setitem(self: T, indexer, value) -> T:
         """
         Set values with indexer.

diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py
@@ -986,20 +986,28 @@ def test_sort_values_inplace(using_copy_on_write, obj, kwargs, using_array_manag
         assert np.shares_memory(get_array(obj, "a"), get_array(view, "a"))
 
 
-def test_round(using_copy_on_write):
+@pytest.mark.parametrize("decimals", [-1, 0, 1])
+def test_round(using_copy_on_write, decimals):
     df = DataFrame({"a": [1, 2], "b": "c"})
-    df2 = df.round()
     df_orig = df.copy()
+    df2 = df.round(decimals=decimals)
 
     if using_copy_on_write:
         assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
-        assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+        # TODO: Make inplace by using out parameter of ndarray.round?
+        if decimals >= 0:
+            # Ensure lazy copy if no-op
+            assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
+        else:
+            assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
     else:
         assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
 
     df2.iloc[0, 1] = "d"
+    df2.iloc[0, 0] = 4
     if using_copy_on_write:
         assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
+        assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
     tm.assert_frame_equal(df, df_orig)