Merge pull request #10568 from roblevy/variable-round

jreback · jreback · commit 9aafd6d363bb · 2015-09-03T09:41:10.000-04:00
ENH: Added DataFrame.round and associated tests
diff --git a/doc/source/api.rst b/doc/source/api.rst
@@ -853,6 +853,7 @@ Computations / Descriptive Stats
    DataFrame.prod
    DataFrame.quantile
    DataFrame.rank
+   DataFrame.round
    DataFrame.sem
    DataFrame.skew
    DataFrame.sum
diff --git a/doc/source/options.rst b/doc/source/options.rst
@@ -438,3 +438,5 @@ For instance:
    :suppress:
 
    pd.reset_option('^display\.')
+
+To round floats on a case-by-case basis, you can also use :meth:`~pandas.Series.round` and :meth:`~pandas.DataFrame.round`.
diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
@@ -62,6 +62,16 @@ New features
      ser = pd.Series([np.nan, np.nan, 5, np.nan, np.nan, np.nan, 13])
      ser.interpolate(limit=1, limit_direction='both')
 
+- Added :meth:`DataFrame.round` to round a DataFrame to a variable number of decimal places (:issue:`10568`).
+
+   .. ipython :: python
+
+    df = pd.DataFrame(np.random.random([3, 3]), columns=['A', 'B', 'C'],
+    index=['first', 'second', 'third'])
+    df
+    df.round(2)
+    df.round({'A': 0, 'C': 2})
+    
 .. _whatsnew_0170.gil:
 
 Releasing the GIL
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4259,6 +4259,76 @@ def merge(self, right, how='inner', on=None, left_on=None, right_on=None,
                      left_index=left_index, right_index=right_index, sort=sort,
                      suffixes=suffixes, copy=copy)
 
+    def round(self, decimals=0, out=None):
+        """
+        Round a DataFrame to a variable number of decimal places.
+
+        .. versionadded:: 0.17.0
+
+        Parameters
+        ----------
+        decimals : int, dict, Series
+            Number of decimal places to round each column to. If an int is
+            given, round each column to the same number of places.
+            Otherwise dict and Series round to variable numbers of places.
+            Column names should be in the keys if `decimals` is a
+            dict, or in the index if `decimals` is a Series. Any
+            columns not included in `decimals` will be left as is. Elements
+            of `decimals` which are not columns of the input will be
+            ignored.
+        out : object, optional
+            Unused by this method. Accepted for signature compatibility
+            with ``numpy.round``, which dispatches to this method.
+
+        Returns
+        -------
+        DataFrame object
+
+        Raises
+        ------
+        TypeError
+            If `decimals` is not an integer, a dict or a Series.
+
+        Examples
+        --------
+        >>> df = pd.DataFrame(np.random.random([3, 3]),
+        ...     columns=['A', 'B', 'C'], index=['first', 'second', 'third'])
+        >>> df
+                       A         B         C
+        first   0.028208  0.992815  0.173891
+        second  0.038683  0.645646  0.577595
+        third   0.877076  0.149370  0.491027
+        >>> df.round(2)
+                   A     B     C
+        first   0.03  0.99  0.17
+        second  0.04  0.65  0.58
+        third   0.88  0.15  0.49
+        >>> df.round({'A': 1, 'C': 2})
+                  A         B     C
+        first   0.0  0.992815  0.17
+        second  0.0  0.645646  0.58
+        third   0.9  0.149370  0.49
+        >>> decimals = pd.Series([1, 0, 2], index=['A', 'B', 'C'])
+        >>> df.round(decimals)
+                  A  B     C
+        first   0.0  1  0.17
+        second  0.0  1  0.58
+        third   0.9  0  0.49
+        """
+        from pandas.tools.merge import concat
+
+        def _dict_round(df, decimals):
+            # Yield each column, rounded when `decimals` has an entry for it.
+            for col in df:
+                # Only the lookup may legitimately miss. np.round is kept
+                # outside the try so that an error raised while rounding is
+                # never mistaken for "column not listed".
+                try:
+                    places = decimals[col]
+                except KeyError:
+                    # Column not mentioned in `decimals`: leave it as is.
+                    yield df[col]
+                else:
+                    yield np.round(df[col], places)
+
+        if isinstance(decimals, (dict, Series)):
+            new_cols = list(_dict_round(self, decimals))
+        elif com.is_integer(decimals):
+            # Dispatch to numpy.round
+            new_cols = [np.round(self[col], decimals) for col in self]
+        else:
+            raise TypeError("decimals must be an integer, a dict-like or a "
+                            "Series")
+
+        if new_cols:
+            return concat(new_cols, axis=1)
+        # No columns at all (empty frame): there is nothing to concatenate.
+        return self
+
     #----------------------------------------------------------------------
     # Statistical methods, etc.
 
diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py
@@ -2680,6 +2680,115 @@ def test_to_csv_date_format(self):
         self.assertEqual(df_day.to_csv(), expected_default_day)
         self.assertEqual(df_day.to_csv(date_format='%Y-%m-%d'), expected_default_day)
 
+    def test_round_dataframe(self):
+        """Check DataFrame.round with int, dict and Series `decimals`."""
+
+        # GH 2665
+
+        # Branch on the interpreter version tuple rather than comparing the
+        # free-form ``sys.version`` banner string.
+        py26 = sys.version_info < (2, 7)
+
+        # Test that rounding an empty DataFrame does nothing
+        df = DataFrame()
+        tm.assert_frame_equal(df, df.round())
+
+        # Here's the test frame we'll be working with
+        df = DataFrame(
+            {'col1': [1.123, 2.123, 3.123], 'col2': [1.234, 2.234, 3.234]})
+
+        # Default round to integer (i.e. decimals=0)
+        expected_rounded = DataFrame(
+            {'col1': [1., 2., 3.], 'col2': [1., 2., 3.]})
+        tm.assert_frame_equal(df.round(), expected_rounded)
+
+        # Round with an integer
+        decimals = 2
+        expected_rounded = DataFrame(
+            {'col1': [1.12, 2.12, 3.12], 'col2': [1.23, 2.23, 3.23]})
+        tm.assert_frame_equal(df.round(decimals), expected_rounded)
+
+        # This should also work with np.round (since np.round dispatches to
+        # df.round)
+        tm.assert_frame_equal(np.round(df, decimals), expected_rounded)
+
+        # Round with a list
+        round_list = [1, 2]
+        with self.assertRaises(TypeError):
+            df.round(round_list)
+
+        # Round with a dictionary
+        expected_rounded = DataFrame(
+            {'col1': [1.1, 2.1, 3.1], 'col2': [1.23, 2.23, 3.23]})
+        round_dict = {'col1': 1, 'col2': 2}
+        tm.assert_frame_equal(df.round(round_dict), expected_rounded)
+
+        # Incomplete dict
+        expected_partially_rounded = DataFrame(
+            {'col1': [1.123, 2.123, 3.123], 'col2': [1.2, 2.2, 3.2]})
+        partial_round_dict = {'col2': 1}
+        tm.assert_frame_equal(
+            df.round(partial_round_dict), expected_partially_rounded)
+
+        # Dict with unknown elements
+        wrong_round_dict = {'col3': 2, 'col2': 1}
+        tm.assert_frame_equal(
+            df.round(wrong_round_dict), expected_partially_rounded)
+
+        # float input to `decimals`
+        non_int_round_dict = {'col1': 1, 'col2': 0.5}
+        if py26:
+            # np.round([1.123, 2.123], 0.5) is only a warning in Python 2.6
+            with self.assert_produces_warning(DeprecationWarning):
+                df.round(non_int_round_dict)
+        else:
+            with self.assertRaises(TypeError):
+                df.round(non_int_round_dict)
+
+        # String input, as a dict and as the equivalent Series
+        non_int_round_dict = {'col1': 1, 'col2': 'foo'}
+        with self.assertRaises(TypeError):
+            df.round(non_int_round_dict)
+
+        non_int_round_Series = Series(non_int_round_dict)
+        with self.assertRaises(TypeError):
+            df.round(non_int_round_Series)
+
+        # List input, as a dict and as the equivalent Series
+        non_int_round_dict = {'col1': 1, 'col2': [1, 2]}
+        with self.assertRaises(TypeError):
+            df.round(non_int_round_dict)
+
+        non_int_round_Series = Series(non_int_round_dict)
+        with self.assertRaises(TypeError):
+            df.round(non_int_round_Series)
+
+        # Negative numbers
+        negative_round_dict = {'col1': -1, 'col2': -2}
+        big_df = df * 100
+        expected_neg_rounded = DataFrame(
+            {'col1': [110., 210, 310], 'col2': [100., 200, 300]})
+        tm.assert_frame_equal(
+            big_df.round(negative_round_dict), expected_neg_rounded)
+
+        # nan in Series round
+        nan_round_Series = Series({'col1': nan, 'col2': 1})
+        if py26:
+            # Rounding with decimal is a ValueError in Python < 2.7
+            with self.assertRaises(ValueError):
+                df.round(nan_round_Series)
+        else:
+            with self.assertRaises(TypeError):
+                df.round(nan_round_Series)
+
+        # Make sure this doesn't break existing Series.round
+        # (`expected_rounded` still holds the dict-rounding expectation,
+        # whose 'col1' is rounded to one decimal place.)
+        tm.assert_series_equal(df['col1'].round(1), expected_rounded['col1'])
 
 class TestSeriesFormatting(tm.TestCase):
     _multiprocess_can_split_ = True