From dc57e2e52e9fbe4535bc44eb3bda62214a116f79 Mon Sep 17 00:00:00 2001 From: Rob Levy Date: Tue, 14 Jul 2015 16:23:33 +0100 Subject: [PATCH] ENH: Added DataFrame.round and associated tests --- doc/source/api.rst | 1 + doc/source/options.rst | 2 + doc/source/whatsnew/v0.17.0.txt | 10 +++ pandas/core/frame.py | 70 ++++++++++++++++++++ pandas/tests/test_format.py | 109 ++++++++++++++++++++++++++++++++ 5 files changed, 192 insertions(+) diff --git a/doc/source/api.rst b/doc/source/api.rst index 2f4fd860f270a..38c2c1091469b 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -853,6 +853,7 @@ Computations / Descriptive Stats DataFrame.prod DataFrame.quantile DataFrame.rank + DataFrame.round DataFrame.sem DataFrame.skew DataFrame.sum diff --git a/doc/source/options.rst b/doc/source/options.rst index 26871a11473de..753c4cc52cab8 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -438,3 +438,5 @@ For instance: :suppress: pd.reset_option('^display\.') + +To round floats on a case-by-case basis, you can also use :meth:`~pandas.Series.round` and :meth:`~pandas.DataFrame.round`. diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt index eae33bc80be32..b21fcf0dcb007 100644 --- a/doc/source/whatsnew/v0.17.0.txt +++ b/doc/source/whatsnew/v0.17.0.txt @@ -62,6 +62,16 @@ New features ser = pd.Series([np.nan, np.nan, 5, np.nan, np.nan, np.nan, 13]) ser.interpolate(limit=1, limit_direction='both') +- Round DataFrame to variable number of decimal places (:issue:`10568`). + + .. ipython :: python + + df = pd.DataFrame(np.random.random([3, 3]), columns=['A', 'B', 'C'], + index=['first', 'second', 'third']) + df + df.round(2) + df.round({'A': 0, 'C': 2}) + .. _whatsnew_0170.gil: Releasing the GIL diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 997dfeb728ade..64d9d28ddc611 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4253,6 +4253,76 @@ def merge(self, right, how='inner', on=None, left_on=None, right_on=None, left_index=left_index, right_index=right_index, sort=sort, suffixes=suffixes, copy=copy) + def round(self, decimals=0, out=None): + """ + Round a DataFrame to a variable number of decimal places. + + .. versionadded:: 0.17.0 + + Parameters + ---------- + decimals : int, dict, Series + Number of decimal places to round each column to. If an int is + given, round each column to the same number of places. + Otherwise dict and Series round to variable numbers of places. + Column names should be in the keys if `decimals` is a + dict-like, or in the index if `decimals` is a Series. Any + columns not included in `decimals` will be left as is. Elements + of `decimals` which are not columns of the input will be + ignored. + + Examples + -------- + >>> df = pd.DataFrame(np.random.random([3, 3]), + ... columns=['A', 'B', 'C'], index=['first', 'second', 'third']) + >>> df + A B C + first 0.028208 0.992815 0.173891 + second 0.038683 0.645646 0.577595 + third 0.877076 0.149370 0.491027 + >>> df.round(2) + A B C + first 0.03 0.99 0.17 + second 0.04 0.65 0.58 + third 0.88 0.15 0.49 + >>> df.round({'A': 1, 'C': 2}) + A B C + first 0.0 0.992815 0.17 + second 0.0 0.645646 0.58 + third 0.9 0.149370 0.49 + >>> decimals = pd.Series([1, 0, 2], index=['A', 'B', 'C']) + >>> df.round(decimals) + A B C + first 0.0 1 0.17 + second 0.0 1 0.58 + third 0.9 0 0.49 + + Returns + ------- + DataFrame object + """ + from pandas.tools.merge import concat + + def _dict_round(df, decimals): + for col in df: + try: + yield np.round(df[col], decimals[col]) + except KeyError: + yield df[col] + + if isinstance(decimals, (dict, Series)): + new_cols = [col for col in _dict_round(self, decimals)] + elif com.is_integer(decimals): + # Dispatch to numpy.round + new_cols = [np.round(self[col], decimals) for col in self] + else: + raise TypeError("decimals must be an integer, a dict-like or a Series") + + if len(new_cols) > 0: + return concat(new_cols, axis=1) + else: + return self + #---------------------------------------------------------------------- # Statistical methods, etc. diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index de6d172408916..7877ee3c5a6cc 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -2680,6 +2680,115 @@ def test_to_csv_date_format(self): self.assertEqual(df_day.to_csv(), expected_default_day) self.assertEqual(df_day.to_csv(date_format='%Y-%m-%d'), expected_default_day) + def test_round_dataframe(self): + + # GH 2665 + + # Test that rounding an empty DataFrame does nothing + df = DataFrame() + tm.assert_frame_equal(df, df.round()) + + # Here's the test frame we'll be working with + df = DataFrame( + {'col1': [1.123, 2.123, 3.123], 'col2': [1.234, 2.234, 3.234]}) + + # Default round to integer (i.e. decimals=0) + expected_rounded = DataFrame( + {'col1': [1., 2., 3.], 'col2': [1., 2., 3.]}) + tm.assert_frame_equal(df.round(), expected_rounded) + + # Round with an integer + decimals = 2 + expected_rounded = DataFrame( + {'col1': [1.12, 2.12, 3.12], 'col2': [1.23, 2.23, 3.23]}) + tm.assert_frame_equal(df.round(decimals), expected_rounded) + + # This should also work with np.round (since np.round dispatches to + # df.round) + tm.assert_frame_equal(np.round(df, decimals), expected_rounded) + + # Round with a list + round_list = [1, 2] + with self.assertRaises(TypeError): + df.round(round_list) + + # Round with a dictionary + expected_rounded = DataFrame( + {'col1': [1.1, 2.1, 3.1], 'col2': [1.23, 2.23, 3.23]}) + round_dict = {'col1': 1, 'col2': 2} + tm.assert_frame_equal(df.round(round_dict), expected_rounded) + + # Incomplete dict + expected_partially_rounded = DataFrame( + {'col1': [1.123, 2.123, 3.123], 'col2': [1.2, 2.2, 3.2]}) + partial_round_dict = {'col2': 1} + tm.assert_frame_equal( + df.round(partial_round_dict), expected_partially_rounded) + + # Dict with unknown elements + wrong_round_dict = {'col3': 2, 'col2': 1} + tm.assert_frame_equal( + df.round(wrong_round_dict), expected_partially_rounded) + + # float input to `decimals` + non_int_round_dict = {'col1': 1, 'col2': 0.5} + if sys.version < LooseVersion('2.7'): + # np.round([1.123, 2.123], 0.5) is only a warning in Python 2.6 + with self.assert_produces_warning(DeprecationWarning): + df.round(non_int_round_dict) + else: + with self.assertRaises(TypeError): + df.round(non_int_round_dict) + + # String input + non_int_round_dict = {'col1': 1, 'col2': 'foo'} + with self.assertRaises(TypeError): + df.round(non_int_round_dict) + + non_int_round_Series = Series(non_int_round_dict) + with self.assertRaises(TypeError): + df.round(non_int_round_Series) + + # List input + non_int_round_dict = {'col1': 1, 'col2': [1, 2]} + with self.assertRaises(TypeError): + df.round(non_int_round_dict) + + non_int_round_Series = Series(non_int_round_dict) + with self.assertRaises(TypeError): + df.round(non_int_round_Series) + + # Non integer Series inputs + non_int_round_Series = Series(non_int_round_dict) + with self.assertRaises(TypeError): + df.round(non_int_round_Series) + + non_int_round_Series = Series(non_int_round_dict) + with self.assertRaises(TypeError): + df.round(non_int_round_Series) + + # Negative numbers + negative_round_dict = {'col1': -1, 'col2': -2} + big_df = df * 100 + expected_neg_rounded = DataFrame( + {'col1':[110., 210, 310], 'col2':[100., 200, 300]}) + tm.assert_frame_equal( + big_df.round(negative_round_dict), expected_neg_rounded) + + # nan in Series round + nan_round_Series = Series({'col1': nan, 'col2':1}) + expected_nan_round = DataFrame( + {'col1': [1.123, 2.123, 3.123], 'col2': [1.2, 2.2, 3.2]}) + if sys.version < LooseVersion('2.7'): + # Rounding with decimal is a ValueError in Python < 2.7 + with self.assertRaises(ValueError): + df.round(nan_round_Series) + else: + with self.assertRaises(TypeError): + df.round(nan_round_Series) + + # Make sure this doesn't break existing Series.round + tm.assert_series_equal(df['col1'].round(1), expected_rounded['col1']) class TestSeriesFormatting(tm.TestCase): _multiprocess_can_split_ = True