Skip to content

ENH: Added DataFrame.round and associated tests #10568

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 3, 2015
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -853,6 +853,7 @@ Computations / Descriptive Stats
DataFrame.prod
DataFrame.quantile
DataFrame.rank
DataFrame.round
DataFrame.sem
DataFrame.skew
DataFrame.sum
Expand Down
2 changes: 2 additions & 0 deletions doc/source/options.rst
Original file line number Diff line number Diff line change
Expand Up @@ -438,3 +438,5 @@ For instance:
:suppress:

pd.reset_option('^display\.')

To round floats on a case-by-case basis, you can also use :meth:`~pandas.Series.round` and :meth:`~pandas.DataFrame.round`.
10 changes: 10 additions & 0 deletions doc/source/whatsnew/v0.17.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,16 @@ New features
ser = pd.Series([np.nan, np.nan, 5, np.nan, np.nan, np.nan, 13])
ser.interpolate(limit=1, limit_direction='both')

- Round a DataFrame to a variable number of decimal places with :meth:`~pandas.DataFrame.round` (:issue:`10568`).

.. ipython :: python

df = pd.DataFrame(np.random.random([3, 3]), columns=['A', 'B', 'C'],
index=['first', 'second', 'third'])
df
df.round(2)
df.round({'A': 0, 'C': 2})

.. _whatsnew_0170.gil:

Releasing the GIL
Expand Down
70 changes: 70 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4253,6 +4253,76 @@ def merge(self, right, how='inner', on=None, left_on=None, right_on=None,
left_index=left_index, right_index=right_index, sort=sort,
suffixes=suffixes, copy=copy)

def round(self, decimals=0, out=None):
    """
    Round the columns of a DataFrame, optionally with a different
    number of decimal places per column.

    .. versionadded:: 0.17.0

    Parameters
    ----------
    decimals : int, dict, Series
        Number of decimal places to round to. An int is applied to
        every column. A dict (keyed by column name) or a Series
        (indexed by column name) gives the number of places column by
        column: columns that have no entry in `decimals` are returned
        unrounded, and entries of `decimals` that do not name a column
        of the frame are ignored.
    out : optional
        Not used by this method.
        NOTE(review): presumably accepted so that ``np.round(df, ...)``
        can forward its ``out`` argument -- confirm.

    Returns
    -------
    DataFrame object
        The rounded columns concatenated along axis 1. When there are
        no columns to round, the frame itself is returned.

    Raises
    ------
    TypeError
        If `decimals` is not an integer, a dict or a Series.

    Examples
    --------
    >>> df = pd.DataFrame(np.random.random([3, 3]),
    ...     columns=['A', 'B', 'C'], index=['first', 'second', 'third'])
    >>> df
                   A         B         C
    first   0.028208  0.992815  0.173891
    second  0.038683  0.645646  0.577595
    third   0.877076  0.149370  0.491027
    >>> df.round(2)
               A     B     C
    first   0.03  0.99  0.17
    second  0.04  0.65  0.58
    third   0.88  0.15  0.49
    >>> df.round({'A': 1, 'C': 2})
              A         B     C
    first   0.0  0.992815  0.17
    second  0.0  0.645646  0.58
    third   0.9  0.149370  0.49
    >>> decimals = pd.Series([1, 0, 2], index=['A', 'B', 'C'])
    >>> df.round(decimals)
              A  B     C
    first   0.0  1  0.17
    second  0.0  1  0.58
    third   0.9  0  0.49
    """
    from pandas.tools.merge import concat

    def _round_column(name):
        # A KeyError means `decimals` has no entry for this column, in
        # which case the column is passed through unchanged.
        try:
            return np.round(self[name], decimals[name])
        except KeyError:
            return self[name]

    if isinstance(decimals, (dict, Series)):
        # Per-column precision: look each column up in `decimals`.
        # NOTE(review): the looked-up values are handed to np.round
        # unchecked. The PR reviewer asked for tests covering
        # non-integer, negative and NaN entries, and for a clearer error
        # message if numpy's own errors turn out to be obtuse.
        rounded = [_round_column(name) for name in self]
    elif com.is_integer(decimals):
        # One precision for every column; dispatch to numpy.round.
        rounded = [np.round(self[name], decimals) for name in self]
    else:
        raise TypeError("decimals must be an integer, a dict-like or a Series")

    if not rounded:
        # No columns at all: there is nothing to concatenate.
        return self
    return concat(rounded, axis=1)

#----------------------------------------------------------------------
# Statistical methods, etc.

Expand Down
109 changes: 109 additions & 0 deletions pandas/tests/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -2680,6 +2680,115 @@ def test_to_csv_date_format(self):
self.assertEqual(df_day.to_csv(), expected_default_day)
self.assertEqual(df_day.to_csv(date_format='%Y-%m-%d'), expected_default_day)

def test_round_dataframe(self):
    """Check DataFrame.round with int, dict and Series ``decimals``.

    Covers the empty frame, the default and an explicit integer
    precision, dispatch through ``np.round``, complete / partial /
    over-specified dicts, negative precisions, and the errors raised for
    unsupported ``decimals`` (a list, or dict/Series entries that are
    floats, strings, lists or NaN).
    """
    # GH 2665

    # Some invalid inputs are reported differently by Python < 2.7 (see
    # the branches below). Compare the version tuple rather than the
    # free-form ``sys.version`` string.
    before_py27 = sys.version_info < (2, 7)

    # Test that rounding an empty DataFrame does nothing
    df = DataFrame()
    tm.assert_frame_equal(df, df.round())

    # Here's the test frame we'll be working with
    df = DataFrame(
        {'col1': [1.123, 2.123, 3.123], 'col2': [1.234, 2.234, 3.234]})

    # Default round to integer (i.e. decimals=0)
    expected_rounded = DataFrame(
        {'col1': [1., 2., 3.], 'col2': [1., 2., 3.]})
    tm.assert_frame_equal(df.round(), expected_rounded)

    # Round with an integer
    decimals = 2
    expected_rounded = DataFrame(
        {'col1': [1.12, 2.12, 3.12], 'col2': [1.23, 2.23, 3.23]})
    tm.assert_frame_equal(df.round(decimals), expected_rounded)

    # This should also work with np.round (since np.round dispatches to
    # df.round)
    tm.assert_frame_equal(np.round(df, decimals), expected_rounded)

    # Round with a list
    round_list = [1, 2]
    with self.assertRaises(TypeError):
        df.round(round_list)

    # Round with a dictionary
    expected_dict_rounded = DataFrame(
        {'col1': [1.1, 2.1, 3.1], 'col2': [1.23, 2.23, 3.23]})
    round_dict = {'col1': 1, 'col2': 2}
    tm.assert_frame_equal(df.round(round_dict), expected_dict_rounded)

    # Incomplete dict
    expected_partially_rounded = DataFrame(
        {'col1': [1.123, 2.123, 3.123], 'col2': [1.2, 2.2, 3.2]})
    partial_round_dict = {'col2': 1}
    tm.assert_frame_equal(
        df.round(partial_round_dict), expected_partially_rounded)

    # Dict with unknown elements
    wrong_round_dict = {'col3': 2, 'col2': 1}
    tm.assert_frame_equal(
        df.round(wrong_round_dict), expected_partially_rounded)

    # float input to `decimals`
    non_int_round_dict = {'col1': 1, 'col2': 0.5}
    if before_py27:
        # np.round([1.123, 2.123], 0.5) is only a warning in Python 2.6
        with self.assert_produces_warning(DeprecationWarning):
            df.round(non_int_round_dict)
    else:
        with self.assertRaises(TypeError):
            df.round(non_int_round_dict)

    # String input
    non_int_round_dict = {'col1': 1, 'col2': 'foo'}
    with self.assertRaises(TypeError):
        df.round(non_int_round_dict)

    non_int_round_Series = Series(non_int_round_dict)
    with self.assertRaises(TypeError):
        df.round(non_int_round_Series)

    # List input, as a dict and as the equivalent (non-integer) Series
    non_int_round_dict = {'col1': 1, 'col2': [1, 2]}
    with self.assertRaises(TypeError):
        df.round(non_int_round_dict)

    non_int_round_Series = Series(non_int_round_dict)
    with self.assertRaises(TypeError):
        df.round(non_int_round_Series)

    # Negative numbers
    negative_round_dict = {'col1': -1, 'col2': -2}
    big_df = df * 100
    expected_neg_rounded = DataFrame(
        {'col1': [110., 210, 310], 'col2': [100., 200, 300]})
    tm.assert_frame_equal(
        big_df.round(negative_round_dict), expected_neg_rounded)

    # nan in Series round
    nan_round_Series = Series({'col1': nan, 'col2': 1})
    if before_py27:
        # Rounding with decimal is a ValueError in Python < 2.7
        with self.assertRaises(ValueError):
            df.round(nan_round_Series)
    else:
        with self.assertRaises(TypeError):
            df.round(nan_round_Series)

    # Make sure this doesn't break existing Series.round
    tm.assert_series_equal(
        df['col1'].round(1), expected_dict_rounded['col1'])

class TestSeriesFormatting(tm.TestCase):
_multiprocess_can_split_ = True
Expand Down