BUG: display.precision option seems off-by-one (GH10451)

rosnfeld · rosnfeld · commit 66cec775f3e2 · 2015-08-02T16:17:17.000+01:00
diff --git a/doc/source/options.rst b/doc/source/options.rst
@@ -227,7 +227,7 @@ can specify the option ``df.info(null_counts=True)`` to override on showing a pa
    df.info()
    pd.reset_option('max_info_rows')
 
-``display.precision`` sets the output display precision. This is only a
+``display.precision`` sets the output display precision in terms of decimal places. This is only a
 suggestion.
 
 .. ipython:: python
@@ -368,9 +368,11 @@ display.notebook_repr_html True         When True, IPython notebook will
                                         pandas objects (if it is available).
 display.pprint_nest_depth  3            Controls the number of nested levels
                                         to process when pretty-printing
-display.precision          7            Floating point output precision
-                                        (number of significant digits). This is
-                                        only a suggestion
+display.precision          6            Floating point output precision in
+                                        terms of number of places after the
+                                        decimal, for regular formatting as well
+                                        as scientific notation. Similar to
+                                        numpy's ``precision`` print option
 display.show_dimensions    truncate     Whether to print out dimensions
                                         at the end of DataFrame repr.
                                         If 'truncate' is specified, only
diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
@@ -391,6 +391,42 @@ New behavior:
 
 See :ref:`documentation <io.hdf5>` for more details.
 
+Changes to ``display.precision`` option
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The ``display.precision`` option has been clarified to refer to decimal places (:issue:`10451`).
+
+Earlier versions of pandas would format floating point numbers to have one fewer decimal place than the value of
+``display.precision``.
+
+.. code-block:: python
+
+  In [1]: pd.set_option('display.precision', 2)
+
+  In [2]: pd.DataFrame({'x': [123.456789]})
+  Out[2]:
+         x
+  0  123.5
+
+If precision is interpreted as "significant figures", this worked for scientific notation, but the same interpretation
+did not work for values with standard formatting. It was also out of step with how numpy handles formatting.
+
+Going forward, the value of ``display.precision`` will directly control the number of places after the decimal, for
+regular formatting as well as scientific notation, similar to how numpy's ``precision`` print option works.
+
+.. ipython:: python
+
+  pd.set_option('display.precision', 2)
+  pd.DataFrame({'x': [123.456789]})
+
+To preserve output behavior with prior versions, the default value of ``display.precision`` has been reduced to ``6``
+from ``7``.
+
+.. ipython:: python
+  :suppress:
+  pd.set_option('display.precision', 6)
+
+
 .. _whatsnew_0170.api_breaking.other:
 
 Other API Changes
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
@@ -236,7 +236,7 @@ def mpl_style_cb(key):
     return val
 
 with cf.config_prefix('display'):
-    cf.register_option('precision', 7, pc_precision_doc, validator=is_int)
+    cf.register_option('precision', 6, pc_precision_doc, validator=is_int)
     cf.register_option('float_format', None, float_format_doc)
     cf.register_option('column_space', 12, validator=is_int)
     cf.register_option('max_info_rows', 1690785, pc_max_info_rows_doc,
diff --git a/pandas/core/format.py b/pandas/core/format.py
@@ -2014,28 +2014,28 @@ def _format_strings(self):
         if self.formatter is not None:
             fmt_values = [self.formatter(x) for x in self.values]
         else:
-            fmt_str = '%% .%df' % (self.digits - 1)
+            fmt_str = '%% .%df' % self.digits
             fmt_values = self._format_with(fmt_str)
 
             if len(fmt_values) > 0:
                 maxlen = max(len(x) for x in fmt_values)
             else:
                 maxlen = 0
 
-            too_long = maxlen > self.digits + 5
+            too_long = maxlen > self.digits + 6
 
             abs_vals = np.abs(self.values)
 
             # this is pretty arbitrary for now
             has_large_values = (abs_vals > 1e8).any()
-            has_small_values = ((abs_vals < 10 ** (-self.digits+1)) &
+            has_small_values = ((abs_vals < 10 ** (-self.digits)) &
                                 (abs_vals > 0)).any()
 
             if too_long and has_large_values:
-                fmt_str = '%% .%de' % (self.digits - 1)
+                fmt_str = '%% .%de' % self.digits
                 fmt_values = self._format_with(fmt_str)
             elif has_small_values:
-                fmt_str = '%% .%de' % (self.digits - 1)
+                fmt_str = '%% .%de' % self.digits
                 fmt_values = self._format_with(fmt_str)
 
         return fmt_values
diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py
@@ -1523,7 +1523,7 @@ def test_to_string_no_index(self):
 
     def test_to_string_float_formatting(self):
         self.reset_display_options()
-        fmt.set_option('display.precision', 6, 'display.column_space',
+        fmt.set_option('display.precision', 5, 'display.column_space',
                        12, 'display.notebook_repr_html', False)
 
         df = DataFrame({'x': [0, 0.25, 3456.000, 12e+45, 1.64e+6,
@@ -1554,7 +1554,7 @@ def test_to_string_float_formatting(self):
         self.assertEqual(df_s, expected)
 
         self.reset_display_options()
-        self.assertEqual(get_option("display.precision"), 7)
+        self.assertEqual(get_option("display.precision"), 6)
 
         df = DataFrame({'x': [1e9, 0.2512]})
         df_s = df.to_string()
@@ -3055,7 +3055,7 @@ def test_output_significant_digits(self):
         # Issue #9764
 
         # In case default display precision changes:
-        with pd.option_context('display.precision', 7):
+        with pd.option_context('display.precision', 6):
             # DataFrame example from issue #9764
             d=pd.DataFrame({'col1':[9.999e-8, 1e-7, 1.0001e-7, 2e-7, 4.999e-7, 5e-7, 5.0001e-7, 6e-7, 9.999e-7, 1e-6, 1.0001e-6, 2e-6, 4.999e-6, 5e-6, 5.0001e-6, 6e-6]})
 
@@ -3070,6 +3070,17 @@ def test_output_significant_digits(self):
             for (start, stop), v in expected_output.items():
                 self.assertEqual(str(d[start:stop]), v)
 
+    def test_too_long(self):
+        # GH 10451
+        with pd.option_context('display.precision', 4):
+            # the "too long" switch to scientific notation needs both a number > 1e8 and a value whose regular formatting has length > display.precision + 6
+            df = pd.DataFrame(dict(x=[12345.6789]))
+            self.assertEqual(str(df), '            x\n0  12345.6789')
+            df = pd.DataFrame(dict(x=[2e8]))
+            self.assertEqual(str(df), '           x\n0  200000000')
+            df = pd.DataFrame(dict(x=[12345.6789, 2e8]))
+            self.assertEqual(str(df), '            x\n0  1.2346e+04\n1  2.0000e+08')
+
 
 class TestRepr_timedelta64(tm.TestCase):