BUG: Fix quoting behaviour in to_csv for csv.QUOTE_NONNUMERIC

gfyoung · gfyoung · commit 8e5311243bd6 · 2016-06-15T16:50:51.000+01:00
Float values were being quoted despite the quoting spec. The bug was traced to the float formatting, which was unconditionally casting all floats to strings. The unconditional casting was traced back to commit 2d51b33 (gh-12194) via bisection. This commit undoes some of those changes to rectify the behaviour. Closes gh-12922. [ci skip]
diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt
@@ -388,6 +388,8 @@ Bug Fixes
 
 - Bug in various index types, which did not propagate the name of passed index (:issue:`12309`)
 - Bug in ``DatetimeIndex``, which did not honour the ``copy=True`` (:issue:`13205`)
+
+- Bug in ``DataFrame.to_csv()`` in which float values were being quoted even though quoting was specified for non-numeric values only (:issue:`12922`, :issue:`13259`)
 - Bug in ``MultiIndex`` slicing where extra elements were returned when level is non-unique (:issue:`12896`)
 
 
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -1529,6 +1529,20 @@ def to_native_types(self, slicer=None, na_rep='', float_format=None,
         if slicer is not None:
             values = values[:, slicer]
 
+        # see gh-13418: no special formatting is desired at the
+        # output (important for appropriate 'quoting' behaviour),
+        # so do not pass it through the FloatArrayFormatter
+        if float_format is None and decimal == '.':
+            mask = isnull(values)
+
+            if not quoting:
+                values = values.astype(str)
+            else:
+                values = np.array(values, dtype='object')
+
+            values[mask] = na_rep
+            return values
+
         from pandas.formats.format import FloatArrayFormatter
         formatter = FloatArrayFormatter(values, na_rep=na_rep,
                                         float_format=float_format,
diff --git a/pandas/formats/format.py b/pandas/formats/format.py
@@ -1,4 +1,9 @@
 # -*- coding: utf-8 -*-
+"""
+Internal module for formatting output data in csv, html,
+and latex files. This module also applies to display formatting.
+"""
+
 from __future__ import print_function
 from distutils.version import LooseVersion
 # pylint: disable=W0141
diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py
@@ -824,35 +824,6 @@ def test_to_csv_float_format(self):
                            index=['A', 'B'], columns=['X', 'Y', 'Z'])
             assert_frame_equal(rs, xp)
 
-    def test_to_csv_quoting(self):
-        df = DataFrame({'A': [1, 2, 3], 'B': ['foo', 'bar', 'baz']})
-
-        buf = StringIO()
-        df.to_csv(buf, index=False, quoting=csv.QUOTE_NONNUMERIC)
-
-        result = buf.getvalue()
-        expected = ('"A","B"\n'
-                    '1,"foo"\n'
-                    '2,"bar"\n'
-                    '3,"baz"\n')
-
-        self.assertEqual(result, expected)
-
-        # quoting windows line terminators, presents with encoding?
-        # #3503
-        text = 'a,b,c\n1,"test \r\n",3\n'
-        df = pd.read_csv(StringIO(text))
-        buf = StringIO()
-        df.to_csv(buf, encoding='utf-8', index=False)
-        self.assertEqual(buf.getvalue(), text)
-
-        # testing if quoting parameter is passed through with multi-indexes
-        # related to issue #7791
-        df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [5, 6]})
-        df = df.set_index(['a', 'b'])
-        expected = '"a","b","c"\n"1","3","5"\n"2","4","6"\n'
-        self.assertEqual(df.to_csv(quoting=csv.QUOTE_ALL), expected)
-
     def test_to_csv_unicodewriter_quoting(self):
         df = DataFrame({'A': [1, 2, 3], 'B': ['foo', 'bar', 'baz']})
 
@@ -1131,3 +1102,83 @@ def test_to_csv_with_dst_transitions(self):
             df.to_pickle(path)
             result = pd.read_pickle(path)
             assert_frame_equal(result, df)
+
+    def test_to_csv_quoting(self):
+        df = DataFrame({
+            'c_string': ['a', 'b,c'],
+            'c_int': [42, np.nan],
+            'c_float': [1.0, 3.2],
+            'c_bool': [True, False],
+        })
+
+        expected = """\
+,c_bool,c_float,c_int,c_string
+0,True,1.0,42.0,a
+1,False,3.2,,"b,c"
+"""
+        result = df.to_csv()
+        self.assertEqual(result, expected)
+
+        result = df.to_csv(quoting=None)
+        self.assertEqual(result, expected)
+
+        result = df.to_csv(quoting=csv.QUOTE_MINIMAL)
+        self.assertEqual(result, expected)
+
+        expected = """\
+"","c_bool","c_float","c_int","c_string"
+"0","True","1.0","42.0","a"
+"1","False","3.2","","b,c"
+"""
+        result = df.to_csv(quoting=csv.QUOTE_ALL)
+        self.assertEqual(result, expected)
+
+        # see gh-12922, gh-13259: make sure changes to
+        # the formatters do not break this behaviour
+        expected = """\
+"","c_bool","c_float","c_int","c_string"
+0,True,1.0,42.0,"a"
+1,False,3.2,"","b,c"
+"""
+        result = df.to_csv(quoting=csv.QUOTE_NONNUMERIC)
+        self.assertEqual(result, expected)
+
+        msg = "need to escape, but no escapechar set"
+        tm.assertRaisesRegexp(csv.Error, msg, df.to_csv,
+                              quoting=csv.QUOTE_NONE)
+        tm.assertRaisesRegexp(csv.Error, msg, df.to_csv,
+                              quoting=csv.QUOTE_NONE,
+                              escapechar=None)
+
+        expected = """\
+,c_bool,c_float,c_int,c_string
+0,True,1.0,42.0,a
+1,False,3.2,,b!,c
+"""
+        result = df.to_csv(quoting=csv.QUOTE_NONE,
+                           escapechar='!')
+        self.assertEqual(result, expected)
+
+        expected = """\
+,c_bool,c_ffloat,c_int,c_string
+0,True,1.0,42.0,a
+1,False,3.2,,bf,c
+"""
+        result = df.to_csv(quoting=csv.QUOTE_NONE,
+                           escapechar='f')
+        self.assertEqual(result, expected)
+
+        # see gh-3503: quoting Windows line terminators
+        # presents with encoding?
+        text = 'a,b,c\n1,"test \r\n",3\n'
+        df = pd.read_csv(StringIO(text))
+        buf = StringIO()
+        df.to_csv(buf, encoding='utf-8', index=False)
+        self.assertEqual(buf.getvalue(), text)
+
+        # xref gh-7791: make sure the quoting parameter is passed through
+        # with multi-indexes
+        df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [5, 6]})
+        df = df.set_index(['a', 'b'])
+        expected = '"a","b","c"\n"1","3","5"\n"2","4","6"\n'
+        self.assertEqual(df.to_csv(quoting=csv.QUOTE_ALL), expected)