BUG: Fix csv.QUOTE_NONNUMERIC quoting in to_csv

gfyoung · jreback · commit d814f4339400 · 2016-06-16T08:31:32.000-04:00
Closes #12922: "bug" traced to #12194. Author: gfyoung <gfyoung17@gmail.com>. Closes #13418 from gfyoung/to-csv-quote-bugfix and squashes the following commits: 8e53112 [gfyoung] BUG: Fix quoting behaviour in to_csv for csv.QUOTE_NONNUMERIC.
diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt
@@ -388,6 +388,8 @@ Bug Fixes
 
 - Bug in various index types, which did not propagate the name of passed index (:issue:`12309`)
 - Bug in ``DatetimeIndex``, which did not honour the ``copy=True`` (:issue:`13205`)
+
+- Bug in ``DataFrame.to_csv()`` in which float values were quoted even though quoting was requested for non-numeric values only, i.e. ``quoting=csv.QUOTE_NONNUMERIC`` (:issue:`12922`, :issue:`13259`)
 - Bug in ``MultiIndex`` slicing where extra elements were returned when level is non-unique (:issue:`12896`)
 
 
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -1529,6 +1529,20 @@ def to_native_types(self, slicer=None, na_rep='', float_format=None,
         if slicer is not None:
             values = values[:, slicer]
 
+        # see gh-13418: with no float_format and the default decimal,
+        # bypass FloatArrayFormatter; when quoting is set the values are
+        # kept as objects rather than strings so 'quoting' behaves correctly
+        if float_format is None and decimal == '.':
+            mask = isnull(values)
+
+            if not quoting:
+                values = values.astype(str)
+            else:
+                values = np.array(values, dtype='object')
+
+            values[mask] = na_rep
+            return values
+
         from pandas.formats.format import FloatArrayFormatter
         formatter = FloatArrayFormatter(values, na_rep=na_rep,
                                         float_format=float_format,
diff --git a/pandas/formats/format.py b/pandas/formats/format.py
@@ -1,4 +1,9 @@
 # -*- coding: utf-8 -*-
+"""
+Internal module for formatting output data in csv, html,
+and latex files. This module also applies to display formatting.
+"""
+
 from __future__ import print_function
 from distutils.version import LooseVersion
 # pylint: disable=W0141
diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py
@@ -824,35 +824,6 @@ def test_to_csv_float_format(self):
                            index=['A', 'B'], columns=['X', 'Y', 'Z'])
             assert_frame_equal(rs, xp)
 
-    def test_to_csv_quoting(self):
-        df = DataFrame({'A': [1, 2, 3], 'B': ['foo', 'bar', 'baz']})
-
-        buf = StringIO()
-        df.to_csv(buf, index=False, quoting=csv.QUOTE_NONNUMERIC)
-
-        result = buf.getvalue()
-        expected = ('"A","B"\n'
-                    '1,"foo"\n'
-                    '2,"bar"\n'
-                    '3,"baz"\n')
-
-        self.assertEqual(result, expected)
-
-        # quoting windows line terminators, presents with encoding?
-        # #3503
-        text = 'a,b,c\n1,"test \r\n",3\n'
-        df = pd.read_csv(StringIO(text))
-        buf = StringIO()
-        df.to_csv(buf, encoding='utf-8', index=False)
-        self.assertEqual(buf.getvalue(), text)
-
-        # testing if quoting parameter is passed through with multi-indexes
-        # related to issue #7791
-        df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [5, 6]})
-        df = df.set_index(['a', 'b'])
-        expected = '"a","b","c"\n"1","3","5"\n"2","4","6"\n'
-        self.assertEqual(df.to_csv(quoting=csv.QUOTE_ALL), expected)
-
     def test_to_csv_unicodewriter_quoting(self):
         df = DataFrame({'A': [1, 2, 3], 'B': ['foo', 'bar', 'baz']})
 
@@ -1131,3 +1102,83 @@ def test_to_csv_with_dst_transitions(self):
             df.to_pickle(path)
             result = pd.read_pickle(path)
             assert_frame_equal(result, df)
+
+    def test_to_csv_quoting(self):
+        df = DataFrame({
+            'c_string': ['a', 'b,c'],
+            'c_int': [42, np.nan],
+            'c_float': [1.0, 3.2],
+            'c_bool': [True, False],
+        })
+
+        expected = """\
+,c_bool,c_float,c_int,c_string
+0,True,1.0,42.0,a
+1,False,3.2,,"b,c"
+"""
+        result = df.to_csv()
+        self.assertEqual(result, expected)
+
+        result = df.to_csv(quoting=None)
+        self.assertEqual(result, expected)
+
+        result = df.to_csv(quoting=csv.QUOTE_MINIMAL)
+        self.assertEqual(result, expected)
+
+        expected = """\
+"","c_bool","c_float","c_int","c_string"
+"0","True","1.0","42.0","a"
+"1","False","3.2","","b,c"
+"""
+        result = df.to_csv(quoting=csv.QUOTE_ALL)
+        self.assertEqual(result, expected)
+
+        # see gh-12922, gh-13259: make sure changes to
+        # the formatters do not break this behaviour
+        expected = """\
+"","c_bool","c_float","c_int","c_string"
+0,True,1.0,42.0,"a"
+1,False,3.2,"","b,c"
+"""
+        result = df.to_csv(quoting=csv.QUOTE_NONNUMERIC)
+        self.assertEqual(result, expected)
+
+        msg = "need to escape, but no escapechar set"
+        tm.assertRaisesRegexp(csv.Error, msg, df.to_csv,
+                              quoting=csv.QUOTE_NONE)
+        tm.assertRaisesRegexp(csv.Error, msg, df.to_csv,
+                              quoting=csv.QUOTE_NONE,
+                              escapechar=None)
+
+        expected = """\
+,c_bool,c_float,c_int,c_string
+0,True,1.0,42.0,a
+1,False,3.2,,b!,c
+"""
+        result = df.to_csv(quoting=csv.QUOTE_NONE,
+                           escapechar='!')
+        self.assertEqual(result, expected)
+
+        expected = """\
+,c_bool,c_ffloat,c_int,c_string
+0,True,1.0,42.0,a
+1,False,3.2,,bf,c
+"""
+        result = df.to_csv(quoting=csv.QUOTE_NONE,
+                           escapechar='f')
+        self.assertEqual(result, expected)
+
+        # see gh-3503: a field containing a Windows line terminator
+        # must stay quoted when an encoding is passed
+        text = 'a,b,c\n1,"test \r\n",3\n'
+        df = pd.read_csv(StringIO(text))
+        buf = StringIO()
+        df.to_csv(buf, encoding='utf-8', index=False)
+        self.assertEqual(buf.getvalue(), text)
+
+        # xref gh-7791: make sure the quoting parameter is passed through
+        # with multi-indexes
+        df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [5, 6]})
+        df = df.set_index(['a', 'b'])
+        expected = '"a","b","c"\n"1","3","5"\n"2","4","6"\n'
+        self.assertEqual(df.to_csv(quoting=csv.QUOTE_ALL), expected)