Skip to content

Commit d814f43

Browse files
gfyoung authored and jreback committed
BUG: Fix csv.QUOTE_NONNUMERIC quoting in to_csv
Closes #12922: "bug" traced to #12194

Author: gfyoung <[email protected]>

Closes #13418 from gfyoung/to-csv-quote-bugfix and squashes the following commits:

8e53112 [gfyoung] BUG: Fix quoting behaviour in to_csv for csv.QUOTE_NONNUMERIC
1 parent a965d85 commit d814f43

File tree

4 files changed

+101
-29
lines changed

4 files changed

+101
-29
lines changed

doc/source/whatsnew/v0.18.2.txt

+2
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,8 @@ Bug Fixes
388388

389389
- Bug in various index types, which did not propagate the name of passed index (:issue:`12309`)
390390
- Bug in ``DatetimeIndex``, which did not honour the ``copy=True`` (:issue:`13205`)
391+
392+
- Bug in ``DataFrame.to_csv()`` in which float values were quoted even when ``quoting=csv.QUOTE_NONNUMERIC`` was passed, which should quote non-numeric values only (:issue:`12922`, :issue:`13259`)
391393
- Bug in ``MultiIndex`` slicing where extra elements were returned when level is non-unique (:issue:`12896`)
392394

393395

pandas/core/internals.py

+14
Original file line numberDiff line numberDiff line change
@@ -1529,6 +1529,20 @@ def to_native_types(self, slicer=None, na_rep='', float_format=None,
15291529
if slicer is not None:
15301530
values = values[:, slicer]
15311531

1532+
# see gh-13418: no special formatting is desired at the
1533+
# output (important for appropriate 'quoting' behaviour),
1534+
# so do not pass it through the FloatArrayFormatter
1535+
if float_format is None and decimal == '.':
1536+
mask = isnull(values)
1537+
1538+
if not quoting:
1539+
values = values.astype(str)
1540+
else:
1541+
values = np.array(values, dtype='object')
1542+
1543+
values[mask] = na_rep
1544+
return values
1545+
15321546
from pandas.formats.format import FloatArrayFormatter
15331547
formatter = FloatArrayFormatter(values, na_rep=na_rep,
15341548
float_format=float_format,

pandas/formats/format.py

+5
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
11
# -*- coding: utf-8 -*-
2+
"""
3+
Internal module for formatting output data in csv, html,
4+
and latex files. This module also applies to display formatting.
5+
"""
6+
27
from __future__ import print_function
38
from distutils.version import LooseVersion
49
# pylint: disable=W0141

pandas/tests/frame/test_to_csv.py

+80-29
Original file line numberDiff line numberDiff line change
@@ -824,35 +824,6 @@ def test_to_csv_float_format(self):
824824
index=['A', 'B'], columns=['X', 'Y', 'Z'])
825825
assert_frame_equal(rs, xp)
826826

827-
def test_to_csv_quoting(self):
828-
df = DataFrame({'A': [1, 2, 3], 'B': ['foo', 'bar', 'baz']})
829-
830-
buf = StringIO()
831-
df.to_csv(buf, index=False, quoting=csv.QUOTE_NONNUMERIC)
832-
833-
result = buf.getvalue()
834-
expected = ('"A","B"\n'
835-
'1,"foo"\n'
836-
'2,"bar"\n'
837-
'3,"baz"\n')
838-
839-
self.assertEqual(result, expected)
840-
841-
# quoting windows line terminators, presents with encoding?
842-
# #3503
843-
text = 'a,b,c\n1,"test \r\n",3\n'
844-
df = pd.read_csv(StringIO(text))
845-
buf = StringIO()
846-
df.to_csv(buf, encoding='utf-8', index=False)
847-
self.assertEqual(buf.getvalue(), text)
848-
849-
# testing if quoting parameter is passed through with multi-indexes
850-
# related to issue #7791
851-
df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [5, 6]})
852-
df = df.set_index(['a', 'b'])
853-
expected = '"a","b","c"\n"1","3","5"\n"2","4","6"\n'
854-
self.assertEqual(df.to_csv(quoting=csv.QUOTE_ALL), expected)
855-
856827
def test_to_csv_unicodewriter_quoting(self):
857828
df = DataFrame({'A': [1, 2, 3], 'B': ['foo', 'bar', 'baz']})
858829

@@ -1131,3 +1102,83 @@ def test_to_csv_with_dst_transitions(self):
11311102
df.to_pickle(path)
11321103
result = pd.read_pickle(path)
11331104
assert_frame_equal(result, df)
1105+
1106+
def test_to_csv_quoting(self):
1107+
df = DataFrame({
1108+
'c_string': ['a', 'b,c'],
1109+
'c_int': [42, np.nan],
1110+
'c_float': [1.0, 3.2],
1111+
'c_bool': [True, False],
1112+
})
1113+
1114+
expected = """\
1115+
,c_bool,c_float,c_int,c_string
1116+
0,True,1.0,42.0,a
1117+
1,False,3.2,,"b,c"
1118+
"""
1119+
result = df.to_csv()
1120+
self.assertEqual(result, expected)
1121+
1122+
result = df.to_csv(quoting=None)
1123+
self.assertEqual(result, expected)
1124+
1125+
result = df.to_csv(quoting=csv.QUOTE_MINIMAL)
1126+
self.assertEqual(result, expected)
1127+
1128+
expected = """\
1129+
"","c_bool","c_float","c_int","c_string"
1130+
"0","True","1.0","42.0","a"
1131+
"1","False","3.2","","b,c"
1132+
"""
1133+
result = df.to_csv(quoting=csv.QUOTE_ALL)
1134+
self.assertEqual(result, expected)
1135+
1136+
# see gh-12922, gh-13259: make sure changes to
1137+
# the formatters do not break this behaviour
1138+
expected = """\
1139+
"","c_bool","c_float","c_int","c_string"
1140+
0,True,1.0,42.0,"a"
1141+
1,False,3.2,"","b,c"
1142+
"""
1143+
result = df.to_csv(quoting=csv.QUOTE_NONNUMERIC)
1144+
self.assertEqual(result, expected)
1145+
1146+
msg = "need to escape, but no escapechar set"
1147+
tm.assertRaisesRegexp(csv.Error, msg, df.to_csv,
1148+
quoting=csv.QUOTE_NONE)
1149+
tm.assertRaisesRegexp(csv.Error, msg, df.to_csv,
1150+
quoting=csv.QUOTE_NONE,
1151+
escapechar=None)
1152+
1153+
expected = """\
1154+
,c_bool,c_float,c_int,c_string
1155+
0,True,1.0,42.0,a
1156+
1,False,3.2,,b!,c
1157+
"""
1158+
result = df.to_csv(quoting=csv.QUOTE_NONE,
1159+
escapechar='!')
1160+
self.assertEqual(result, expected)
1161+
1162+
expected = """\
1163+
,c_bool,c_ffloat,c_int,c_string
1164+
0,True,1.0,42.0,a
1165+
1,False,3.2,,bf,c
1166+
"""
1167+
result = df.to_csv(quoting=csv.QUOTE_NONE,
1168+
escapechar='f')
1169+
self.assertEqual(result, expected)
1170+
1171+
# see gh-3503: quoting Windows line terminators
1172+
# presents with encoding?
1173+
text = 'a,b,c\n1,"test \r\n",3\n'
1174+
df = pd.read_csv(StringIO(text))
1175+
buf = StringIO()
1176+
df.to_csv(buf, encoding='utf-8', index=False)
1177+
self.assertEqual(buf.getvalue(), text)
1178+
1179+
# xref gh-7791: make sure the quoting parameter is passed through
1180+
# with multi-indexes
1181+
df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [5, 6]})
1182+
df = df.set_index(['a', 'b'])
1183+
expected = '"a","b","c"\n"1","3","5"\n"2","4","6"\n'
1184+
self.assertEqual(df.to_csv(quoting=csv.QUOTE_ALL), expected)

0 commit comments

Comments
 (0)