Skip to content

Commit 8e53112

Browse files
committed
BUG: Fix quoting behaviour in to_csv for csv.QUOTE_NONNUMERIC
Float values were being quoted despite the quoting spec. The bug was traced to the float formatting, which was unconditionally casting all floats to strings. The unconditional casting was traced back to commit 2d51b33 (gh-12194) via bisection. This commit undoes some of those changes to rectify the behaviour. Closes gh-12922. [ci skip]
1 parent f752886 commit 8e53112

File tree

4 files changed

+101
-29
lines changed

4 files changed

+101
-29
lines changed

doc/source/whatsnew/v0.18.2.txt

+2
Original file line numberDiff line numberDiff line change
@@ -388,6 +388,8 @@ Bug Fixes
388388

389389
- Bug in various index types, which did not propagate the name of passed index (:issue:`12309`)
390390
- Bug in ``DatetimeIndex``, which did not honour the ``copy=True`` (:issue:`13205`)
391+
392+
- Bug in ``DataFrame.to_csv()`` in which float values were being quoted even though quoting was specified for non-numeric values only (:issue:`12922`, :issue:`13259`)
391393
- Bug in ``MultiIndex`` slicing where extra elements were returned when level is non-unique (:issue:`12896`)
392394

393395

pandas/core/internals.py

+14
Original file line numberDiff line numberDiff line change
@@ -1529,6 +1529,20 @@ def to_native_types(self, slicer=None, na_rep='', float_format=None,
15291529
if slicer is not None:
15301530
values = values[:, slicer]
15311531

1532+
# see gh-13418: no special formatting is desired at the
1533+
# output (important for appropriate 'quoting' behaviour),
1534+
# so do not pass it through the FloatArrayFormatter
1535+
if float_format is None and decimal == '.':
1536+
mask = isnull(values)
1537+
1538+
if not quoting:
1539+
values = values.astype(str)
1540+
else:
1541+
values = np.array(values, dtype='object')
1542+
1543+
values[mask] = na_rep
1544+
return values
1545+
15321546
from pandas.formats.format import FloatArrayFormatter
15331547
formatter = FloatArrayFormatter(values, na_rep=na_rep,
15341548
float_format=float_format,

pandas/formats/format.py

+5
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
11
# -*- coding: utf-8 -*-
2+
"""
3+
Internal module for formatting output data in csv, html,
4+
and latex files. This module also applies to display formatting.
5+
"""
6+
27
from __future__ import print_function
38
from distutils.version import LooseVersion
49
# pylint: disable=W0141

pandas/tests/frame/test_to_csv.py

+80-29
Original file line numberDiff line numberDiff line change
@@ -824,35 +824,6 @@ def test_to_csv_float_format(self):
824824
index=['A', 'B'], columns=['X', 'Y', 'Z'])
825825
assert_frame_equal(rs, xp)
826826

827-
def test_to_csv_quoting(self):
828-
df = DataFrame({'A': [1, 2, 3], 'B': ['foo', 'bar', 'baz']})
829-
830-
buf = StringIO()
831-
df.to_csv(buf, index=False, quoting=csv.QUOTE_NONNUMERIC)
832-
833-
result = buf.getvalue()
834-
expected = ('"A","B"\n'
835-
'1,"foo"\n'
836-
'2,"bar"\n'
837-
'3,"baz"\n')
838-
839-
self.assertEqual(result, expected)
840-
841-
# quoting windows line terminators, presents with encoding?
842-
# #3503
843-
text = 'a,b,c\n1,"test \r\n",3\n'
844-
df = pd.read_csv(StringIO(text))
845-
buf = StringIO()
846-
df.to_csv(buf, encoding='utf-8', index=False)
847-
self.assertEqual(buf.getvalue(), text)
848-
849-
# testing if quoting parameter is passed through with multi-indexes
850-
# related to issue #7791
851-
df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [5, 6]})
852-
df = df.set_index(['a', 'b'])
853-
expected = '"a","b","c"\n"1","3","5"\n"2","4","6"\n'
854-
self.assertEqual(df.to_csv(quoting=csv.QUOTE_ALL), expected)
855-
856827
def test_to_csv_unicodewriter_quoting(self):
857828
df = DataFrame({'A': [1, 2, 3], 'B': ['foo', 'bar', 'baz']})
858829

@@ -1131,3 +1102,83 @@ def test_to_csv_with_dst_transitions(self):
11311102
df.to_pickle(path)
11321103
result = pd.read_pickle(path)
11331104
assert_frame_equal(result, df)
1105+
1106+
def test_to_csv_quoting(self):
1107+
df = DataFrame({
1108+
'c_string': ['a', 'b,c'],
1109+
'c_int': [42, np.nan],
1110+
'c_float': [1.0, 3.2],
1111+
'c_bool': [True, False],
1112+
})
1113+
1114+
expected = """\
1115+
,c_bool,c_float,c_int,c_string
1116+
0,True,1.0,42.0,a
1117+
1,False,3.2,,"b,c"
1118+
"""
1119+
result = df.to_csv()
1120+
self.assertEqual(result, expected)
1121+
1122+
result = df.to_csv(quoting=None)
1123+
self.assertEqual(result, expected)
1124+
1125+
result = df.to_csv(quoting=csv.QUOTE_MINIMAL)
1126+
self.assertEqual(result, expected)
1127+
1128+
expected = """\
1129+
"","c_bool","c_float","c_int","c_string"
1130+
"0","True","1.0","42.0","a"
1131+
"1","False","3.2","","b,c"
1132+
"""
1133+
result = df.to_csv(quoting=csv.QUOTE_ALL)
1134+
self.assertEqual(result, expected)
1135+
1136+
# see gh-12922, gh-13259: make sure changes to
1137+
# the formatters do not break this behaviour
1138+
expected = """\
1139+
"","c_bool","c_float","c_int","c_string"
1140+
0,True,1.0,42.0,"a"
1141+
1,False,3.2,"","b,c"
1142+
"""
1143+
result = df.to_csv(quoting=csv.QUOTE_NONNUMERIC)
1144+
self.assertEqual(result, expected)
1145+
1146+
msg = "need to escape, but no escapechar set"
1147+
tm.assertRaisesRegexp(csv.Error, msg, df.to_csv,
1148+
quoting=csv.QUOTE_NONE)
1149+
tm.assertRaisesRegexp(csv.Error, msg, df.to_csv,
1150+
quoting=csv.QUOTE_NONE,
1151+
escapechar=None)
1152+
1153+
expected = """\
1154+
,c_bool,c_float,c_int,c_string
1155+
0,True,1.0,42.0,a
1156+
1,False,3.2,,b!,c
1157+
"""
1158+
result = df.to_csv(quoting=csv.QUOTE_NONE,
1159+
escapechar='!')
1160+
self.assertEqual(result, expected)
1161+
1162+
expected = """\
1163+
,c_bool,c_ffloat,c_int,c_string
1164+
0,True,1.0,42.0,a
1165+
1,False,3.2,,bf,c
1166+
"""
1167+
result = df.to_csv(quoting=csv.QUOTE_NONE,
1168+
escapechar='f')
1169+
self.assertEqual(result, expected)
1170+
1171+
# see gh-3503: quoting Windows line terminators
1172+
# presents with encoding?
1173+
text = 'a,b,c\n1,"test \r\n",3\n'
1174+
df = pd.read_csv(StringIO(text))
1175+
buf = StringIO()
1176+
df.to_csv(buf, encoding='utf-8', index=False)
1177+
self.assertEqual(buf.getvalue(), text)
1178+
1179+
# xref gh-7791: make sure the quoting parameter is passed through
1180+
# with multi-indexes
1181+
df = pd.DataFrame({'a': [1, 2], 'b': [3, 4], 'c': [5, 6]})
1182+
df = df.set_index(['a', 'b'])
1183+
expected = '"a","b","c"\n"1","3","5"\n"2","4","6"\n'
1184+
self.assertEqual(df.to_csv(quoting=csv.QUOTE_ALL), expected)

0 commit comments

Comments
 (0)