Skip to content

Commit 9a31321

Browse files
committed
BUG: Accept unicode quotechars again in pd.read_csv
Closes pandas-dev/pandas#14477 (gh-14477).
1 parent f99f050 commit 9a31321

File tree

4 files changed

+14
-2
lines changed

4 files changed

+14
-2
lines changed

doc/source/whatsnew/v0.19.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ Bug Fixes
3535

3636

3737

38+
- Bug in ``pd.read_csv`` for Python 2.x in which Unicode quote characters were no longer being respected (:issue:`14477`)
3839
- Bug in localizing an ambiguous timezone when a boolean is passed (:issue:`14402`)
3940
- Bug in ``TimedeltaIndex`` addition with a Datetime-like object where addition overflow in the negative direction was not being caught (:issue:`14068`, :issue:`14453`)
4041

pandas/compat/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ def signature(f):
106106
reduce = functools.reduce
107107
long = int
108108
unichr = chr
109+
unicode = str
109110

110111
# This was introduced in Python 3.3, but we don't support
111112
# Python 3.x < 3.4, so checking PY3 is safe.
@@ -151,6 +152,7 @@ def signature(f):
151152
reduce = reduce
152153
long = long
153154
unichr = unichr
155+
unicode = unicode
154156

155157
# Python 2-builtin ranges produce lists
156158
lrange = builtins.range

pandas/io/tests/parser/quoting.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import pandas.util.testing as tm
1010

1111
from pandas import DataFrame
12-
from pandas.compat import StringIO
12+
from pandas.compat import StringIO, u
1313

1414

1515
class QuotingTests(object):
@@ -138,3 +138,11 @@ def test_double_quote(self):
138138
result = self.read_csv(StringIO(data), quotechar='"',
139139
doublequote=False)
140140
tm.assert_frame_equal(result, expected)
141+
142+
def test_quotechar_unicode(self):
143+
# See gh-14477
144+
data = 'a\n"1"'
145+
expected = DataFrame({'a': [1]})
146+
147+
result = self.read_csv(StringIO(data), quotechar=u('"'))
148+
tm.assert_frame_equal(result, expected)

pandas/parser.pyx

+2-1
Original file line numberDiff line numberDiff line change
@@ -570,7 +570,8 @@ cdef class TextReader:
570570
if not QUOTE_MINIMAL <= quoting <= QUOTE_NONE:
571571
raise TypeError('bad "quoting" value')
572572

573-
if not isinstance(quote_char, (str, bytes)) and quote_char is not None:
573+
if not isinstance(quote_char, (str, compat.unicode,
574+
bytes)) and quote_char is not None:
574575
dtype = type(quote_char).__name__
575576
raise TypeError('"quotechar" must be string, '
576577
'not {dtype}'.format(dtype=dtype))

0 commit comments

Comments
 (0)