Skip to content

Commit 6440067

Browse files
gfyoung authored and jorisvandenbossche committed
[Backport pandas-dev#14492] BUG: Accept unicode quotechars again in pd.read_csv
Title is self-explanatory. Affects Python 2.x only. Closes pandas-dev#14477.

Author: gfyoung <[email protected]>

Closes pandas-dev#14492 from gfyoung/quotechar-unicode-2.x and squashes the following commits:

ec9f59a [gfyoung] BUG: Accept unicode quotechars again in pd.read_csv

(cherry picked from commit 6130e77)
1 parent ebe6319 commit 6440067

File tree

4 files changed

+20
-2
lines changed

4 files changed

+20
-2
lines changed

doc/source/whatsnew/v0.19.1.txt

+1
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@ Bug Fixes
3636
- Compat with Cython 0.25 for building (:issue:`14496`)
3737

3838

39+
- Bug in ``pd.read_csv`` for Python 2.x in which Unicode quote characters were no longer being respected (:issue:`14477`)
3940
- Bug in localizing an ambiguous timezone when a boolean is passed (:issue:`14402`)
4041
- Bug in ``TimedeltaIndex`` addition with a Datetime-like object where addition overflow in the negative direction was not being caught (:issue:`14068`, :issue:`14453`)
4142

pandas/io/parsers.py

+3
Original file line number | Diff line number | Diff line change
@@ -1759,6 +1759,9 @@ def __init__(self, f, **kwds):
17591759
self.delimiter = kwds['delimiter']
17601760

17611761
self.quotechar = kwds['quotechar']
1762+
if isinstance(self.quotechar, compat.text_type):
1763+
self.quotechar = str(self.quotechar)
1764+
17621765
self.escapechar = kwds['escapechar']
17631766
self.doublequote = kwds['doublequote']
17641767
self.skipinitialspace = kwds['skipinitialspace']

pandas/io/tests/parser/quoting.py

+14-1
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
import pandas.util.testing as tm
1010

1111
from pandas import DataFrame
12-
from pandas.compat import StringIO
12+
from pandas.compat import PY3, StringIO, u
1313

1414

1515
class QuotingTests(object):
@@ -138,3 +138,16 @@ def test_double_quote(self):
138138
result = self.read_csv(StringIO(data), quotechar='"',
139139
doublequote=False)
140140
tm.assert_frame_equal(result, expected)
141+
142+
def test_quotechar_unicode(self):
143+
# See gh-14477
144+
data = 'a\n1'
145+
expected = DataFrame({'a': [1]})
146+
147+
result = self.read_csv(StringIO(data), quotechar=u('"'))
148+
tm.assert_frame_equal(result, expected)
149+
150+
# Compared to Python 3.x, Python 2.x does not handle unicode well.
151+
if PY3:
152+
result = self.read_csv(StringIO(data), quotechar=u('\u0394'))
153+
tm.assert_frame_equal(result, expected)

pandas/parser.pyx

+2-1
Original file line number | Diff line number | Diff line change
@@ -570,7 +570,8 @@ cdef class TextReader:
570570
if not QUOTE_MINIMAL <= quoting <= QUOTE_NONE:
571571
raise TypeError('bad "quoting" value')
572572

573-
if not isinstance(quote_char, (str, bytes)) and quote_char is not None:
573+
if not isinstance(quote_char, (str, compat.text_type,
574+
bytes)) and quote_char is not None:
574575
dtype = type(quote_char).__name__
575576
raise TypeError('"quotechar" must be string, '
576577
'not {dtype}'.format(dtype=dtype))

0 commit comments

Comments
 (0)