From e966c261caf42e25852b9210b55b35d287288aa2 Mon Sep 17 00:00:00 2001
From: Brett Rosen <brett@datarobot.com>
Date: Mon, 18 Jul 2016 10:42:06 -0400
Subject: [PATCH 1/3] Test case for patch, plus fix to not swallow exceptions

---
 pandas/io/tests/parser/common.py                 | 15 +++++++++++++++
 pandas/io/tests/parser/data/sauron.SHIFT_JIS.csv | 14 ++++++++++++++
 pandas/io/tests/parser/test_parsers.py           |  1 +
 pandas/parser.pyx                                | 15 ++++++++++++++-
 4 files changed, 44 insertions(+), 1 deletion(-)
 create mode 100644 pandas/io/tests/parser/data/sauron.SHIFT_JIS.csv

diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py
index 670f3df6f3984..421273bea5e8a 100644
--- a/pandas/io/tests/parser/common.py
+++ b/pandas/io/tests/parser/common.py
@@ -3,6 +3,7 @@
 import csv
 import os
 import platform
+import codecs
 
 import re
 import sys
@@ -45,6 +46,20 @@ def test_empty_decimal_marker(self):
         with tm.assertRaisesRegexp(ValueError, msg):
             self.read_csv(StringIO(data), decimal='')
 
+    def test_bad_stream_exception(self):
+        handle = open(self.csv_shiftjs, "rb")
+        codec = codecs.lookup("utf-8")
+        utf8 = codecs.lookup('utf-8')
+        # stream must be binary UTF8
+        stream = codecs.StreamRecoder(
+            handle, utf8.encode, utf8.decode, codec.streamreader, codec.streamwriter)
+        if compat.PY3:
+            msg = "'utf-8' codec can't decode byte"
+        else:
+            msg = "'utf8' codec can't decode byte"
+        with tm.assertRaisesRegexp(UnicodeDecodeError, msg):
+            self.read_csv(stream)
+
     def test_read_csv(self):
         if not compat.PY3:
             if compat.is_platform_windows():
diff --git a/pandas/io/tests/parser/data/sauron.SHIFT_JIS.csv b/pandas/io/tests/parser/data/sauron.SHIFT_JIS.csv
new file mode 100644
index 0000000000000..218ddf333ef52
--- /dev/null
+++ b/pandas/io/tests/parser/data/sauron.SHIFT_JIS.csv
@@ -0,0 +1,14 @@
+num, text
+1,�T�E�����iSauron�A�A�C�k�A�̑n���̎� - ��O�I3019�N3��25���j�́AJ�ER�ER�E�g�[���L���̒����𕑑�Ƃ��������w�z�r�b�g�̖`���x�w�w�֕���x�w�V���}�����̕���x�̓o��l���B
+2,�w�z�r�b�g�̖`���x�Ɍ��y�̂���u���l����Ȃ��t�v�i�f��w�z�r�b�g�V���[�Y�x�̎����ł́u���l�����i�l�N���}���T�[�j�v�j�Ƃ͔ނ̂��Ƃł���B
+3,���̑��҂ł���w�w�֕���x�ɂ����Ắu��̎w�ցithe One Ring�j�v�̍���A�u�����iDark Lord�j�v�A�u���̎ҁithe One�j[1]�v�Ƃ��ēo�ꂷ��B�O�j�ɂ�����w�V���}�����̕���x�ł́A����̖��������S�X�̍ł��͂��鑤�߂ł������B
+4,�T�E�����͌����A�A���_�i�n���j�̑n����S�����V�g�I�푰�A�C�k�A�̈���ł��������A�僁���R�[���̔��t�ɉ��S���đ����A�A���_�ɊQ���Ȃ����݂ƂȂ����B
+5,�u�T�E�����v�Ƃ̓N�E�F�����Łu�g�̖т̂悾���́v�Ƃ����Ӗ��ł���A�V���_�����œ��l�̈Ӗ��ł��閼�O�u�S���T�E�A�v�ƌĂ΂�邱�Ƃ�����B
+6,�����́A�T�E����������A���݌������G���t�ɂ�閼�ł���A�w�w�֕���x�쒆�ɂ����ăA���S�����́u����i�T�E�����j�͎����̖{���̖��͎g��Ȃ����A��������ɏ���������ɏo�����肷�邱�Ƃ������Ȃ��v�Ɣ������Ă���B
+7,���̂ق��A���I�ɃG���t�ɑ΂��Ď��̂����Ƃ���閼�ɁA�u�A���i�^�[���i������N�j�v�A�u�A���^�m�i���M�ȍ׍H�t�j�v�A�u�A�E�����f�B���i�A�E���̉��l�j�v������B
+8,���I�̍��̃T�E�����́A���݂ɕϐg����\�͂������Ă����B
+9,���̔\�͂��g���Ό��ڗ킵�����h�ȊO���𑕂����Ƃ�A�܂�����ȘT��z����������Ƃ����������ɕς��邱�Ƃ��ł��A�G���t���狰���ꂽ�B
+10,���I�Ɉ�̎w�ւ����グ���T�E�����́A���̗͂̎w�ւŐ�����鎖���₻�̏��L�҂��x�z�ł���悤�ɂȂ����B
+11,�܂��A���̂��łтĂ��w�ւ�������艽�x�ł��h�邱�Ƃ��ł����B
+12,�������k�[���m�[���v���̍ۂɔ��������̂�j�󂳂ꂽ��́A��x�Ɣ������ϐg���邱�Ƃ͂ł��Ȃ��Ȃ�A���̈��ӂ̋�̂悤�Ȍ�������낵���p�����Ƃ�Ȃ��Ȃ����Ƃ����B
+13,�܂����΂��΁u�܂Ԃ��̂Ȃ��΂ɉ����ꂽ�ځv�Ƃ������S�ە\���ő�����ꂽ�B
diff --git a/pandas/io/tests/parser/test_parsers.py b/pandas/io/tests/parser/test_parsers.py
index 21f903342a611..6001c85ae76b1 100644
--- a/pandas/io/tests/parser/test_parsers.py
+++ b/pandas/io/tests/parser/test_parsers.py
@@ -44,6 +44,7 @@ def setUp(self):
         self.csv1 = os.path.join(self.dirpath, 'test1.csv')
         self.csv2 = os.path.join(self.dirpath, 'test2.csv')
         self.xls1 = os.path.join(self.dirpath, 'test.xls')
+        self.csv_shiftjs = os.path.join(self.dirpath, 'sauron.SHIFT_JIS.csv')
 
 
 class TestCParserHighMemory(BaseParser, CParserTests, tm.TestCase):
diff --git a/pandas/parser.pyx b/pandas/parser.pyx
index 3928bc8472113..61a1e038b89ce 100644
--- a/pandas/parser.pyx
+++ b/pandas/parser.pyx
@@ -10,7 +10,9 @@ import warnings
 from csv import QUOTE_MINIMAL, QUOTE_NONNUMERIC, QUOTE_NONE
 from cpython cimport (PyObject, PyBytes_FromString,
                       PyBytes_AsString, PyBytes_Check,
-                      PyUnicode_Check, PyUnicode_AsUTF8String)
+                      PyUnicode_Check, PyUnicode_AsUTF8String,
+                      PyErr_Occurred, PyErr_Fetch)
+from cpython.ref cimport PyObject, Py_XDECREF
 from io.common import CParserError, DtypeWarning, EmptyDataError
 
 
@@ -1878,6 +1880,17 @@ cdef kh_float64_t* kset_float64_from_list(values) except NULL:
 
 
 cdef raise_parser_error(object base, parser_t *parser):
+    cdef:
+        object old_exc
+        PyObject *type, *value, *traceback
+    if PyErr_Occurred():
+        PyErr_Fetch(&type, &value, &traceback);
+        Py_XDECREF(type)
+        Py_XDECREF(traceback)
+        if value != NULL:
+            old_exc = <object> value
+            Py_XDECREF(value)
+            raise old_exc
     message = '%s. C error: ' % base
     if parser.error_msg != NULL:
         if PY3:

From 6ed3a2e2aa60730de3378902c5c2ad18ae03569b Mon Sep 17 00:00:00 2001
From: Brett Rosen <brett@datarobot.com>
Date: Mon, 18 Jul 2016 10:43:38 -0400
Subject: [PATCH 2/3] Flake

---
 pandas/io/tests/parser/common.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py
index 421273bea5e8a..2080305209ee9 100644
--- a/pandas/io/tests/parser/common.py
+++ b/pandas/io/tests/parser/common.py
@@ -52,7 +52,8 @@ def test_bad_stream_exception(self):
         utf8 = codecs.lookup('utf-8')
         # stream must be binary UTF8
         stream = codecs.StreamRecoder(
-            handle, utf8.encode, utf8.decode, codec.streamreader, codec.streamwriter)
+            handle, utf8.encode, utf8.decode, codec.streamreader,
+            codec.streamwriter)
         if compat.PY3:
             msg = "'utf-8' codec can't decode byte"
         else:

From 0efe18b845f3c6551d73fb1f6e1b3766709f000e Mon Sep 17 00:00:00 2001
From: Brett Rosen <brett@datarobot.com>
Date: Tue, 19 Jul 2016 07:08:47 -0400
Subject: [PATCH 3/3] Address review comments

---
 doc/source/whatsnew/v0.19.0.txt  | 2 +-
 pandas/io/tests/parser/common.py | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index f65f7d57d5d08..068207936818c 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -271,7 +271,7 @@ API changes
 - ``__setitem__`` will no longer apply a callable rhs as a function instead of storing it. Call ``where`` directly to get the previous behavior. (:issue:`13299`)
 - Passing ``Period`` with multiple frequencies to normal ``Index`` now returns ``Index`` with ``object`` dtype (:issue:`13664`)
 - ``PeriodIndex.fillna`` with ``Period`` has different freq now coerces to ``object`` dtype (:issue:`13664`)
-
+- More informative exceptions are passed through the parser. The exception type is now the original exception type instead of ``CParserError``. (:issue:`13652`)
 
 .. _whatsnew_0190.api.tolist:
 
diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py
index 2080305209ee9..11eed79e03267 100644
--- a/pandas/io/tests/parser/common.py
+++ b/pandas/io/tests/parser/common.py
@@ -47,6 +47,11 @@ def test_empty_decimal_marker(self):
             self.read_csv(StringIO(data), decimal='')
 
     def test_bad_stream_exception(self):
+        # Issue 13652:
+        # This test validates that both the Python engine
+        # and the C engine raise UnicodeDecodeError, instead of the
+        # C engine raising CParserError and swallowing the exception
+        # that caused the read to fail.
         handle = open(self.csv_shiftjs, "rb")
         codec = codecs.lookup("utf-8")
         utf8 = codecs.lookup('utf-8')
@@ -60,6 +65,7 @@ def test_bad_stream_exception(self):
             msg = "'utf8' codec can't decode byte"
         with tm.assertRaisesRegexp(UnicodeDecodeError, msg):
             self.read_csv(stream)
+        stream.close()
 
     def test_read_csv(self):
         if not compat.PY3: