DEPR: Deprecate CParserError in favor of ParserError.

gfyoung · gfyoung · commit fb24c0fa2417 · 2016-10-27T12:00:52.000-04:00
Partially resolves gh-12665 (pandas-dev/pandas).
diff --git a/doc/source/io.rst b/doc/source/io.rst
@@ -1165,8 +1165,8 @@ too many will cause an error by default:
 
     In [28]: pd.read_csv(StringIO(data))
     ---------------------------------------------------------------------------
-    CParserError                              Traceback (most recent call last)
-    CParserError: Error tokenizing data. C error: Expected 3 fields in line 3, saw 4
+    ParserError                              Traceback (most recent call last)
+    ParserError: Error tokenizing data. C error: Expected 3 fields in line 3, saw 4
 
 You can elect to skip bad lines:
 
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
@@ -41,6 +41,7 @@ Backwards incompatible API changes
 .. _whatsnew_0200.api:
 
 
+- ``CParserError`` has been deprecated in favor of ``ParserError`` in ``pd.read_csv`` (:issue:`12665`)
 
 
 
diff --git a/pandas/io/common.py b/pandas/io/common.py
@@ -13,6 +13,7 @@
 from pandas.formats.printing import pprint_thing
 from pandas.core.common import AbstractMethodError
 from pandas.types.common import is_number
+from pandas.util.decorators import deprecate
 
 # common NA values
 # no longer excluding inf representations
@@ -65,13 +66,14 @@ def urlopen(*args, **kwargs):
 _VALID_URLS.discard('')
 
 
-class CParserError(ValueError):
+class ParserError(ValueError):
     """
-    Exception that is thrown by the C engine when it encounters
-    a parsing error in `pd.read_csv`
+    Exception that is raised when an error is encountered in `pd.read_csv`
     """
     pass
 
+CParserError = deprecate('CParserError', ParserError, internal=True)
+
 
 class DtypeWarning(Warning):
     """
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -26,7 +26,7 @@
 from pandas.io.date_converters import generic_parser
 from pandas.io.common import (get_filepath_or_buffer, _validate_header_arg,
                               _get_handle, UnicodeReader, UTF8Recoder,
-                              BaseIterator, CParserError, EmptyDataError,
+                              BaseIterator, ParserError, EmptyDataError,
                               ParserWarning, _NA_VALUES)
 from pandas.tseries import tools
 
@@ -1141,7 +1141,7 @@ def tostr(x):
         # long
         for n in range(len(columns[0])):
             if all(['Unnamed' in tostr(c[n]) for c in columns]):
-                raise CParserError(
+                raise ParserError(
                     "Passed header=[%s] are too many rows for this "
                     "multi_index of columns"
                     % ','.join([str(x) for x in self.header])
diff --git a/pandas/io/tests/parser/common.py b/pandas/io/tests/parser/common.py
@@ -50,7 +50,7 @@ def test_bad_stream_exception(self):
         # Issue 13652:
         # This test validates that both python engine
         # and C engine will raise UnicodeDecodeError instead of
-        # c engine raising CParserError and swallowing exception
+        # c engine raising ParserError and swallowing exception
         # that caused read to fail.
         handle = open(self.csv_shiftjs, "rb")
         codec = codecs.lookup("utf-8")
diff --git a/pandas/io/tests/parser/test_textreader.py b/pandas/io/tests/parser/test_textreader.py
@@ -154,7 +154,7 @@ def test_skip_bad_lines(self):
 
         reader = TextReader(StringIO(data), delimiter=':',
                             header=None)
-        self.assertRaises(parser.CParserError, reader.read)
+        self.assertRaises(parser.ParserError, reader.read)
 
         reader = TextReader(StringIO(data), delimiter=':',
                             header=None,
@@ -197,7 +197,7 @@ def test_header_not_enough_lines(self):
         assert_array_dicts_equal(expected, recs)
 
         # not enough rows
-        self.assertRaises(parser.CParserError, TextReader, StringIO(data),
+        self.assertRaises(parser.ParserError, TextReader, StringIO(data),
                           delimiter=',', header=5, as_recarray=True)
 
     def test_header_not_enough_lines_as_recarray(self):
@@ -218,7 +218,7 @@ def test_header_not_enough_lines_as_recarray(self):
         assert_array_dicts_equal(expected, recs)
 
         # not enough rows
-        self.assertRaises(parser.CParserError, TextReader, StringIO(data),
+        self.assertRaises(parser.ParserError, TextReader, StringIO(data),
                           delimiter=',', header=5, as_recarray=True)
 
     def test_escapechar(self):
diff --git a/pandas/io/tests/parser/test_unsupported.py b/pandas/io/tests/parser/test_unsupported.py
@@ -15,7 +15,7 @@
 import pandas.util.testing as tm
 
 from pandas.compat import StringIO
-from pandas.io.common import CParserError
+from pandas.io.common import ParserError
 from pandas.io.parsers import read_csv, read_table
 
 
@@ -78,9 +78,9 @@ def test_c_engine(self):
 x   q   30      3    -0.6662 -0.5243 -0.3580  0.89145  2.5838"""
         msg = 'Error tokenizing data'
 
-        with tm.assertRaisesRegexp(CParserError, msg):
+        with tm.assertRaisesRegexp(ParserError, msg):
             read_table(StringIO(text), sep='\s+')
-        with tm.assertRaisesRegexp(CParserError, msg):
+        with tm.assertRaisesRegexp(ParserError, msg):
             read_table(StringIO(text), engine='c', sep='\s+')
 
         msg = "Only length-1 thousands markers supported"
diff --git a/pandas/io/tests/test_common.py b/pandas/io/tests/test_common.py
@@ -139,3 +139,10 @@ def test_next(self):
             self.assertEqual(next_line.strip(), line.strip())
 
         self.assertRaises(StopIteration, next, wrapper)
+
+    def test_cparser_error_depr(self):
+        # see gh-12665
+        with tm.assert_produces_warning(DeprecationWarning,
+                                        check_stacklevel=False):
+            with tm.assertRaises(common.ParserError):
+                raise common.CParserError()
diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py
@@ -23,7 +23,7 @@
                            is_platform_windows)
 from pandas.io.common import URLError, urlopen, file_path_to_url
 from pandas.io.html import read_html
-from pandas.parser import CParserError
+from pandas.parser import ParserError
 
 import pandas.util.testing as tm
 from pandas.util.testing import makeCustomDataframe as mkdf, network
@@ -652,7 +652,7 @@ def test_parse_dates_combine(self):
 
     def test_computer_sales_page(self):
         data = os.path.join(DATA_PATH, 'computer_sales_page.html')
-        with tm.assertRaisesRegexp(CParserError, r"Passed header=\[0,1\] are "
+        with tm.assertRaisesRegexp(ParserError, r"Passed header=\[0,1\] are "
                                    "too many rows for this multi_index "
                                    "of columns"):
             self.read_html(data, header=[0, 1])
diff --git a/pandas/parser.pyx b/pandas/parser.pyx
@@ -13,7 +13,10 @@ from cpython cimport (PyObject, PyBytes_FromString,
                       PyUnicode_Check, PyUnicode_AsUTF8String,
                       PyErr_Occurred, PyErr_Fetch)
 from cpython.ref cimport PyObject, Py_XDECREF
-from io.common import CParserError, DtypeWarning, EmptyDataError
+# XXX: It's annoying that we have to import both CParserError
+# and ParserError. Unfortunately, backwards compatibility
+# takes priority right now.
+from io.common import CParserError, ParserError, DtypeWarning, EmptyDataError
 
 
 cdef extern from "Python.h":
@@ -720,7 +723,7 @@ cdef class TextReader:
                     if isinstance(msg, list):
                         msg = "[%s], len of %d," % (
                             ','.join([ str(m) for m in msg ]), len(msg))
-                    raise CParserError(
+                    raise ParserError(
                         'Passed header=%s but only %d lines in file'
                         % (msg, self.parser.lines))
 
@@ -813,7 +816,7 @@ cdef class TextReader:
             passed_count = len(header[0])
 
             # if passed_count > field_count:
-            #     raise CParserError('Column names have %d fields, '
+            #     raise ParserError('Column names have %d fields, '
             #                        'data has %d fields'
             #                        % (passed_count, field_count))
 
@@ -1005,7 +1008,7 @@ cdef class TextReader:
                 (num_cols >= self.parser.line_fields[i]) * num_cols
 
         if self.table_width - self.leading_cols > num_cols:
-            raise CParserError(
+            raise ParserError(
                 "Too many columns specified: expected %s and found %s" %
                 (self.table_width - self.leading_cols, num_cols))
 
@@ -1060,7 +1063,7 @@ cdef class TextReader:
                                              self.use_unsigned)
 
             if col_res is None:
-                raise CParserError('Unable to parse column %d' % i)
+                raise ParserError('Unable to parse column %d' % i)
 
             results[i] = col_res
 
@@ -1311,7 +1314,7 @@ def _is_file_like(obj):
     if PY3:
         import io
         if isinstance(obj, io.TextIOWrapper):
-            raise CParserError('Cannot handle open unicode files (yet)')
+            raise ParserError('Cannot handle open unicode files (yet)')
 
         # BufferedReader is a byte reader for Python 3
         file = io.BufferedReader
@@ -2016,7 +2019,7 @@ cdef raise_parser_error(object base, parser_t *parser):
     else:
         message += 'no error message set'
 
-    raise CParserError(message)
+    raise ParserError(message)
 
 
 def _concatenate_chunks(list chunks):
diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py
@@ -8,7 +8,7 @@
 import numpy as np
 
 from pandas.compat import (lmap, range, lrange, StringIO, u)
-from pandas.parser import CParserError
+from pandas.parser import ParserError
 from pandas import (DataFrame, Index, Series, MultiIndex, Timestamp,
                     date_range, read_csv, compat, to_datetime)
 import pandas as pd
@@ -589,7 +589,7 @@ def _make_frame(names=None):
 
             for i in [5, 6, 7]:
                 msg = 'len of {i}, but only 5 lines in file'.format(i=i)
-                with assertRaisesRegexp(CParserError, msg):
+                with assertRaisesRegexp(ParserError, msg):
                     read_csv(path, tupleize_cols=False,
                              header=lrange(i), index_col=0)
 
diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py
@@ -6,12 +6,17 @@
 from functools import wraps
 
 
-def deprecate(name, alternative, alt_name=None):
+def deprecate(name, alternative, alt_name=None, internal=False):
     alt_name = alt_name or alternative.__name__
 
+    # If we're deprecating something internally, use DeprecationWarning
+    # so that we don't bother users with warnings out of their control.
+    # For external things, use FutureWarning instead.
+    _warning = DeprecationWarning if internal else FutureWarning
+
     def wrapper(*args, **kwargs):
         warnings.warn("%s is deprecated. Use %s instead" % (name, alt_name),
-                      FutureWarning, stacklevel=2)
+                      _warning, stacklevel=2)
         return alternative(*args, **kwargs)
     return wrapper
 

Original file line number	Diff line number	Diff line change
`@@ -41,6 +41,7 @@ Backwards incompatible API changes`
`41`	`41`	`.. _whatsnew_0200.api:`
`42`	`42`
`43`	`43`
	`44`	+- ``CParserError`` has been deprecated in favor of ``ParserError`` in ``pd.read_csv`` (:issue:`12665`)
`44`	`45`
`45`	`46`
`46`	`47`