BUG-20591 specify error in test

JustinZhengBC · JustinZhengBC · commit ea1ec6f0ef12 · 2018-11-28T11:33:01.000-08:00
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
@@ -330,7 +330,7 @@ Backwards incompatible API changes
 - :meth:`Series.str.cat` will now raise if `others` is a `set` (:issue:`23009`)
 - Passing scalar values to :class:`DatetimeIndex` or :class:`TimedeltaIndex` will now raise ``TypeError`` instead of ``ValueError`` (:issue:`23539`)
 - ``max_rows`` and ``max_cols`` parameters removed from :class:`HTMLFormatter` since truncation is handled by :class:`DataFrameFormatter` (:issue:`23818`)
-- :meth:`read_csv` will now throw a ``ValueError`` if a column with missing values is declared as having ``dtype`` ``bool`` (:issue:`20591`)
+- :meth:`read_csv` with the C engine will now raise a ``ValueError`` if a column with missing values is declared as having ``dtype`` ``bool`` (:issue:`20591`)
 
 .. _whatsnew_0240.api_breaking.deps:
 
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -501,7 +501,6 @@ def _read(filepath_or_buffer, kwds):
 
 _fwf_defaults = {
     'colspecs': 'infer',
-    'infer_nrows': 100,
     'widths': None,
 }
 
@@ -719,8 +718,8 @@ def parser_f(filepath_or_buffer,
                       )(read_table)
 
 
-def read_fwf(filepath_or_buffer, colspecs='infer', widths=None,
-             infer_nrows=100, **kwds):
+def read_fwf(filepath_or_buffer, colspecs='infer',
+             widths=None, **kwds):
 
     r"""
     Read a table of fixed-width formatted lines into DataFrame.
@@ -753,11 +752,6 @@ def read_fwf(filepath_or_buffer, colspecs='infer', widths=None,
     widths : list of int, optional
         A list of field widths which can be used instead of 'colspecs' if
         the intervals are contiguous.
-    infer_nrows : int, default 100
-        The number of rows to consider when letting the parser determine the
-        `colspecs`.
-
-        .. versionadded:: 0.24.0
     **kwds : optional
         Optional keyword arguments can be passed to ``TextFileReader``.
 
@@ -792,7 +786,6 @@ def read_fwf(filepath_or_buffer, colspecs='infer', widths=None,
             col += w
 
     kwds['colspecs'] = colspecs
-    kwds['infer_nrows'] = infer_nrows
     kwds['engine'] = 'python-fwf'
     return _read(filepath_or_buffer, kwds)
 
@@ -1752,8 +1745,8 @@ def _cast_types(self, values, cast_type, column):
 
             cats = Index(values).unique().dropna()
             values = Categorical._from_inferred_categories(
-                cats, cats.get_indexer(values), cast_type,
-                true_values=self.true_values)
+                cats, cats.get_indexer(values), cast_type
+            )
 
         else:
             try:
@@ -3449,15 +3442,13 @@ class FixedWidthReader(BaseIterator):
     A reader of fixed-width lines.
     """
 
-    def __init__(self, f, colspecs, delimiter, comment, skiprows=None,
-                 infer_nrows=100):
+    def __init__(self, f, colspecs, delimiter, comment, skiprows=None):
         self.f = f
         self.buffer = None
         self.delimiter = '\r\n' + delimiter if delimiter else '\n\r\t '
         self.comment = comment
         if colspecs == 'infer':
-            self.colspecs = self.detect_colspecs(infer_nrows=infer_nrows,
-                                                 skiprows=skiprows)
+            self.colspecs = self.detect_colspecs(skiprows=skiprows)
         else:
             self.colspecs = colspecs
 
@@ -3473,20 +3464,19 @@ def __init__(self, f, colspecs, delimiter, comment, skiprows=None,
                 raise TypeError('Each column specification must be '
                                 '2 element tuple or list of integers')
 
-    def get_rows(self, infer_nrows, skiprows=None):
+    def get_rows(self, n, skiprows=None):
         """
         Read rows from self.f, skipping as specified.
 
-        We distinguish buffer_rows (the first <= infer_nrows
-        lines) from the rows returned to detect_colspecs
-        because it's simpler to leave the other locations
-        with skiprows logic alone than to modify them to
-        deal with the fact we skipped some rows here as
-        well.
+        We distinguish buffer_rows (the first <= n lines)
+        from the rows returned to detect_colspecs because
+        it's simpler to leave the other locations with
+        skiprows logic alone than to modify them to deal
+        with the fact we skipped some rows here as well.
 
         Parameters
         ----------
-        infer_nrows : int
+        n : int
             Number of rows to read from self.f, not counting
             rows that are skipped.
         skiprows: set, optional
@@ -3506,16 +3496,16 @@ def get_rows(self, infer_nrows, skiprows=None):
             if i not in skiprows:
                 detect_rows.append(row)
             buffer_rows.append(row)
-            if len(detect_rows) >= infer_nrows:
+            if len(detect_rows) >= n:
                 break
         self.buffer = iter(buffer_rows)
         return detect_rows
 
-    def detect_colspecs(self, infer_nrows=100, skiprows=None):
+    def detect_colspecs(self, n=100, skiprows=None):
         # Regex escape the delimiters
         delimiters = ''.join(r'\%s' % x for x in self.delimiter)
         pattern = re.compile('([^%s]+)' % delimiters)
-        rows = self.get_rows(infer_nrows, skiprows)
+        rows = self.get_rows(n, skiprows)
         if not rows:
             raise EmptyDataError("No rows from which to infer column width")
         max_len = max(map(len, rows))
@@ -3554,10 +3544,8 @@ class FixedWidthFieldParser(PythonParser):
     def __init__(self, f, **kwds):
         # Support iterators, convert to a list.
         self.colspecs = kwds.pop('colspecs')
-        self.infer_nrows = kwds.pop('infer_nrows')
         PythonParser.__init__(self, f, **kwds)
 
     def _make_reader(self, f):
         self.data = FixedWidthReader(f, self.colspecs, self.delimiter,
-                                     self.comment, self.skiprows,
-                                     self.infer_nrows)
+                                     self.comment, self.skiprows)
diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py
@@ -423,10 +423,11 @@ def test_na_values_with_dtype_str_and_na_filter(all_parsers, na_filter):
     tm.assert_frame_equal(result, expected)
 
 
-@pytest.mark.xfail
-def test_cast_NA_to_bool_raises_error(all_parsers):
-    parser = all_parsers
+def test_cast_NA_to_bool_raises_error(c_parser_only):
+    parser = c_parser_only
     data = "false,1\n,1\ntrue,"
+    msg = "Bool column has NA values in column 0"
 
-    parser.read_csv(StringIO(data), header=None, names=['a', 'b'],
-                    dtype={'a': 'bool'})
+    with pytest.raises(ValueError, match=msg):
+        parser.read_csv(StringIO(data), header=None, names=['a', 'b'],
+                        dtype={'a': 'bool'})