BUG-20591: modify the Python parser as well

JustinZhengBC · JustinZhengBC · commit 6c674917386f · 2018-11-28T16:53:12.000-08:00
diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst
@@ -330,7 +330,7 @@ Backwards incompatible API changes
 - :meth:`Series.str.cat` will now raise if `others` is a `set` (:issue:`23009`)
 - Passing scalar values to :class:`DatetimeIndex` or :class:`TimedeltaIndex` will now raise ``TypeError`` instead of ``ValueError`` (:issue:`23539`)
 - ``max_rows`` and ``max_cols`` parameters removed from :class:`HTMLFormatter` since truncation is handled by :class:`DataFrameFormatter` (:issue:`23818`)
-- :meth:`read_csv` with C engine will now throw a ``ValueError`` if a column with missing values is declared as having ``dtype`` ``bool`` (:issue:`20591`)
+- :meth:`read_csv` will now raise a ``ValueError`` if a column with missing values is declared as having dtype ``bool`` (:issue:`20591`)
 
 .. _whatsnew_0240.api_breaking.deps:
 
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -27,9 +27,9 @@
 
 from pandas.core.dtypes.cast import astype_nansafe
 from pandas.core.dtypes.common import (
-    ensure_object, is_categorical_dtype, is_dtype_equal, is_float, is_integer,
-    is_integer_dtype, is_list_like, is_object_dtype, is_scalar,
-    is_string_dtype)
+    ensure_object, is_bool_dtype, is_categorical_dtype, is_dtype_equal,
+    is_float, is_integer, is_integer_dtype, is_list_like, is_object_dtype,
+    is_scalar, is_string_dtype)
 from pandas.core.dtypes.dtypes import CategoricalDtype
 from pandas.core.dtypes.missing import isna
 
@@ -2435,6 +2435,20 @@ def _clean_mapping(mapping):
             clean_na_values = self.na_values
             clean_na_fvalues = self.na_fvalues
 
+        try:
+            if isinstance(clean_dtypes, dict):
+                for col, dt in clean_dtypes.items():
+                    if is_bool_dtype(dt) and data[col][data[col] == ''].size:
+                        raise ValueError("Bool column has NA values in "
+                                         "column {column}".format(column=col))
+            elif isinstance(clean_dtypes, string_types):
+                for col, values in data.items():
+                    if any(isna(values)):
+                        raise ValueError("Bool column has NA values in "
+                                         "column {column}".format(column=col))
+        except (AttributeError, TypeError):  # invalid input to is_bool_dtype
+            pass
+
         return self._convert_to_ndarrays(data, clean_na_values,
                                          clean_na_fvalues, self.verbose,
                                          clean_conv, clean_dtypes)
diff --git a/pandas/tests/io/parser/test_na_values.py b/pandas/tests/io/parser/test_na_values.py
@@ -423,10 +423,10 @@ def test_na_values_with_dtype_str_and_na_filter(all_parsers, na_filter):
     tm.assert_frame_equal(result, expected)
 
 
-def test_cast_NA_to_bool_raises_error(c_parser_only):
-    parser = c_parser_only
+def test_cast_NA_to_bool_raises_error(all_parsers):
+    parser = all_parsers
     data = "false,1\n,1\ntrue,"
-    msg = "Bool column has NA values in column 0"
+    msg = "Bool column has NA values in column [0a]"
     with pytest.raises(ValueError, match=msg):
         parser.read_csv(StringIO(data), header=None, names=['a', 'b'],
                         dtype={'a': 'bool'})