Skip to content

Commit 6c67491

Browse files
committed
BUG-20591 modify python parser as well
1 parent 3483795 commit 6c67491

File tree

3 files changed

+21
-7
lines changed

3 files changed

+21
-7
lines changed

doc/source/whatsnew/v0.24.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ Backwards incompatible API changes
330330
- :meth:`Series.str.cat` will now raise if `others` is a `set` (:issue:`23009`)
331331
- Passing scalar values to :class:`DatetimeIndex` or :class:`TimedeltaIndex` will now raise ``TypeError`` instead of ``ValueError`` (:issue:`23539`)
332332
- ``max_rows`` and ``max_cols`` parameters removed from :class:`HTMLFormatter` since truncation is handled by :class:`DataFrameFormatter` (:issue:`23818`)
333-
- :meth:`read_csv` with C engine will now throw a ``ValueError`` if a column with missing values is declared as having ``dtype`` ``bool`` (:issue:`20591`)
333+
- :meth:`read_csv` will now raise a ``ValueError`` if a column with missing values is declared as having dtype ``bool`` (:issue:`20591`)
334334

335335
.. _whatsnew_0240.api_breaking.deps:
336336

pandas/io/parsers.py

+17-3
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,9 @@
2727

2828
from pandas.core.dtypes.cast import astype_nansafe
2929
from pandas.core.dtypes.common import (
30-
ensure_object, is_categorical_dtype, is_dtype_equal, is_float, is_integer,
31-
is_integer_dtype, is_list_like, is_object_dtype, is_scalar,
32-
is_string_dtype)
30+
ensure_object, is_bool_dtype, is_categorical_dtype, is_dtype_equal,
31+
is_float, is_integer, is_integer_dtype, is_list_like, is_object_dtype,
32+
is_scalar, is_string_dtype)
3333
from pandas.core.dtypes.dtypes import CategoricalDtype
3434
from pandas.core.dtypes.missing import isna
3535

@@ -2435,6 +2435,20 @@ def _clean_mapping(mapping):
24352435
clean_na_values = self.na_values
24362436
clean_na_fvalues = self.na_fvalues
24372437

2438+
try:
2439+
if isinstance(clean_dtypes, dict):
2440+
for col, dt in clean_dtypes.items():
2441+
if is_bool_dtype(dt) and data[col][data[col] == ''].size:
2442+
raise ValueError("Bool column has NA values in "
2443+
"column {column}".format(column=col))
2444+
elif isinstance(clean_dtypes, string_types):
2445+
for col, values in data.items():
2446+
if any(isna(values)):
2447+
raise ValueError("Bool column has NA values in "
2448+
"column {column}".format(column=col))
2449+
except (AttributeError, TypeError): # invalid input to is_bool_dtype
2450+
pass
2451+
24382452
return self._convert_to_ndarrays(data, clean_na_values,
24392453
clean_na_fvalues, self.verbose,
24402454
clean_conv, clean_dtypes)

pandas/tests/io/parser/test_na_values.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -423,10 +423,10 @@ def test_na_values_with_dtype_str_and_na_filter(all_parsers, na_filter):
423423
tm.assert_frame_equal(result, expected)
424424

425425

426-
def test_cast_NA_to_bool_raises_error(c_parser_only):
427-
parser = c_parser_only
426+
def test_cast_NA_to_bool_raises_error(all_parsers):
427+
parser = all_parsers
428428
data = "false,1\n,1\ntrue,"
429-
msg = "Bool column has NA values in column 0"
429+
msg = "Bool column has NA values in column [0a]"
430430
with pytest.raises(ValueError, match=msg):
431431
parser.read_csv(StringIO(data), header=None, names=['a', 'b'],
432432
dtype={'a': 'bool'})

0 commit comments

Comments
 (0)