Skip to content

Commit cbd2ada

Browse files
committed
BUG-20591 read_csv raises ValueError for bool columns with missing values
1 parent 30c1290 commit cbd2ada

File tree

3 files changed

+14
-0
lines changed

3 files changed

+14
-0
lines changed

doc/source/whatsnew/v0.24.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,7 @@ Backwards incompatible API changes
330330
- :meth:`Series.str.cat` will now raise if `others` is a `set` (:issue:`23009`)
331331
- Passing scalar values to :class:`DatetimeIndex` or :class:`TimedeltaIndex` will now raise ``TypeError`` instead of ``ValueError`` (:issue:`23539`)
332332
- ``max_rows`` and ``max_cols`` parameters removed from :class:`HTMLFormatter` since truncation is handled by :class:`DataFrameFormatter` (:issue:`23818`)
333+
- :meth:`read_csv` will now raise a ``ValueError`` if a column containing missing values is declared with ``dtype`` ``bool`` (:issue:`20591`)
333334

334335
.. _whatsnew_0240.api_breaking.deps:
335336

pandas/_libs/parsers.pyx

+4
Original file line numberDiff line numberDiff line change
@@ -1245,6 +1245,10 @@ cdef class TextReader:
12451245
result, na_count = _try_bool_flex(self.parser, i, start, end,
12461246
na_filter, na_hashset,
12471247
self.true_set, self.false_set)
1248+
if user_dtype and na_count is not None:
1249+
if na_count > 0:
1250+
raise ValueError("Bool column has NA values in "
1251+
"column {column}".format(column=i))
12481252
return result, na_count
12491253

12501254
elif dtype.kind == 'S':

pandas/tests/io/parser/test_na_values.py

+9
Original file line numberDiff line numberDiff line change
@@ -421,3 +421,12 @@ def test_na_values_with_dtype_str_and_na_filter(all_parsers, na_filter):
421421

422422
result = parser.read_csv(StringIO(data), na_filter=na_filter, dtype=str)
423423
tm.assert_frame_equal(result, expected)
424+
425+
426+
def test_cast_NA_to_bool_raises_error(all_parsers):
    """Reading a column with missing values as ``dtype=bool`` must raise.

    Column ``a`` of the sample data has an empty (NA) field on its second
    row, so it cannot be represented as a plain boolean column.  The
    expected failure is asserted explicitly with ``pytest.raises`` rather
    than through a bare ``xfail`` marker: a non-strict ``xfail`` reports
    XFAIL when the call raises and XPASS when it does not, so the test
    could never actually fail and the exception type was never checked.
    """
    # NOTE(review): if one of the parametrized engines does not raise
    # ``ValueError`` here yet, xfail that engine's case specifically
    # instead of marking the whole test.
    parser = all_parsers
    data = "false,1\n,1\ntrue,"

    with pytest.raises(ValueError):
        parser.read_csv(StringIO(data), header=None, names=['a', 'b'],
                        dtype={'a': 'bool'})

0 commit comments

Comments
 (0)