Skip to content

Commit b4a1780

Browse files
committed
BUG-20591 move logic to _convert_to_ndarrays
1 parent 0d67f22 commit b4a1780

File tree

3 files changed

+20
-19
lines changed

3 files changed

+20
-19
lines changed

doc/source/whatsnew/v0.24.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ Backwards incompatible API changes
330330
- :meth:`Series.str.cat` will now raise if `others` is a `set` (:issue:`23009`)
331331
- Passing scalar values to :class:`DatetimeIndex` or :class:`TimedeltaIndex` will now raise ``TypeError`` instead of ``ValueError`` (:issue:`23539`)
332332
- ``max_rows`` and ``max_cols`` parameters removed from :class:`HTMLFormatter` since truncation is handled by :class:`DataFrameFormatter` (:issue:`23818`)
333-
- :meth:`read_csv` will now throw a ``ValueError`` if a column with missing values is declared as having dtype ``bool`` (:issue:`20591`)
333+
- :meth:`read_csv` will now raise a ``ValueError`` if a column with missing values is declared as having dtype ``bool`` (:issue:`20591`)
334334

335335
.. _whatsnew_0240.api_breaking.deps:
336336

pandas/io/parsers.py

+10-15
Original file line numberDiff line numberDiff line change
@@ -1669,6 +1669,16 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False,
16691669

16701670
# type specified in dtype param
16711671
if cast_type and not is_dtype_equal(cvals, cast_type):
1672+
try:
1673+
if (is_bool_dtype(cast_type) and
1674+
not is_categorical_dtype(cast_type)
1675+
and set(values) - set(col_na_values)):
1676+
raise ValueError("Bool column has NA values in "
1677+
"column {column}"
1678+
.format(column=c))
1679+
except (AttributeError, TypeError):
1680+
# invalid input to is_bool_dtype
1681+
pass
16721682
cvals = self._cast_types(cvals, cast_type, c)
16731683

16741684
result[c] = cvals
@@ -2435,21 +2445,6 @@ def _clean_mapping(mapping):
24352445
clean_na_values = self.na_values
24362446
clean_na_fvalues = self.na_fvalues
24372447

2438-
try:
2439-
if isinstance(clean_dtypes, dict):
2440-
for col, dt in clean_dtypes.items():
2441-
if is_bool_dtype(dt) and data[col][data[col] == ''].size:
2442-
raise ValueError("Bool column has NA values in "
2443-
"column {column}".format(column=col))
2444-
elif (isinstance(clean_dtypes, string_types) and
2445-
is_bool_dtype(clean_dtypes)):
2446-
for col, values in data.items():
2447-
if any(isna(values)):
2448-
raise ValueError("Bool column has NA values in "
2449-
"column {column}".format(column=col))
2450-
except (AttributeError, TypeError): # invalid input to is_bool_dtype
2451-
pass
2452-
24532448
return self._convert_to_ndarrays(data, clean_na_values,
24542449
clean_na_fvalues, self.verbose,
24552450
clean_conv, clean_dtypes)

pandas/tests/io/parser/test_na_values.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -423,10 +423,16 @@ def test_na_values_with_dtype_str_and_na_filter(all_parsers, na_filter):
423423
tm.assert_frame_equal(result, expected)
424424

425425

426-
def test_cast_NA_to_bool_raises_error(all_parsers):
426+
@pytest.mark.parametrize("data", [
427+
"false,1\n,1\ntrue,",
428+
"false,1\nnull,1\ntrue,",
429+
"false,1\nnan,1\ntrue,",
430+
])
431+
def test_cast_NA_to_bool_raises_error(all_parsers, data):
427432
parser = all_parsers
428-
data = "false,1\n,1\ntrue,"
429-
msg = "Bool column has NA values in column [0a]"
433+
msg = ("(Bool column has NA values in column [0a])|"
434+
"(cannot safely convert passed user dtype of "
435+
"bool for object dtyped data in column 0)")
430436
with pytest.raises(ValueError, match=msg):
431437
parser.read_csv(StringIO(data), header=None, names=['a', 'b'],
432438
dtype={'a': 'bool'})

0 commit comments

Comments
 (0)