Skip to content

Commit ea1ec6f

Browse files
committed
BUG-20591 specify error in test
1 parent cbd2ada commit ea1ec6f

File tree

3 files changed

+24
-35
lines changed

3 files changed

+24
-35
lines changed

doc/source/whatsnew/v0.24.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,7 @@ Backwards incompatible API changes
330330
- :meth:`Series.str.cat` will now raise if `others` is a `set` (:issue:`23009`)
331331
- Passing scalar values to :class:`DatetimeIndex` or :class:`TimedeltaIndex` will now raise ``TypeError`` instead of ``ValueError`` (:issue:`23539`)
332332
- ``max_rows`` and ``max_cols`` parameters removed from :class:`HTMLFormatter` since truncation is handled by :class:`DataFrameFormatter` (:issue:`23818`)
333-
- :meth:`read_csv` will now throw a ``ValueError`` if a column with missing values is declared as having ``dtype`` ``bool`` (:issue:`20591`)
333+
- :meth:`read_csv` with the C engine will now raise a ``ValueError`` if a column with missing values is declared as having ``dtype`` ``bool`` (:issue:`20591`)
334334

335335
.. _whatsnew_0240.api_breaking.deps:
336336

pandas/io/parsers.py

+17-29
Original file line numberDiff line numberDiff line change
@@ -501,7 +501,6 @@ def _read(filepath_or_buffer, kwds):
501501

502502
_fwf_defaults = {
503503
'colspecs': 'infer',
504-
'infer_nrows': 100,
505504
'widths': None,
506505
}
507506

@@ -719,8 +718,8 @@ def parser_f(filepath_or_buffer,
719718
)(read_table)
720719

721720

722-
def read_fwf(filepath_or_buffer, colspecs='infer', widths=None,
723-
infer_nrows=100, **kwds):
721+
def read_fwf(filepath_or_buffer, colspecs='infer',
722+
widths=None, **kwds):
724723

725724
r"""
726725
Read a table of fixed-width formatted lines into DataFrame.
@@ -753,11 +752,6 @@ def read_fwf(filepath_or_buffer, colspecs='infer', widths=None,
753752
widths : list of int, optional
754753
A list of field widths which can be used instead of 'colspecs' if
755754
the intervals are contiguous.
756-
infer_nrows : int, default 100
757-
The number of rows to consider when letting the parser determine the
758-
`colspecs`.
759-
760-
.. versionadded:: 0.24.0
761755
**kwds : optional
762756
Optional keyword arguments can be passed to ``TextFileReader``.
763757
@@ -792,7 +786,6 @@ def read_fwf(filepath_or_buffer, colspecs='infer', widths=None,
792786
col += w
793787

794788
kwds['colspecs'] = colspecs
795-
kwds['infer_nrows'] = infer_nrows
796789
kwds['engine'] = 'python-fwf'
797790
return _read(filepath_or_buffer, kwds)
798791

@@ -1752,8 +1745,8 @@ def _cast_types(self, values, cast_type, column):
17521745

17531746
cats = Index(values).unique().dropna()
17541747
values = Categorical._from_inferred_categories(
1755-
cats, cats.get_indexer(values), cast_type,
1756-
true_values=self.true_values)
1748+
cats, cats.get_indexer(values), cast_type
1749+
)
17571750

17581751
else:
17591752
try:
@@ -3449,15 +3442,13 @@ class FixedWidthReader(BaseIterator):
34493442
A reader of fixed-width lines.
34503443
"""
34513444

3452-
def __init__(self, f, colspecs, delimiter, comment, skiprows=None,
3453-
infer_nrows=100):
3445+
def __init__(self, f, colspecs, delimiter, comment, skiprows=None):
34543446
self.f = f
34553447
self.buffer = None
34563448
self.delimiter = '\r\n' + delimiter if delimiter else '\n\r\t '
34573449
self.comment = comment
34583450
if colspecs == 'infer':
3459-
self.colspecs = self.detect_colspecs(infer_nrows=infer_nrows,
3460-
skiprows=skiprows)
3451+
self.colspecs = self.detect_colspecs(skiprows=skiprows)
34613452
else:
34623453
self.colspecs = colspecs
34633454

@@ -3473,20 +3464,19 @@ def __init__(self, f, colspecs, delimiter, comment, skiprows=None,
34733464
raise TypeError('Each column specification must be '
34743465
'2 element tuple or list of integers')
34753466

3476-
def get_rows(self, infer_nrows, skiprows=None):
3467+
def get_rows(self, n, skiprows=None):
34773468
"""
34783469
Read rows from self.f, skipping as specified.
34793470
3480-
We distinguish buffer_rows (the first <= infer_nrows
3481-
lines) from the rows returned to detect_colspecs
3482-
because it's simpler to leave the other locations
3483-
with skiprows logic alone than to modify them to
3484-
deal with the fact we skipped some rows here as
3485-
well.
3471+
We distinguish buffer_rows (the first <= n lines)
3472+
from the rows returned to detect_colspecs because
3473+
it's simpler to leave the other locations with
3474+
skiprows logic alone than to modify them to deal
3475+
with the fact we skipped some rows here as well.
34863476
34873477
Parameters
34883478
----------
3489-
infer_nrows : int
3479+
n : int
34903480
Number of rows to read from self.f, not counting
34913481
rows that are skipped.
34923482
skiprows: set, optional
@@ -3506,16 +3496,16 @@ def get_rows(self, infer_nrows, skiprows=None):
35063496
if i not in skiprows:
35073497
detect_rows.append(row)
35083498
buffer_rows.append(row)
3509-
if len(detect_rows) >= infer_nrows:
3499+
if len(detect_rows) >= n:
35103500
break
35113501
self.buffer = iter(buffer_rows)
35123502
return detect_rows
35133503

3514-
def detect_colspecs(self, infer_nrows=100, skiprows=None):
3504+
def detect_colspecs(self, n=100, skiprows=None):
35153505
# Regex escape the delimiters
35163506
delimiters = ''.join(r'\%s' % x for x in self.delimiter)
35173507
pattern = re.compile('([^%s]+)' % delimiters)
3518-
rows = self.get_rows(infer_nrows, skiprows)
3508+
rows = self.get_rows(n, skiprows)
35193509
if not rows:
35203510
raise EmptyDataError("No rows from which to infer column width")
35213511
max_len = max(map(len, rows))
@@ -3554,10 +3544,8 @@ class FixedWidthFieldParser(PythonParser):
35543544
def __init__(self, f, **kwds):
35553545
# Support iterators, convert to a list.
35563546
self.colspecs = kwds.pop('colspecs')
3557-
self.infer_nrows = kwds.pop('infer_nrows')
35583547
PythonParser.__init__(self, f, **kwds)
35593548

35603549
def _make_reader(self, f):
35613550
self.data = FixedWidthReader(f, self.colspecs, self.delimiter,
3562-
self.comment, self.skiprows,
3563-
self.infer_nrows)
3551+
self.comment, self.skiprows)

pandas/tests/io/parser/test_na_values.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -423,10 +423,11 @@ def test_na_values_with_dtype_str_and_na_filter(all_parsers, na_filter):
423423
tm.assert_frame_equal(result, expected)
424424

425425

426-
@pytest.mark.xfail
427-
def test_cast_NA_to_bool_raises_error(all_parsers):
428-
parser = all_parsers
426+
def test_cast_NA_to_bool_raises_error(c_parser_only):
427+
parser = c_parser_only
429428
data = "false,1\n,1\ntrue,"
429+
msg = "Bool column has NA values in column 0"
430430

431-
parser.read_csv(StringIO(data), header=None, names=['a', 'b'],
432-
dtype={'a': 'bool'})
431+
with pytest.raises(ValueError, match=msg):
432+
parser.read_csv(StringIO(data), header=None, names=['a', 'b'],
433+
dtype={'a': 'bool'})

0 commit comments

Comments
 (0)