From 011a899113287fd1791bca3e9ed4b324ead1cd34 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 16 Jul 2019 14:56:30 -0700 Subject: [PATCH 1/2] Reverted 25623 --- doc/source/whatsnew/v0.25.0.rst | 1 - pandas/io/parsers.py | 19 ------------------- pandas/tests/io/parser/test_usecols.py | 19 ------------------- 3 files changed, 39 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 7397ae8fda80c..3445beaa78317 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -1087,7 +1087,6 @@ I/O - Bug in :meth:`DataFrame.to_html` where header numbers would ignore display options when rounding (:issue:`17280`) - Bug in :func:`read_hdf` where reading a table from an HDF5 file written directly with PyTables fails with a ``ValueError`` when using a sub-selection via the ``start`` or ``stop`` arguments (:issue:`11188`) - Bug in :func:`read_hdf` not properly closing store after a ``KeyError`` is raised (:issue:`25766`) -- Bug in ``read_csv`` which would not raise ``ValueError`` if a column index in ``usecols`` was out of bounds (:issue:`25623`) - Improved the explanation for the failure when value labels are repeated in Stata dta files and suggested work-arounds (:issue:`25772`) - Improved :meth:`pandas.read_stata` and :class:`pandas.io.stata.StataReader` to read incorrectly formatted 118 format files saved by Stata (:issue:`25960`) - Improved the ``col_space`` parameter in :meth:`DataFrame.to_html` to accept a string so CSS length values can be set correctly (:issue:`25941`) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 6cc47b984914a..4acc46dcb459f 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1947,12 +1947,6 @@ def __init__(self, src, **kwds): ): _validate_usecols_names(usecols, self.orig_names) - # GH 25623 - # validate that column indices in usecols are not out of bounds - elif self.usecols_dtype == "integer": - indices = range(self._reader.table_width) - _validate_usecols_names(usecols, indices) - if len(self.names) > len(usecols): self.names = [ n @@ -2258,7 +2252,6 @@ def __init__(self, f, **kwds): self.skipinitialspace = kwds["skipinitialspace"] self.lineterminator = kwds["lineterminator"] self.quoting = kwds["quoting"] - self.usecols, self.usecols_dtype = _validate_usecols_arg(kwds["usecols"]) self.skip_blank_lines = kwds["skip_blank_lines"] self.warn_bad_lines = kwds["warn_bad_lines"] @@ -2665,13 +2658,6 @@ def _infer_columns(self): if clear_buffer: self._clear_buffer() - # GH 25623 - # validate that column indices in usecols are not out of bounds - if self.usecols_dtype == "integer": - for col in columns: - indices = range(len(col)) - _validate_usecols_names(self.usecols, indices) - if names is not None: if (self.usecols is not None and len(names) != len(self.usecols)) or ( self.usecols is None and len(names) != len(columns[0]) @@ -2706,11 +2692,6 @@ def _infer_columns(self): ncols = len(line) num_original_columns = ncols - # GH 25623 - # validate that column indices in usecols are not out of bounds - if self.usecols_dtype == "integer": - _validate_usecols_names(self.usecols, range(ncols)) - if not names: if self.prefix: columns = [ diff --git a/pandas/tests/io/parser/test_usecols.py b/pandas/tests/io/parser/test_usecols.py index 47c4f93fbf59c..afe19608ea5c6 100644 --- a/pandas/tests/io/parser/test_usecols.py +++ b/pandas/tests/io/parser/test_usecols.py @@ -22,25 +22,6 @@ ) -@pytest.mark.parametrize( - "names,usecols,missing", - [ - (None, [0, 3], r"\[3\]"), - (["a", "b", "c"], [0, -1, 2], r"\[-1\]"), - (None, [3], r"\[3\]"), - (["a"], [3], r"\[3\]"), - ], -) -def test_usecols_out_of_bounds(all_parsers, names, usecols, missing): - # See gh-25623 - data = "a,b,c\n1,2,3\n4,5,6" - parser = all_parsers - - mssg = _msg_validate_usecols_names.format(missing) - with pytest.raises(ValueError, match=mssg): - parser.read_csv(StringIO(data), usecols=usecols, names=names) - - def test_raise_on_mixed_dtype_usecols(all_parsers): # See gh-12678 data = """a,b,c From d54962d572ef87e13ce35a580558f58862ba8201 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Tue, 16 Jul 2019 16:32:26 -0700 Subject: [PATCH 2/2] Added back self.usecols assignment --- pandas/io/parsers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 4acc46dcb459f..300f17bd25432 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2252,6 +2252,7 @@ def __init__(self, f, **kwds): self.skipinitialspace = kwds["skipinitialspace"] self.lineterminator = kwds["lineterminator"] self.quoting = kwds["quoting"] + self.usecols, _ = _validate_usecols_arg(kwds["usecols"]) self.skip_blank_lines = kwds["skip_blank_lines"] self.warn_bad_lines = kwds["warn_bad_lines"]