Skip to content

Commit 8ffdf7a

Browse files
WillAyd authored and jreback committed
Reallow usecols to reference OOB indices - reverts 25623 (#27426)
1 parent 89d79c5 commit 8ffdf7a

File tree

3 files changed

+1
-39
lines changed

3 files changed

+1
-39
lines changed

doc/source/whatsnew/v0.25.0.rst

-1
Original file line number | Diff line number | Diff line change
@@ -1130,7 +1130,6 @@ I/O
11301130
- Bug in :meth:`DataFrame.to_html` where header numbers would ignore display options when rounding (:issue:`17280`)
11311131
- Bug in :func:`read_hdf` where reading a table from an HDF5 file written directly with PyTables fails with a ``ValueError`` when using a sub-selection via the ``start`` or ``stop`` arguments (:issue:`11188`)
11321132
- Bug in :func:`read_hdf` not properly closing store after a ``KeyError`` is raised (:issue:`25766`)
1133-
- Bug in ``read_csv`` which would not raise ``ValueError`` if a column index in ``usecols`` was out of bounds (:issue:`25623`)
11341133
- Improved the explanation for the failure when value labels are repeated in Stata dta files and suggested work-arounds (:issue:`25772`)
11351134
- Improved :meth:`pandas.read_stata` and :class:`pandas.io.stata.StataReader` to read incorrectly formatted 118 format files saved by Stata (:issue:`25960`)
11361135
- Improved the ``col_space`` parameter in :meth:`DataFrame.to_html` to accept a string so CSS length values can be set correctly (:issue:`25941`)

pandas/io/parsers.py

+1 -19
Original file line number | Diff line number | Diff line change
@@ -1947,12 +1947,6 @@ def __init__(self, src, **kwds):
19471947
):
19481948
_validate_usecols_names(usecols, self.orig_names)
19491949

1950-
# GH 25623
1951-
# validate that column indices in usecols are not out of bounds
1952-
elif self.usecols_dtype == "integer":
1953-
indices = range(self._reader.table_width)
1954-
_validate_usecols_names(usecols, indices)
1955-
19561950
if len(self.names) > len(usecols):
19571951
self.names = [
19581952
n
@@ -2258,7 +2252,7 @@ def __init__(self, f, **kwds):
22582252
self.skipinitialspace = kwds["skipinitialspace"]
22592253
self.lineterminator = kwds["lineterminator"]
22602254
self.quoting = kwds["quoting"]
2261-
self.usecols, self.usecols_dtype = _validate_usecols_arg(kwds["usecols"])
2255+
self.usecols, _ = _validate_usecols_arg(kwds["usecols"])
22622256
self.skip_blank_lines = kwds["skip_blank_lines"]
22632257

22642258
self.warn_bad_lines = kwds["warn_bad_lines"]
@@ -2665,13 +2659,6 @@ def _infer_columns(self):
26652659
if clear_buffer:
26662660
self._clear_buffer()
26672661

2668-
# GH 25623
2669-
# validate that column indices in usecols are not out of bounds
2670-
if self.usecols_dtype == "integer":
2671-
for col in columns:
2672-
indices = range(len(col))
2673-
_validate_usecols_names(self.usecols, indices)
2674-
26752662
if names is not None:
26762663
if (self.usecols is not None and len(names) != len(self.usecols)) or (
26772664
self.usecols is None and len(names) != len(columns[0])
@@ -2706,11 +2693,6 @@ def _infer_columns(self):
27062693
ncols = len(line)
27072694
num_original_columns = ncols
27082695

2709-
# GH 25623
2710-
# validate that column indices in usecols are not out of bounds
2711-
if self.usecols_dtype == "integer":
2712-
_validate_usecols_names(self.usecols, range(ncols))
2713-
27142696
if not names:
27152697
if self.prefix:
27162698
columns = [

pandas/tests/io/parser/test_usecols.py

-19
Original file line number | Diff line number | Diff line change
@@ -22,25 +22,6 @@
2222
)
2323

2424

25-
@pytest.mark.parametrize(
26-
"names,usecols,missing",
27-
[
28-
(None, [0, 3], r"\[3\]"),
29-
(["a", "b", "c"], [0, -1, 2], r"\[-1\]"),
30-
(None, [3], r"\[3\]"),
31-
(["a"], [3], r"\[3\]"),
32-
],
33-
)
34-
def test_usecols_out_of_bounds(all_parsers, names, usecols, missing):
35-
# See gh-25623
36-
data = "a,b,c\n1,2,3\n4,5,6"
37-
parser = all_parsers
38-
39-
mssg = _msg_validate_usecols_names.format(missing)
40-
with pytest.raises(ValueError, match=mssg):
41-
parser.read_csv(StringIO(data), usecols=usecols, names=names)
42-
43-
4425
def test_raise_on_mixed_dtype_usecols(all_parsers):
4526
# See gh-12678
4627
data = """a,b,c

0 commit comments

Comments
 (0)