Skip to content

Commit 909ec7a

Browse files
authored
REGR: Avoid error when defining non existing usecols with python engine (#41244)
1 parent d9e99a5 commit 909ec7a

File tree

2 files changed

+25
-5
lines changed

2 files changed

+25
-5
lines changed

pandas/io/parsers/python_parser.py

+10-5
Original file line numberDiff line numberDiff line change
@@ -298,10 +298,10 @@ def _exclude_implicit_index(self, alldata):
298298
# error: Cannot determine type of 'index_col'
299299
offset = len(self.index_col) # type: ignore[has-type]
300300

301-
if self._col_indices is not None and len(names) != len(self._col_indices):
302-
names = [names[i] for i in sorted(self._col_indices)]
303-
304-
return {name: alldata[i + offset] for i, name in enumerate(names)}, names
301+
len_alldata = len(alldata)
302+
return {
303+
name: alldata[i + offset] for i, name in enumerate(names) if i < len_alldata
304+
}, names
305305

306306
# legacy
307307
def get_chunk(self, size=None):
@@ -472,7 +472,12 @@ def _infer_columns(self):
472472
self._handle_usecols(columns, names)
473473
else:
474474
num_original_columns = len(names)
475-
columns = [names]
475+
if self._col_indices is not None and len(names) != len(
476+
self._col_indices
477+
):
478+
columns = [[names[i] for i in sorted(self._col_indices)]]
479+
else:
480+
columns = [names]
476481
else:
477482
columns = self._handle_usecols(columns, columns[0])
478483
else:

pandas/tests/io/parser/usecols/test_usecols_basic.py

+15
Original file line numberDiff line numberDiff line change
@@ -373,3 +373,18 @@ def test_usecols_subset_names_mismatch_orig_columns(all_parsers, usecols):
373373
result = parser.read_csv(StringIO(data), header=0, names=names, usecols=usecols)
374374
expected = DataFrame({"A": [1, 5], "C": [3, 7]})
375375
tm.assert_frame_equal(result, expected)
376+
377+
378+
@pytest.mark.parametrize("names", [None, ["a", "b"]])
379+
def test_usecols_indices_out_of_bounds(all_parsers, names):
380+
# GH#25623
381+
parser = all_parsers
382+
data = """
383+
a,b
384+
1,2
385+
"""
386+
result = parser.read_csv(StringIO(data), usecols=[0, 2], names=names, header=0)
387+
expected = DataFrame({"a": [1], "b": [None]})
388+
if names is None and parser.engine == "python":
389+
expected = DataFrame({"a": [1]})
390+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)