Skip to content

Commit c54643b

Browse files
phofl authored and JulianWgs committed
Deprecated usecols with out of bounds indices in read_csv (pandas-dev#41130)
1 parent 216d5cd commit c54643b

File tree

4 files changed

+40
-7
lines changed

4 files changed

+40
-7
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -645,6 +645,7 @@ Deprecations
645645
- The ``inplace`` parameter of :meth:`Categorical.remove_categories`, :meth:`Categorical.add_categories`, :meth:`Categorical.reorder_categories`, :meth:`Categorical.rename_categories`, :meth:`Categorical.set_categories` is deprecated and will be removed in a future version (:issue:`37643`)
646646
- Deprecated :func:`merge` producing duplicated columns through the ``suffixes`` keyword and already existing columns (:issue:`22818`)
647647
- Deprecated setting :attr:`Categorical._codes`, create a new :class:`Categorical` with the desired codes instead (:issue:`40606`)
648+
- Deprecated using ``usecols`` with out of bounds indices for ``read_csv`` with ``engine="c"`` (:issue:`25623`)
648649

649650
.. ---------------------------------------------------------------------------
650651

pandas/_libs/parsers.pyx

+11
Original file line numberDiff line numberDiff line change
@@ -947,6 +947,17 @@ cdef class TextReader:
947947
f"{self.table_width - self.leading_cols} "
948948
f"and found {num_cols}")
949949

950+
if (self.usecols is not None and not callable(self.usecols) and
951+
all(isinstance(u, int) for u in self.usecols)):
952+
missing_usecols = [col for col in self.usecols if col >= num_cols]
953+
if missing_usecols:
954+
warnings.warn(
955+
"Defining usecols with out of bounds indices is deprecated "
956+
"and will raise a ParserError in a future version.",
957+
FutureWarning,
958+
stacklevel=6,
959+
)
960+
950961
results = {}
951962
nused = 0
952963
for i in range(self.table_width):

pandas/io/parsers/python_parser.py

+26-6
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
Tuple,
1616
cast,
1717
)
18+
import warnings
1819

1920
import numpy as np
2021

@@ -477,7 +478,7 @@ def _infer_columns(self):
477478
if self.usecols is not None:
478479
# Set _use_cols. We don't store columns because they are
479480
# overwritten.
480-
self._handle_usecols(columns, names)
481+
self._handle_usecols(columns, names, num_original_columns)
481482
else:
482483
num_original_columns = len(names)
483484
if self._col_indices is not None and len(names) != len(
@@ -487,7 +488,9 @@ def _infer_columns(self):
487488
else:
488489
columns = [names]
489490
else:
490-
columns = self._handle_usecols(columns, columns[0])
491+
columns = self._handle_usecols(
492+
columns, columns[0], num_original_columns
493+
)
491494
else:
492495
try:
493496
line = self._buffered_line()
@@ -506,10 +509,12 @@ def _infer_columns(self):
506509
columns = [[f"{self.prefix}{i}" for i in range(ncols)]]
507510
else:
508511
columns = [list(range(ncols))]
509-
columns = self._handle_usecols(columns, columns[0])
512+
columns = self._handle_usecols(
513+
columns, columns[0], num_original_columns
514+
)
510515
else:
511516
if self.usecols is None or len(names) >= num_original_columns:
512-
columns = self._handle_usecols([names], names)
517+
columns = self._handle_usecols([names], names, num_original_columns)
513518
num_original_columns = len(names)
514519
else:
515520
if not callable(self.usecols) and len(names) != len(self.usecols):
@@ -518,13 +523,18 @@ def _infer_columns(self):
518523
"header fields in the file"
519524
)
520525
# Ignore output but set used columns.
521-
self._handle_usecols([names], names)
526+
self._handle_usecols([names], names, ncols)
522527
columns = [names]
523528
num_original_columns = ncols
524529

525530
return columns, num_original_columns, unnamed_cols
526531

527-
def _handle_usecols(self, columns, usecols_key):
532+
def _handle_usecols(
533+
self,
534+
columns: List[List[Union[Optional[str], Optional[int]]]],
535+
usecols_key: List[Union[Optional[str], Optional[int]]],
536+
num_original_columns: int,
537+
):
528538
"""
529539
Sets self._col_indices
530540
@@ -549,6 +559,16 @@ def _handle_usecols(self, columns, usecols_key):
549559
else:
550560
col_indices.append(col)
551561
else:
562+
missing_usecols = [
563+
col for col in self.usecols if col >= num_original_columns
564+
]
565+
if missing_usecols:
566+
warnings.warn(
567+
"Defining usecols with out of bounds indices is deprecated "
568+
"and will raise a ParserError in a future version.",
569+
FutureWarning,
570+
stacklevel=8,
571+
)
552572
col_indices = self.usecols
553573

554574
columns = [

pandas/tests/io/parser/usecols/test_usecols_basic.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -383,7 +383,8 @@ def test_usecols_indices_out_of_bounds(all_parsers, names):
383383
a,b
384384
1,2
385385
"""
386-
result = parser.read_csv(StringIO(data), usecols=[0, 2], names=names, header=0)
386+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
387+
result = parser.read_csv(StringIO(data), usecols=[0, 2], names=names, header=0)
387388
expected = DataFrame({"a": [1], "b": [None]})
388389
if names is None and parser.engine == "python":
389390
expected = DataFrame({"a": [1]})

0 commit comments

Comments
 (0)