diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 793818419c910..dde6c5dca0274 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -645,6 +645,7 @@ Deprecations - The ``inplace`` parameter of :meth:`Categorical.remove_categories`, :meth:`Categorical.add_categories`, :meth:`Categorical.reorder_categories`, :meth:`Categorical.rename_categories`, :meth:`Categorical.set_categories` is deprecated and will be removed in a future version (:issue:`37643`) - Deprecated :func:`merge` producing duplicated columns through the ``suffixes`` keyword and already existing columns (:issue:`22818`) - Deprecated setting :attr:`Categorical._codes`, create a new :class:`Categorical` with the desired codes instead (:issue:`40606`) +- Deprecated using ``usecols`` with out of bounds indices for :func:`read_csv` with ``engine="c"`` (:issue:`25623`) .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 0878aff562c12..b2d548e04eab4 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -947,6 +947,17 @@ cdef class TextReader: f"{self.table_width - self.leading_cols} " f"and found {num_cols}") + if (self.usecols is not None and not callable(self.usecols) and + all(isinstance(u, int) for u in self.usecols)): + missing_usecols = [col for col in self.usecols if col >= num_cols] + if missing_usecols: + warnings.warn( + "Defining usecols with out of bounds indices is deprecated " + "and will raise a ParserError in a future version.", + FutureWarning, + stacklevel=6, + ) + results = {} nused = 0 for i in range(self.table_width): diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 573399c9f295c..9082e41698913 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -15,6 +15,7 @@ Tuple, cast, ) +import warnings import numpy as np @@ -477,7 +478,7 @@ def 
_infer_columns(self): if self.usecols is not None: # Set _use_cols. We don't store columns because they are # overwritten. - self._handle_usecols(columns, names) + self._handle_usecols(columns, names, num_original_columns) else: num_original_columns = len(names) if self._col_indices is not None and len(names) != len( @@ -487,7 +488,9 @@ def _infer_columns(self): else: columns = [names] else: - columns = self._handle_usecols(columns, columns[0]) + columns = self._handle_usecols( + columns, columns[0], num_original_columns + ) else: try: line = self._buffered_line() @@ -506,10 +509,12 @@ def _infer_columns(self): columns = [[f"{self.prefix}{i}" for i in range(ncols)]] else: columns = [list(range(ncols))] - columns = self._handle_usecols(columns, columns[0]) + columns = self._handle_usecols( + columns, columns[0], num_original_columns + ) else: if self.usecols is None or len(names) >= num_original_columns: - columns = self._handle_usecols([names], names) + columns = self._handle_usecols([names], names, num_original_columns) num_original_columns = len(names) else: if not callable(self.usecols) and len(names) != len(self.usecols): @@ -518,13 +523,18 @@ def _infer_columns(self): "header fields in the file" ) # Ignore output but set used columns. 
- self._handle_usecols([names], names) + self._handle_usecols([names], names, ncols) columns = [names] num_original_columns = ncols return columns, num_original_columns, unnamed_cols - def _handle_usecols(self, columns, usecols_key): + def _handle_usecols( + self, + columns: List[List[Union[Optional[str], Optional[int]]]], + usecols_key: List[Union[Optional[str], Optional[int]]], + num_original_columns: int, + ): """ Sets self._col_indices @@ -549,6 +559,16 @@ def _handle_usecols(self, columns, usecols_key): else: col_indices.append(col) else: + missing_usecols = [ + col for col in self.usecols if col >= num_original_columns + ] + if missing_usecols: + warnings.warn( + "Defining usecols with out of bounds indices is deprecated " + "and will raise a ParserError in a future version.", + FutureWarning, + stacklevel=8, + ) col_indices = self.usecols columns = [ diff --git a/pandas/tests/io/parser/usecols/test_usecols_basic.py b/pandas/tests/io/parser/usecols/test_usecols_basic.py index 371b8bea7def2..b86dc5ef85fc6 100644 --- a/pandas/tests/io/parser/usecols/test_usecols_basic.py +++ b/pandas/tests/io/parser/usecols/test_usecols_basic.py @@ -383,7 +383,8 @@ def test_usecols_indices_out_of_bounds(all_parsers, names): a,b 1,2 """ - result = parser.read_csv(StringIO(data), usecols=[0, 2], names=names, header=0) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result = parser.read_csv(StringIO(data), usecols=[0, 2], names=names, header=0) expected = DataFrame({"a": [1], "b": [None]}) if names is None and parser.engine == "python": expected = DataFrame({"a": [1]})