From 83783874458bd9fe79f8e4124062d8dc1a9f8011 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 28 Apr 2023 20:24:53 -0700 Subject: [PATCH] REF: define _header_line and have_mi_columns non-dynamically --- pandas/io/parsers/python_parser.py | 54 ++++++++++++++++++------------ 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 4e1bcf54c0ae9..36d5ef7111685 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -365,6 +365,17 @@ def _convert_data( clean_dtypes, ) + @cache_readonly + def _have_mi_columns(self) -> bool: + if self.header is None: + return False + + header = self.header + if isinstance(header, (list, tuple, np.ndarray)): + return len(header) > 1 + else: + return False + def _infer_columns( self, ) -> tuple[list[list[Scalar | None]], int, set[Scalar | None]]: @@ -372,18 +383,16 @@ def _infer_columns( num_original_columns = 0 clear_buffer = True unnamed_cols: set[Scalar | None] = set() - self._header_line = None if self.header is not None: header = self.header + have_mi_columns = self._have_mi_columns if isinstance(header, (list, tuple, np.ndarray)): - have_mi_columns = len(header) > 1 # we have a mi columns, so read an extra line if have_mi_columns: header = list(header) + [header[-1] + 1] else: - have_mi_columns = False header = [header] columns: list[list[Scalar | None]] = [] @@ -531,27 +540,14 @@ def _infer_columns( columns, columns[0], num_original_columns ) else: - try: - line = self._buffered_line() - - except StopIteration as err: - if not names: - raise EmptyDataError("No columns to parse from file") from err - - line = names[:] - - # Store line, otherwise it is lost for guessing the index - self._header_line = line - ncols = len(line) + ncols = len(self._header_line) num_original_columns = ncols if not names: columns = [list(range(ncols))] - columns = self._handle_usecols( - columns, columns[0], num_original_columns - ) - elif self.usecols is None or len(names) >= num_original_columns: - columns = self._handle_usecols([names], names, num_original_columns) + columns = self._handle_usecols(columns, columns[0], ncols) + elif self.usecols is None or len(names) >= ncols: + columns = self._handle_usecols([names], names, ncols) num_original_columns = len(names) elif not callable(self.usecols) and len(names) != len(self.usecols): raise ValueError( @@ -560,12 +556,26 @@ def _infer_columns( ) else: # Ignore output but set used columns. - self._handle_usecols([names], names, ncols) columns = [names] - num_original_columns = ncols + self._handle_usecols(columns, columns[0], ncols) return columns, num_original_columns, unnamed_cols + @cache_readonly + def _header_line(self): + # Store line for reuse in _get_index_name + if self.header is not None: + return None + + try: + line = self._buffered_line() + except StopIteration as err: + if not self.names: + raise EmptyDataError("No columns to parse from file") from err + + line = self.names[:] + return line + def _handle_usecols( self, columns: list[list[Scalar | None]],