From 158683ee0aea663801ccebe7f4767220f9f810c8 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 15 Mar 2024 12:58:12 -0700 Subject: [PATCH 1/3] REF: Don't materialize range if not needed --- pandas/core/groupby/groupby.py | 2 +- pandas/core/indexes/multi.py | 4 ++-- pandas/core/reshape/pivot.py | 3 ++- pandas/core/sorting.py | 4 ++-- pandas/io/common.py | 6 ++++-- pandas/io/parsers/readers.py | 2 +- 6 files changed, 12 insertions(+), 9 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 5023a4b8bd3dd..0b61938d474b9 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2686,7 +2686,7 @@ def _value_counts( names = result_series.index.names # GH#55951 - Temporarily replace names in case they are integers result_series.index.names = range(len(names)) - index_level = list(range(len(self._grouper.groupings))) + index_level = range(len(self._grouper.groupings)) result_series = result_series.sort_index( level=index_level, sort_remaining=False ) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 2ef80469a7a13..a384ac4698d7f 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -921,7 +921,7 @@ def _set_levels( if level is None: new_levels = tuple(ensure_index(lev, copy=copy)._view() for lev in levels) - level_numbers = list(range(len(new_levels))) + level_numbers = range(len(new_levels)) else: level_numbers = [self._get_level_number(lev) for lev in level] new_levels_list = list(self._levels) @@ -3014,7 +3014,7 @@ def _maybe_to_slice(loc): raise KeyError(key) from err except TypeError: # e.g. test_partial_slicing_with_multiindex partial string slicing - loc, _ = self.get_loc_level(key, list(range(self.nlevels))) + loc, _ = self.get_loc_level(key, range(self.nlevels)) return loc # -- partial selection or non-unique index diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 424af58958f04..7b2fbb54f7d35 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -1,5 +1,6 @@ from __future__ import annotations +import itertools from typing import ( TYPE_CHECKING, Callable, @@ -422,7 +423,7 @@ def _all_key(key): row_margin = row_margin.stack() # GH#26568. Use names instead of indices in case of numeric names - new_order_indices = [len(cols)] + list(range(len(cols))) + new_order_indices = itertools.chain([len(cols)], range(len(cols))) new_order_names = [row_margin.index.names[i] for i in new_order_indices] row_margin.index = row_margin.index.reorder_levels(new_order_names) else: diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 1f214ca9db85b..5b17ab198d8d8 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -527,9 +527,9 @@ def _ensure_key_mapped_multiindex( else: sort_levels = level - sort_levels = [index._get_level_number(lev) for lev in sort_levels] + sort_levels: range | set = {index._get_level_number(lev) for lev in sort_levels} else: - sort_levels = list(range(index.nlevels)) # satisfies mypy + sort_levels = range(index.nlevels) mapped = [ ensure_key_mapped(index._get_level_values(level), key) diff --git a/pandas/io/common.py b/pandas/io/common.py index abeb789a4b778..35c3a24d8e8f6 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -1223,12 +1223,14 @@ def is_potential_multi_index( bool : Whether or not columns could become a MultiIndex """ if index_col is None or isinstance(index_col, bool): - index_col = [] + index_columns = set() + else: + index_columns = set(index_col) return bool( len(columns) and not isinstance(columns, ABCMultiIndex) - and all(isinstance(c, tuple) for c in columns if c not in list(index_col)) + and all(isinstance(c, tuple) for c in columns if c not in index_columns) ) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 6b139b0ad45c0..1ef2e65617c9b 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -1482,7 +1482,7 @@ def _clean_options( ) else: if is_integer(skiprows): - skiprows = list(range(skiprows)) + skiprows = range(skiprows) if skiprows is None: skiprows = set() elif not callable(skiprows): From 8bbaeb17c797c2388ca739aa5fec5850cd24ef70 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 15 Mar 2024 13:21:04 -0700 Subject: [PATCH 2/3] Exclude range --- pandas/core/indexes/multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index a384ac4698d7f..a93e673636f32 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3101,7 +3101,7 @@ def get_loc_level(self, key, level: IndexLabel = 0, drop_level: bool = True): >>> mi.get_loc_level(["b", "e"]) (1, None) """ - if not isinstance(level, (list, tuple)): + if not isinstance(level, (range, list, tuple)): level = self._get_level_number(level) else: level = [self._get_level_number(lev) for lev in level] From dfa0f7fe2f05e417742def7926c4b04aa318812d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 15 Mar 2024 14:06:40 -0700 Subject: [PATCH 3/3] Some typing --- pandas/core/indexes/multi.py | 2 +- pandas/core/sorting.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index a93e673636f32..2cb05dadd5981 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -921,7 +921,7 @@ def _set_levels( if level is None: new_levels = tuple(ensure_index(lev, copy=copy)._view() for lev in levels) - level_numbers = range(len(new_levels)) + level_numbers: range | list[int] = range(len(new_levels)) else: level_numbers = [self._get_level_number(lev) for lev in level] new_levels_list = list(self._levels) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 5b17ab198d8d8..4774b013fc428 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -523,11 +523,11 @@ def _ensure_key_mapped_multiindex( if level is not None: if isinstance(level, (str, int)): - sort_levels = [level] + level_iter = [level] else: - sort_levels = level + level_iter = level - sort_levels: range | set = {index._get_level_number(lev) for lev in sort_levels} + sort_levels: range | set = {index._get_level_number(lev) for lev in level_iter} else: sort_levels = range(index.nlevels)