Skip to content

Commit d557410

Browse files
mroeschke and pmhatre1
authored and committed
CLN: Use generators where list results are re-iterated over (pandas-dev#58296)
* Have methods return generators instead of lists * Fix ops method, undo block * Undo internals test * Make _is_memory_usage_qualified cache_readonly too
1 parent 393b814 commit d557410

File tree

7 files changed

+21
-28
lines changed

7 files changed

+21
-28
lines changed

pandas/core/groupby/groupby.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1286,11 +1286,10 @@ def _insert_inaxis_grouper(
12861286
)
12871287

12881288
# zip in reverse so we can always insert at loc 0
1289-
for level, (name, lev, in_axis) in enumerate(
1289+
for level, (name, lev) in enumerate(
12901290
zip(
12911291
reversed(self._grouper.names),
1292-
reversed(self._grouper.get_group_levels()),
1293-
reversed([grp.in_axis for grp in self._grouper.groupings]),
1292+
self._grouper.get_group_levels(),
12941293
)
12951294
):
12961295
if name is None:

pandas/core/groupby/ops.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070

7171
if TYPE_CHECKING:
7272
from collections.abc import (
73+
Generator,
7374
Hashable,
7475
Iterator,
7576
Sequence,
@@ -857,16 +858,15 @@ def _unob_index_and_ids(
857858
return unob_index, unob_ids
858859

859860
@final
860-
def get_group_levels(self) -> list[Index]:
861+
def get_group_levels(self) -> Generator[Index, None, None]:
861862
# Note: only called from _insert_inaxis_grouper, which
862863
# is only called for BaseGrouper, never for BinGrouper
863864
result_index = self.result_index
864865
if len(self.groupings) == 1:
865-
return [result_index]
866-
return [
867-
result_index.get_level_values(level)
868-
for level in range(result_index.nlevels)
869-
]
866+
yield result_index
867+
else:
868+
for level in range(result_index.nlevels - 1, -1, -1):
869+
yield result_index.get_level_values(level)
870870

871871
# ------------------------------------------------------------
872872
# Aggregation functions

pandas/core/indexes/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4900,6 +4900,7 @@ def _validate_fill_value(self, value):
49004900
raise TypeError
49014901
return value
49024902

4903+
@cache_readonly
49034904
def _is_memory_usage_qualified(self) -> bool:
49044905
"""
49054906
Return a boolean if we need a qualified .info display.

pandas/core/indexes/multi.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1351,13 +1351,14 @@ def __contains__(self, key: Any) -> bool:
13511351
def dtype(self) -> np.dtype:
13521352
return np.dtype("O")
13531353

1354+
@cache_readonly
13541355
def _is_memory_usage_qualified(self) -> bool:
13551356
"""return a boolean if we need a qualified .info display"""
13561357

13571358
def f(level) -> bool:
13581359
return "mixed" in level or "string" in level or "unicode" in level
13591360

1360-
return any(f(level) for level in self._inferred_type_levels)
1361+
return any(f(level.inferred_type) for level in self.levels)
13611362

13621363
# Cannot determine type of "memory_usage"
13631364
@doc(Index.memory_usage) # type: ignore[has-type]
@@ -1659,11 +1660,6 @@ def is_monotonic_decreasing(self) -> bool:
16591660
# monotonic decreasing if and only if reverse is monotonic increasing
16601661
return self[::-1].is_monotonic_increasing
16611662

1662-
@cache_readonly
1663-
def _inferred_type_levels(self) -> list[str]:
1664-
"""return a list of the inferred types, one for each level"""
1665-
return [i.inferred_type for i in self.levels]
1666-
16671663
@doc(Index.duplicated)
16681664
def duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]:
16691665
shape = tuple(len(lev) for lev in self.levels)

pandas/core/internals/concat.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,10 @@
3939
)
4040

4141
if TYPE_CHECKING:
42-
from collections.abc import Sequence
42+
from collections.abc import (
43+
Generator,
44+
Sequence,
45+
)
4346

4447
from pandas._typing import (
4548
ArrayLike,
@@ -109,12 +112,10 @@ def concatenate_managers(
109112
out.axes = axes
110113
return out
111114

112-
concat_plan = _get_combined_plan(mgrs)
113-
114115
blocks = []
115116
values: ArrayLike
116117

117-
for placement, join_units in concat_plan:
118+
for placement, join_units in _get_combined_plan(mgrs):
118119
unit = join_units[0]
119120
blk = unit.block
120121

@@ -249,14 +250,12 @@ def _concat_homogeneous_fastpath(
249250

250251
def _get_combined_plan(
251252
mgrs: list[BlockManager],
252-
) -> list[tuple[BlockPlacement, list[JoinUnit]]]:
253-
plan = []
254-
253+
) -> Generator[tuple[BlockPlacement, list[JoinUnit]], None, None]:
255254
max_len = mgrs[0].shape[0]
256255

257256
blknos_list = [mgr.blknos for mgr in mgrs]
258257
pairs = libinternals.get_concat_blkno_indexers(blknos_list)
259-
for ind, (blknos, bp) in enumerate(pairs):
258+
for blknos, bp in pairs:
260259
# assert bp.is_slice_like
261260
# assert len(bp) > 0
262261

@@ -268,9 +267,7 @@ def _get_combined_plan(
268267
unit = JoinUnit(nb)
269268
units_for_bp.append(unit)
270269

271-
plan.append((bp, units_for_bp))
272-
273-
return plan
270+
yield bp, units_for_bp
274271

275272

276273
def _get_block_for_concat_plan(

pandas/io/formats/info.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -422,7 +422,7 @@ def size_qualifier(self) -> str:
422422
# categories)
423423
if (
424424
"object" in self.dtype_counts
425-
or self.data.index._is_memory_usage_qualified()
425+
or self.data.index._is_memory_usage_qualified
426426
):
427427
size_qualifier = "+"
428428
return size_qualifier

pandas/io/sql.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def _convert_arrays_to_dataframe(
177177
result_arrays.append(ArrowExtensionArray(pa_array))
178178
arrays = result_arrays # type: ignore[assignment]
179179
if arrays:
180-
df = DataFrame(dict(zip(list(range(len(columns))), arrays)))
180+
df = DataFrame(dict(zip(range(len(columns)), arrays)))
181181
df.columns = columns
182182
return df
183183
else:

0 commit comments

Comments
 (0)