Skip to content

Commit d557410

Browse files
mroeschke and pmhatre1
authored and committed
CLN: Use generators where list results are re-iterated over (pandas-dev#58296)
* Have methods return generators instead of lists * Fix ops method, undo block * Undo internals test * Make _is_memory_usage_qualified cache_readonly too
1 parent 393b814 commit d557410

File tree

7 files changed

+21
-28
lines changed

7 files changed

+21
-28
lines changed

pandas/core/groupby/groupby.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1286,11 +1286,10 @@ def _insert_inaxis_grouper(
12861286
)
12871287

12881288
# zip in reverse so we can always insert at loc 0
1289-
for level, (name, lev, in_axis) in enumerate(
1289+
for level, (name, lev) in enumerate(
12901290
zip(
12911291
reversed(self._grouper.names),
1292-
reversed(self._grouper.get_group_levels()),
1293-
reversed([grp.in_axis for grp in self._grouper.groupings]),
1292+
self._grouper.get_group_levels(),
12941293
)
12951294
):
12961295
if name is None:

pandas/core/groupby/ops.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070

7171
if TYPE_CHECKING:
7272
from collections.abc import (
73+
Generator,
7374
Hashable,
7475
Iterator,
7576
Sequence,
@@ -857,16 +858,15 @@ def _unob_index_and_ids(
857858
return unob_index, unob_ids
858859

859860
@final
860-
def get_group_levels(self) -> list[Index]:
861+
def get_group_levels(self) -> Generator[Index, None, None]:
861862
# Note: only called from _insert_inaxis_grouper, which
862863
# is only called for BaseGrouper, never for BinGrouper
863864
result_index = self.result_index
864865
if len(self.groupings) == 1:
865-
return [result_index]
866-
return [
867-
result_index.get_level_values(level)
868-
for level in range(result_index.nlevels)
869-
]
866+
yield result_index
867+
else:
868+
for level in range(result_index.nlevels - 1, -1, -1):
869+
yield result_index.get_level_values(level)
870870

871871
# ------------------------------------------------------------
872872
# Aggregation functions

pandas/core/indexes/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4900,6 +4900,7 @@ def _validate_fill_value(self, value):
49004900
raise TypeError
49014901
return value
49024902

4903+
@cache_readonly
49034904
def _is_memory_usage_qualified(self) -> bool:
49044905
"""
49054906
Return a boolean if we need a qualified .info display.

pandas/core/indexes/multi.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1351,13 +1351,14 @@ def __contains__(self, key: Any) -> bool:
13511351
def dtype(self) -> np.dtype:
13521352
return np.dtype("O")
13531353

1354+
@cache_readonly
13541355
def _is_memory_usage_qualified(self) -> bool:
13551356
"""return a boolean if we need a qualified .info display"""
13561357

13571358
def f(level) -> bool:
13581359
return "mixed" in level or "string" in level or "unicode" in level
13591360

1360-
return any(f(level) for level in self._inferred_type_levels)
1361+
return any(f(level.inferred_type) for level in self.levels)
13611362

13621363
# Cannot determine type of "memory_usage"
13631364
@doc(Index.memory_usage) # type: ignore[has-type]
@@ -1659,11 +1660,6 @@ def is_monotonic_decreasing(self) -> bool:
16591660
# monotonic decreasing if and only if reverse is monotonic increasing
16601661
return self[::-1].is_monotonic_increasing
16611662

1662-
@cache_readonly
1663-
def _inferred_type_levels(self) -> list[str]:
1664-
"""return a list of the inferred types, one for each level"""
1665-
return [i.inferred_type for i in self.levels]
1666-
16671663
@doc(Index.duplicated)
16681664
def duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]:
16691665
shape = tuple(len(lev) for lev in self.levels)

pandas/core/internals/concat.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,10 @@
3939
)
4040

4141
if TYPE_CHECKING:
42-
from collections.abc import Sequence
42+
from collections.abc import (
43+
Generator,
44+
Sequence,
45+
)
4346

4447
from pandas._typing import (
4548
ArrayLike,
@@ -109,12 +112,10 @@ def concatenate_managers(
109112
out.axes = axes
110113
return out
111114

112-
concat_plan = _get_combined_plan(mgrs)
113-
114115
blocks = []
115116
values: ArrayLike
116117

117-
for placement, join_units in concat_plan:
118+
for placement, join_units in _get_combined_plan(mgrs):
118119
unit = join_units[0]
119120
blk = unit.block
120121

@@ -249,14 +250,12 @@ def _concat_homogeneous_fastpath(
249250

250251
def _get_combined_plan(
251252
mgrs: list[BlockManager],
252-
) -> list[tuple[BlockPlacement, list[JoinUnit]]]:
253-
plan = []
254-
253+
) -> Generator[tuple[BlockPlacement, list[JoinUnit]], None, None]:
255254
max_len = mgrs[0].shape[0]
256255

257256
blknos_list = [mgr.blknos for mgr in mgrs]
258257
pairs = libinternals.get_concat_blkno_indexers(blknos_list)
259-
for ind, (blknos, bp) in enumerate(pairs):
258+
for blknos, bp in pairs:
260259
# assert bp.is_slice_like
261260
# assert len(bp) > 0
262261

@@ -268,9 +267,7 @@ def _get_combined_plan(
268267
unit = JoinUnit(nb)
269268
units_for_bp.append(unit)
270269

271-
plan.append((bp, units_for_bp))
272-
273-
return plan
270+
yield bp, units_for_bp
274271

275272

276273
def _get_block_for_concat_plan(

pandas/io/formats/info.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -422,7 +422,7 @@ def size_qualifier(self) -> str:
422422
# categories)
423423
if (
424424
"object" in self.dtype_counts
425-
or self.data.index._is_memory_usage_qualified()
425+
or self.data.index._is_memory_usage_qualified
426426
):
427427
size_qualifier = "+"
428428
return size_qualifier

pandas/io/sql.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def _convert_arrays_to_dataframe(
177177
result_arrays.append(ArrowExtensionArray(pa_array))
178178
arrays = result_arrays # type: ignore[assignment]
179179
if arrays:
180-
df = DataFrame(dict(zip(list(range(len(columns))), arrays)))
180+
df = DataFrame(dict(zip(range(len(columns)), arrays)))
181181
df.columns = columns
182182
return df
183183
else:

0 commit comments

Comments
 (0)