
CLN: Use generators where list results are re-iterated over #58296

Merged: 6 commits, Apr 19, 2024
Changes from 4 commits

5 changes: 2 additions & 3 deletions pandas/core/groupby/groupby.py

@@ -1286,11 +1286,10 @@ def _insert_inaxis_grouper(
         )

         # zip in reverse so we can always insert at loc 0
-        for level, (name, lev, in_axis) in enumerate(
+        for level, (name, lev) in enumerate(
             zip(
                 reversed(self._grouper.names),
-                reversed(self._grouper.get_group_levels()),
-                reversed([grp.in_axis for grp in self._grouper.groupings]),
+                self._grouper.get_group_levels(),
             )
         ):
             if name is None:
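
Note on the call-site change above: `reversed()` only accepts sequences, so once `get_group_levels()` returns a generator it can no longer be wrapped in `reversed(...)`; the generator has to yield the levels in reverse order itself. A minimal sketch of that constraint (names and values are invented):

```python
def levels():
    # Stand-in for the new get_group_levels(): the producer yields the
    # levels already reversed, since a generator cannot be reversed().
    yield from ("b", "a")

names = ["y", "x"]

# reversed() requires a sequence; on a generator it raises
# "TypeError: 'generator' object is not reversible".
pairs = list(zip(reversed(names), levels()))
assert pairs == [("x", "b"), ("y", "a")]
```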

12 changes: 6 additions & 6 deletions pandas/core/groupby/ops.py

@@ -70,6 +70,7 @@
 if TYPE_CHECKING:
     from collections.abc import (
+        Generator,
         Hashable,
         Iterator,
         Sequence,

@@ -857,16 +858,15 @@ def _unob_index_and_ids(
         return unob_index, unob_ids

     @final
-    def get_group_levels(self) -> list[Index]:
+    def get_group_levels(self) -> Generator[Index, None, None]:
         # Note: only called from _insert_inaxis_grouper, which
         # is only called for BaseGrouper, never for BinGrouper
         result_index = self.result_index
         if len(self.groupings) == 1:
-            return [result_index]
-        return [
-            result_index.get_level_values(level)
-            for level in range(result_index.nlevels)
-        ]
+            yield result_index
+        else:
+            for level in range(result_index.nlevels - 1, -1, -1):
+                yield result_index.get_level_values(level)

     # ------------------------------------------------------------
     # Aggregation functions
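
This is the core cleanup of the PR: `get_group_levels` used to build a list that its single caller iterated exactly once, so it can yield instead. A rough, self-contained sketch of the before/after pattern, using a plain `pd.MultiIndex` in place of the grouper machinery (the function names here are illustrative, not pandas API):

```python
from collections.abc import Generator

import pandas as pd

def levels_as_list(index: pd.Index) -> list[pd.Index]:
    # Before: a list the caller iterates exactly once.
    return [index.get_level_values(i) for i in range(index.nlevels)]

def levels_as_gen(index: pd.Index) -> Generator[pd.Index, None, None]:
    # After: yield lazily, deepest level first, baking in the reversed
    # order the call site used to apply itself.
    for i in range(index.nlevels - 1, -1, -1):
        yield index.get_level_values(i)

mi = pd.MultiIndex.from_arrays([[1, 2], ["a", "b"]], names=["n", "s"])
assert [lev.name for lev in levels_as_gen(mi)] == ["s", "n"]
```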

1 change: 1 addition & 0 deletions pandas/core/indexes/base.py

@@ -4919,6 +4919,7 @@ def _validate_fill_value(self, value):
             raise TypeError
         return value

+    @property
     def _is_memory_usage_qualified(self) -> bool:
         """
         Return a boolean if we need a qualified .info display.
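
With `@property`, every access changes from a method call to an attribute read; the `pandas/io/formats/info.py` hunk further down updates the one call site accordingly. A tiny illustration of the mechanics (the `Demo` class is made up):

```python
class Demo:
    @property
    def flag(self) -> bool:
        return True

d = Demo()
assert d.flag is True  # attribute read, no parentheses
# d.flag() would now fail: "TypeError: 'bool' object is not callable"
```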

8 changes: 2 additions & 6 deletions pandas/core/indexes/multi.py

@@ -1351,13 +1351,14 @@ def __contains__(self, key: Any) -> bool:
     def dtype(self) -> np.dtype:
         return np.dtype("O")

+    @cache_readonly
     def _is_memory_usage_qualified(self) -> bool:
         """return a boolean if we need a qualified .info display"""

         def f(level) -> bool:
             return "mixed" in level or "string" in level or "unicode" in level

-        return any(f(level) for level in self._inferred_type_levels)
+        return any(f(level.inferred_type) for level in self.levels)

     # Cannot determine type of "memory_usage"
     @doc(Index.memory_usage)  # type: ignore[has-type]

@@ -1659,11 +1660,6 @@ def is_monotonic_decreasing(self) -> bool:
         # monotonic decreasing if and only if reverse is monotonic increasing
         return self[::-1].is_monotonic_increasing

-    @cache_readonly
-    def _inferred_type_levels(self) -> list[str]:
-        """return a list of the inferred types, one for each level"""
-        return [i.inferred_type for i in self.levels]
-
     @doc(Index.duplicated)
     def duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]:
         shape = tuple(len(lev) for lev in self.levels)
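
pandas' internal `cache_readonly` decorator computes a value on first access and serves the cached result afterwards, so inlining `_inferred_type_levels` keeps the caching while dropping the intermediate list. `functools.cached_property` is a close standard-library analogue, used as a stand-in in this sketch:

```python
from functools import cached_property

class Probe:
    def __init__(self) -> None:
        self.calls = 0

    @cached_property  # stand-in for pandas' internal cache_readonly
    def expensive(self) -> int:
        self.calls += 1
        return 42

p = Probe()
assert p.expensive == 42
assert p.expensive == 42
assert p.calls == 1  # computed once, served from the cache afterwards
```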

19 changes: 8 additions & 11 deletions pandas/core/internals/concat.py

@@ -49,7 +49,10 @@
 )

 if TYPE_CHECKING:
-    from collections.abc import Sequence
+    from collections.abc import (
+        Generator,
+        Sequence,
+    )

     from pandas._typing import (
         ArrayLike,

@@ -118,12 +121,10 @@ def concatenate_managers(
         out.axes = axes
         return out

-    concat_plan = _get_combined_plan(mgrs)
-
     blocks = []
     values: ArrayLike

-    for placement, join_units in concat_plan:
+    for placement, join_units in _get_combined_plan(mgrs):
         unit = join_units[0]
         blk = unit.block

@@ -258,14 +259,12 @@ def _concat_homogeneous_fastpath(
 def _get_combined_plan(
     mgrs: list[BlockManager],
-) -> list[tuple[BlockPlacement, list[JoinUnit]]]:
-    plan = []
-
+) -> Generator[tuple[BlockPlacement, list[JoinUnit]], None, None]:
     max_len = mgrs[0].shape[0]

     blknos_list = [mgr.blknos for mgr in mgrs]
     pairs = libinternals.get_concat_blkno_indexers(blknos_list)
-    for ind, (blknos, bp) in enumerate(pairs):
+    for blknos, bp in pairs:
         # assert bp.is_slice_like
         # assert len(bp) > 0

@@ -277,9 +276,7 @@ def _get_combined_plan(
             unit = JoinUnit(nb)
             units_for_bp.append(unit)

-        plan.append((bp, units_for_bp))
-
-    return plan
+        yield bp, units_for_bp
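
Same pattern as in `ops.py`: `_get_combined_plan` accumulated `(placement, join_units)` pairs into a list that `concatenate_managers` walks exactly once, so the list and the unused `enumerate` index can both go. Schematically, with invented element types:

```python
from collections.abc import Generator

def plan_as_list(items: list[str]) -> list[tuple[int, str]]:
    # Before: accumulate, then hand back the whole list.
    plan = []
    for item in items:
        plan.append((len(item), item))
    return plan

def plan_as_gen(items: list[str]) -> Generator[tuple[int, str], None, None]:
    # After: no intermediate list; each pair is produced as the caller
    # asks for it.
    for item in items:
        yield len(item), item

assert plan_as_list(["a", "bb"]) == list(plan_as_gen(["a", "bb"]))
```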

2 changes: 1 addition & 1 deletion pandas/io/formats/info.py

@@ -422,7 +422,7 @@ def size_qualifier(self) -> str:
                 # categories)
                 if (
                     "object" in self.dtype_counts
-                    or self.data.index._is_memory_usage_qualified()
+                    or self.data.index._is_memory_usage_qualified
                 ):
                     size_qualifier = "+"
         return size_qualifier

2 changes: 1 addition & 1 deletion pandas/io/sql.py

@@ -177,7 +177,7 @@ def _convert_arrays_to_dataframe(
             result_arrays.append(ArrowExtensionArray(pa_array))
         arrays = result_arrays  # type: ignore[assignment]
         if arrays:
-            df = DataFrame(dict(zip(list(range(len(columns))), arrays)))
+            df = DataFrame(dict(zip(range(len(columns)), arrays)))
             df.columns = columns
             return df
         else:
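
`zip()` consumes its arguments lazily and stops at the shortest, so materializing the `range` into a list first only built a throwaway copy. For example:

```python
columns = ["a", "b"]
arrays = [[1, 2], [3, 4]]

# range() is already iterable; zip() pulls from it on demand.
d = dict(zip(range(len(columns)), arrays))
assert d == {0: [1, 2], 1: [3, 4]}
```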