Skip to content

CLN: Make iterators lazier #58200

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Apr 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -1710,9 +1710,9 @@ def normalize_keyword_aggregation(
# TODO: aggspec type: typing.Dict[str, List[AggScalar]]
aggspec = defaultdict(list)
order = []
- columns, pairs = list(zip(*kwargs.items()))
+ columns = tuple(kwargs.keys())

- for column, aggfunc in pairs:
+ for column, aggfunc in kwargs.values():
aggspec[column].append(aggfunc)
order.append((column, com.get_callable_name(aggfunc) or aggfunc))

Expand Down
5 changes: 3 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6168,12 +6168,13 @@ class max type
names = self.index._get_default_index_names(names, default)

if isinstance(self.index, MultiIndex):
- to_insert = zip(self.index.levels, self.index.codes)
+ to_insert = zip(reversed(self.index.levels), reversed(self.index.codes))
else:
to_insert = ((self.index, None),)

multi_col = isinstance(self.columns, MultiIndex)
- for i, (lev, lab) in reversed(list(enumerate(to_insert))):
+ for j, (lev, lab) in enumerate(to_insert, start=1):
+ i = self.index.nlevels - j
if level is not None and i not in level:
continue
name = names[i]
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -706,7 +706,7 @@ def groups(self) -> dict[Hashable, Index]:
return self.groupings[0].groups
result_index, ids = self.result_index_and_ids
values = result_index._values
- categories = Categorical(ids, categories=np.arange(len(result_index)))
+ categories = Categorical(ids, categories=range(len(result_index)))
result = {
# mypy is not aware that group has to be an integer
values[group]: self.axis.take(axis_ilocs) # type: ignore[call-overload]
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -899,7 +899,7 @@ def __setitem__(self, key, value) -> None:

check_dict_or_set_indexers(key)
if isinstance(key, tuple):
- key = tuple(list(x) if is_iterator(x) else x for x in key)
+ key = (list(x) if is_iterator(x) else x for x in key)
key = tuple(com.apply_if_callable(x, self.obj) for x in key)
else:
maybe_callable = com.apply_if_callable(key, self.obj)
Expand Down Expand Up @@ -1177,7 +1177,7 @@ def _check_deprecated_callable_usage(self, key: Any, maybe_callable: T) -> T:
def __getitem__(self, key):
check_dict_or_set_indexers(key)
if type(key) is tuple:
- key = tuple(list(x) if is_iterator(x) else x for x in key)
+ key = (list(x) if is_iterator(x) else x for x in key)
key = tuple(com.apply_if_callable(x, self.obj) for x in key)
if self._is_scalar_access(key):
return self.obj._get_value(*key, takeable=self._takeable)
Expand Down
2 changes: 0 additions & 2 deletions pandas/core/sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,6 @@ def maybe_lift(lab, size: int) -> tuple[np.ndarray, int]:
for i, (lab, size) in enumerate(zip(labels, shape)):
labels[i], lshape[i] = maybe_lift(lab, size)

- labels = list(labels)
-
# Iteratively process all the labels in chunks sized so less
# than lib.i8max unique int ids will be required for each chunk
while True:
Expand Down