Skip to content

Commit 1e3bf39

Browse files
authored
REF: Convert list comprehensions into lazy iterators (#58798)
1 parent 695b170 commit 1e3bf39

File tree

11 files changed

+62
-54
lines changed

11 files changed

+62
-54
lines changed

pandas/core/arraylike.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -329,8 +329,8 @@ def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any)
329329
reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes))
330330

331331
if self.ndim == 1:
332-
names = [getattr(x, "name") for x in inputs if hasattr(x, "name")]
333-
name = names[0] if len(set(names)) == 1 else None
332+
names = {getattr(x, "name") for x in inputs if hasattr(x, "name")}
333+
name = names.pop() if len(names) == 1 else None
334334
reconstruct_kwargs = {"name": name}
335335
else:
336336
reconstruct_kwargs = {}

pandas/core/common.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -335,11 +335,12 @@ def is_empty_slice(obj) -> bool:
335335
)
336336

337337

338-
def is_true_slices(line) -> list[bool]:
338+
def is_true_slices(line: abc.Iterable) -> abc.Generator[bool, None, None]:
339339
"""
340-
Find non-trivial slices in "line": return a list of booleans with same length.
340+
Find non-trivial slices in "line": yields a bool for each element.
341341
"""
342-
return [isinstance(k, slice) and not is_null_slice(k) for k in line]
342+
for k in line:
343+
yield isinstance(k, slice) and not is_null_slice(k)
343344

344345

345346
# TODO: used only once in indexing; belongs elsewhere?

pandas/core/dtypes/dtypes.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -680,10 +680,11 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
680680
return None
681681

682682
# categorical is aware of Sparse -> extract sparse subdtypes
683-
dtypes = [x.subtype if isinstance(x, SparseDtype) else x for x in dtypes]
683+
subtypes = (x.subtype if isinstance(x, SparseDtype) else x for x in dtypes)
684684
# extract the categories' dtype
685685
non_cat_dtypes = [
686-
x.categories.dtype if isinstance(x, CategoricalDtype) else x for x in dtypes
686+
x.categories.dtype if isinstance(x, CategoricalDtype) else x
687+
for x in subtypes
687688
]
688689
# TODO should categorical always give an answer?
689690
from pandas.core.dtypes.cast import find_common_type

pandas/core/frame.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -6999,19 +6999,19 @@ def sort_values(
69996999
f" != length of by ({len(by)})"
70007000
)
70017001
if len(by) > 1:
7002-
keys = [self._get_label_or_level_values(x, axis=axis) for x in by]
7002+
keys = (self._get_label_or_level_values(x, axis=axis) for x in by)
70037003

70047004
# need to rewrap columns in Series to apply key function
70057005
if key is not None:
7006-
# error: List comprehension has incompatible type List[Series];
7007-
# expected List[ndarray]
7008-
keys = [
7009-
Series(k, name=name) # type: ignore[misc]
7010-
for (k, name) in zip(keys, by)
7011-
]
7006+
keys_data = [Series(k, name=name) for (k, name) in zip(keys, by)]
7007+
else:
7008+
# error: Argument 1 to "list" has incompatible type
7009+
# "Generator[ExtensionArray | ndarray[Any, Any], None, None]";
7010+
# expected "Iterable[Series]"
7011+
keys_data = list(keys) # type: ignore[arg-type]
70127012

70137013
indexer = lexsort_indexer(
7014-
keys, orders=ascending, na_position=na_position, key=key
7014+
keys_data, orders=ascending, na_position=na_position, key=key
70157015
)
70167016
elif len(by):
70177017
# len(by) == 1

pandas/core/groupby/generic.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,7 @@ def _aggregate_multiple_funcs(self, arg, *args, **kwargs) -> DataFrame:
387387
raise SpecificationError("nested renamer is not supported")
388388

389389
if any(isinstance(x, (tuple, list)) for x in arg):
390-
arg = [(x, x) if not isinstance(x, (tuple, list)) else x for x in arg]
390+
arg = ((x, x) if not isinstance(x, (tuple, list)) else x for x in arg)
391391
else:
392392
# list of functions / function names
393393
columns = (com.get_callable_name(f) or f for f in arg)
@@ -2077,7 +2077,7 @@ def _apply_to_column_groupbys(self, func) -> DataFrame:
20772077

20782078
obj = self._obj_with_exclusions
20792079
columns = obj.columns
2080-
sgbs = [
2080+
sgbs = (
20812081
SeriesGroupBy(
20822082
obj.iloc[:, i],
20832083
selection=colname,
@@ -2086,7 +2086,7 @@ def _apply_to_column_groupbys(self, func) -> DataFrame:
20862086
observed=self.observed,
20872087
)
20882088
for i, colname in enumerate(obj.columns)
2089-
]
2089+
)
20902090
results = [func(sgb) for sgb in sgbs]
20912091

20922092
if not len(results):

pandas/core/groupby/groupby.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ class providing the base-class of operations.
1111

1212
from collections.abc import (
1313
Hashable,
14+
Iterable,
1415
Iterator,
1516
Mapping,
1617
Sequence,
@@ -758,7 +759,7 @@ def get_converter(s):
758759
)
759760
raise ValueError(msg) from err
760761

761-
converters = [get_converter(s) for s in index_sample]
762+
converters = (get_converter(s) for s in index_sample)
762763
names = (tuple(f(n) for f, n in zip(converters, name)) for name in names)
763764

764765
else:
@@ -2645,7 +2646,7 @@ def _value_counts(
26452646
}
26462647
if isinstance(obj, Series):
26472648
_name = obj.name
2648-
keys = [] if _name in in_axis_names else [obj]
2649+
keys: Iterable[Series] = [] if _name in in_axis_names else [obj]
26492650
else:
26502651
unique_cols = set(obj.columns)
26512652
if subset is not None:
@@ -2665,12 +2666,12 @@ def _value_counts(
26652666
else:
26662667
subsetted = unique_cols
26672668

2668-
keys = [
2669+
keys = (
26692670
# Can't use .values because the column label needs to be preserved
26702671
obj.iloc[:, idx]
26712672
for idx, _name in enumerate(obj.columns)
26722673
if _name not in in_axis_names and _name in subsetted
2673-
]
2674+
)
26742675

26752676
groupings = list(self._grouper.groupings)
26762677
for key in keys:

pandas/core/indexes/api.py

+10-5
Original file line numberDiff line numberDiff line change
@@ -212,20 +212,25 @@ def union_indexes(indexes, sort: bool | None = True) -> Index:
212212
if kind == "special":
213213
result = indexes[0]
214214

215-
dtis = [x for x in indexes if isinstance(x, DatetimeIndex)]
216-
dti_tzs = [x for x in dtis if x.tz is not None]
217-
if len(dti_tzs) not in [0, len(dtis)]:
215+
num_dtis = 0
216+
num_dti_tzs = 0
217+
for idx in indexes:
218+
if isinstance(idx, DatetimeIndex):
219+
num_dtis += 1
220+
if idx.tz is not None:
221+
num_dti_tzs += 1
222+
if num_dti_tzs not in [0, num_dtis]:
218223
# TODO: this behavior is not tested (so may not be desired),
219224
# but is kept in order to keep behavior the same when
220225
# deprecating union_many
221226
# test_frame_from_dict_with_mixed_indexes
222227
raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")
223228

224-
if len(dtis) == len(indexes):
229+
if num_dtis == len(indexes):
225230
sort = True
226231
result = indexes[0]
227232

228-
elif len(dtis) > 1:
233+
elif num_dtis > 1:
229234
# If we have mixed timezones, our casting behavior may depend on
230235
# the order of indexes, which we don't want.
231236
sort = False

pandas/core/indexes/base.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -3140,7 +3140,7 @@ def _union(self, other: Index, sort: bool | None):
31403140

31413141
# worth making this faster? a very unusual case
31423142
value_set = set(lvals)
3143-
value_list.extend([x for x in rvals if x not in value_set])
3143+
value_list.extend(x for x in rvals if x not in value_set)
31443144
# If objects are unorderable, we must have object dtype.
31453145
return np.array(value_list, dtype=object)
31463146

@@ -7620,8 +7620,8 @@ def get_unanimous_names(*indexes: Index) -> tuple[Hashable, ...]:
76207620
tuple
76217621
A tuple representing the unanimous 'names' found.
76227622
"""
7623-
name_tups = [tuple(i.names) for i in indexes]
7624-
name_sets = [{*ns} for ns in zip_longest(*name_tups)]
7623+
name_tups = (tuple(i.names) for i in indexes)
7624+
name_sets = ({*ns} for ns in zip_longest(*name_tups))
76257625
names = tuple(ns.pop() if len(ns) == 1 else None for ns in name_sets)
76267626
return names
76277627

pandas/core/indexes/multi.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1387,7 +1387,7 @@ def _formatter_func(self, tup):
13871387
"""
13881388
Formats each item in tup according to its level's formatter function.
13891389
"""
1390-
formatter_funcs = [level._formatter_func for level in self.levels]
1390+
formatter_funcs = (level._formatter_func for level in self.levels)
13911391
return tuple(func(val) for func, val in zip(formatter_funcs, tup))
13921392

13931393
def _get_values_for_csv(
@@ -1537,7 +1537,7 @@ def _set_names(self, names, *, level=None, validate: bool = True) -> None:
15371537
if level is None:
15381538
level = range(self.nlevels)
15391539
else:
1540-
level = [self._get_level_number(lev) for lev in level]
1540+
level = (self._get_level_number(lev) for lev in level)
15411541

15421542
# set the name
15431543
for lev, name in zip(level, names):

pandas/core/reshape/concat.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -560,7 +560,7 @@ def get_result(self):
560560

561561
# combine as columns in a frame
562562
else:
563-
data = dict(zip(range(len(self.objs)), self.objs))
563+
data = dict(enumerate(self.objs))
564564

565565
# GH28330 Preserves subclassed objects through concat
566566
cons = sample._constructor_expanddim
@@ -874,7 +874,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde
874874

875875
if isinstance(new_index, MultiIndex):
876876
new_levels.extend(new_index.levels)
877-
new_codes.extend([np.tile(lab, kpieces) for lab in new_index.codes])
877+
new_codes.extend(np.tile(lab, kpieces) for lab in new_index.codes)
878878
else:
879879
new_levels.append(new_index.unique())
880880
single_codes = new_index.unique().get_indexer(new_index)

pandas/core/reshape/reshape.py

+20-20
Original file line numberDiff line numberDiff line change
@@ -137,24 +137,24 @@ def __init__(
137137
self.removed_level = self.removed_level.take(unique_codes)
138138
self.removed_level_full = self.removed_level_full.take(unique_codes)
139139

140-
# Bug fix GH 20601
141-
# If the data frame is too big, the number of unique index combinations
142-
# will cause int32 overflow on windows environments.
143-
# We want to check and raise a warning before this happens
144-
num_rows = np.max([index_level.size for index_level in self.new_index_levels])
145-
num_columns = self.removed_level.size
146-
147-
# GH20601: This forces an overflow if the number of cells is too high.
148-
num_cells = num_rows * num_columns
149-
150-
# GH 26314: Previous ValueError raised was too restrictive for many users.
151-
if get_option("performance_warnings") and num_cells > np.iinfo(np.int32).max:
152-
warnings.warn(
153-
f"The following operation may generate {num_cells} cells "
154-
f"in the resulting pandas object.",
155-
PerformanceWarning,
156-
stacklevel=find_stack_level(),
157-
)
140+
if get_option("performance_warnings"):
141+
# Bug fix GH 20601
142+
# If the data frame is too big, the number of unique index combinations
143+
# will cause int32 overflow on windows environments.
144+
# We want to check and raise a warning before this happens
145+
num_rows = max(index_level.size for index_level in self.new_index_levels)
146+
num_columns = self.removed_level.size
147+
148+
# GH20601: This forces an overflow if the number of cells is too high.
149+
# GH 26314: Previous ValueError raised was too restrictive for many users.
150+
num_cells = num_rows * num_columns
151+
if num_cells > np.iinfo(np.int32).max:
152+
warnings.warn(
153+
f"The following operation may generate {num_cells} cells "
154+
f"in the resulting pandas object.",
155+
PerformanceWarning,
156+
stacklevel=find_stack_level(),
157+
)
158158

159159
self._make_selectors()
160160

@@ -731,10 +731,10 @@ def _stack_multi_column_index(columns: MultiIndex) -> MultiIndex | Index:
731731
if len(columns.levels) <= 2:
732732
return columns.levels[0]._rename(name=columns.names[0])
733733

734-
levs = [
734+
levs = (
735735
[lev[c] if c >= 0 else None for c in codes]
736736
for lev, codes in zip(columns.levels[:-1], columns.codes[:-1])
737-
]
737+
)
738738

739739
# Remove duplicate tuples in the MultiIndex.
740740
tuples = zip(*levs)

0 commit comments

Comments
 (0)