Commit c8c90fe

mroeschke authored and pmhatre1 committed
REF: Clean up some iterator usages (pandas-dev#58267)

* Use better data structures
* Use generator and set
* Move sorted to exception block, use set instead of list
* Another iterator, use iter
* Another set
* Don't use iterator protocol
1 parent 3239570 commit c8c90fe
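The bullets above share one theme: swap lists for sets (and one-shot generators) where the code only tests membership or iterates once. A rough illustration, not taken from the commit, of why that swap pays off:

```python
# Membership tests against a set hash the key, O(1) on average; the
# same test against a list compares element by element, O(n).
import timeit

items = list(range(10_000))
as_list = items
as_set = set(items)

# The set lookup stays flat as the collection grows; the list lookup
# scales linearly with its length.
print(timeit.timeit(lambda: 9_999 in as_list, number=1_000))
print(timeit.timeit(lambda: 9_999 in as_set, number=1_000))
```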

File tree: 5 files changed (+31 −33)


pandas/_libs/tslibs/offsets.pyx (+5 −7)
@@ -219,8 +219,7 @@ cdef _get_calendar(weekmask, holidays, calendar):
         holidays = holidays + calendar.holidays().tolist()
     except AttributeError:
         pass
-    holidays = [_to_dt64D(dt) for dt in holidays]
-    holidays = tuple(sorted(holidays))
+    holidays = tuple(sorted(_to_dt64D(dt) for dt in holidays))

     kwargs = {"weekmask": weekmask}
     if holidays:
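The hunk above folds two statements into one: sorted() consumes the conversion lazily through a generator expression, so the named intermediate list disappears. A minimal sketch of the same pattern, using a hypothetical to_day() as a stand-in for the internal _to_dt64D helper:

```python
import numpy as np

def to_day(dt) -> np.datetime64:
    # Hypothetical stand-in for the internal _to_dt64D helper:
    # coerce to day-precision datetime64.
    return np.datetime64(dt, "D")

holidays = ["2024-12-25", "2024-01-01", "2024-07-04"]

# Before: two statements and a throwaway intermediate list.
# holidays = [to_day(dt) for dt in holidays]
# holidays = tuple(sorted(holidays))

# After: sorted() consumes the generator directly; the only list built
# is the one sorted() creates internally.
holidays = tuple(sorted(to_day(dt) for dt in holidays))
print(holidays)  # chronologically ordered datetime64[D] values
```

Note that sorted() still materializes a list internally, so the win here is mostly dropping the named intermediate, not the sort itself.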
@@ -419,11 +418,10 @@ cdef class BaseOffset:

         if "holidays" in all_paras and not all_paras["holidays"]:
             all_paras.pop("holidays")
-        exclude = ["kwds", "name", "calendar"]
-        attrs = [(k, v) for k, v in all_paras.items()
-                 if (k not in exclude) and (k[0] != "_")]
-        attrs = sorted(set(attrs))
-        params = tuple([str(type(self))] + attrs)
+        exclude = {"kwds", "name", "calendar"}
+        attrs = {(k, v) for k, v in all_paras.items()
+                 if (k not in exclude) and (k[0] != "_")}
+        params = tuple([str(type(self))] + sorted(attrs))
         return params

     @property
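Two changes combine in this hunk: exclude becomes a set so the `k not in exclude` filter is a hash lookup, and the set comprehension de-duplicates pairs as it builds them, leaving a single sorted() call. A hedged sketch with illustrative values:

```python
# Illustrative values; in the real method all_paras comes from
# self.__dict__ plus the offset's keyword arguments.
all_paras = {"n": 1, "normalize": False, "_cache": {}, "name": "B"}
exclude = {"kwds", "name", "calendar"}  # set: O(1) membership test

# The set comprehension de-duplicates (key, value) pairs as it builds,
# so no separate set(...) pass is needed; one sorted() call at the end
# keeps the resulting tuple deterministic (and hashable).
attrs = {(k, v) for k, v in all_paras.items()
         if (k not in exclude) and (k[0] != "_")}
params = ("<class 'Example'>", *sorted(attrs))
print(params)
```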

pandas/core/frame.py (+14 −13)
@@ -2301,8 +2301,8 @@ def maybe_reorder(
             exclude.update(index)

         if any(exclude):
-            arr_exclude = [x for x in exclude if x in arr_columns]
-            to_remove = [arr_columns.get_loc(col) for col in arr_exclude]
+            arr_exclude = (x for x in exclude if x in arr_columns)
+            to_remove = {arr_columns.get_loc(col) for col in arr_exclude}
             arrays = [v for i, v in enumerate(arrays) if i not in to_remove]

             columns = columns.drop(exclude)
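Here a generator expression feeds a set comprehension: the filtered labels are never stored, and the `i not in to_remove` check in the following list comprehension becomes a set lookup. A rough sketch of the same flow, with plain lists standing in for the pandas Index API (list.index in place of Index.get_loc):

```python
# Illustrative names; list.index stands in for Index.get_loc here.
arr_columns = ["a", "b", "c", "d"]
exclude = {"b", "d", "not_present"}
arrays = [[1], [2], [3], [4]]

# The generator never materializes the filtered labels as a list, and
# building to_remove as a set makes the `i not in to_remove` test below
# an average O(1) lookup instead of a list scan.
arr_exclude = (x for x in exclude if x in arr_columns)
to_remove = {arr_columns.index(col) for col in arr_exclude}
arrays = [v for i, v in enumerate(arrays) if i not in to_remove]
print(arrays)  # [[1], [3]]
```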
@@ -3705,7 +3705,7 @@ def transpose(
         nv.validate_transpose(args, {})
         # construct the args

-        dtypes = list(self.dtypes)
+        first_dtype = self.dtypes.iloc[0] if len(self.columns) else None

         if self._can_fast_transpose:
             # Note: tests pass without this, but this improves perf quite a bit.
@@ -3723,11 +3723,11 @@ def transpose(

         elif (
             self._is_homogeneous_type
-            and dtypes
-            and isinstance(dtypes[0], ExtensionDtype)
+            and first_dtype is not None
+            and isinstance(first_dtype, ExtensionDtype)
         ):
             new_values: list
-            if isinstance(dtypes[0], BaseMaskedDtype):
+            if isinstance(first_dtype, BaseMaskedDtype):
                 # We have masked arrays with the same dtype. We can transpose faster.
                 from pandas.core.arrays.masked import (
                     transpose_homogeneous_masked_arrays,
@@ -3736,7 +3736,7 @@ def transpose(
                 new_values = transpose_homogeneous_masked_arrays(
                     cast(Sequence[BaseMaskedArray], self._iter_column_arrays())
                 )
-            elif isinstance(dtypes[0], ArrowDtype):
+            elif isinstance(first_dtype, ArrowDtype):
                 # We have arrow EAs with the same dtype. We can transpose faster.
                 from pandas.core.arrays.arrow.array import (
                     ArrowExtensionArray,
@@ -3748,10 +3748,11 @@ def transpose(
                 )
             else:
                 # We have other EAs with the same dtype. We preserve dtype in transpose.
-                dtyp = dtypes[0]
-                arr_typ = dtyp.construct_array_type()
+                arr_typ = first_dtype.construct_array_type()
                 values = self.values
-                new_values = [arr_typ._from_sequence(row, dtype=dtyp) for row in values]
+                new_values = [
+                    arr_typ._from_sequence(row, dtype=first_dtype) for row in values
+                ]

             result = type(self)._from_arrays(
                 new_values,
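The transpose hunks all serve one change: instead of materializing list(self.dtypes), with one entry per column, only to inspect element 0, the rewrite fetches just the first dtype. A small sketch of the pattern:

```python
import pandas as pd

df = pd.DataFrame({"a": pd.array([1, 2]), "b": pd.array([3, 4])})

# Before: dtypes = list(df.dtypes) built one entry per column even
# though only dtypes[0] was ever inspected.
# After: fetch just the first dtype, guarding the zero-column case
# (dtypes.iloc[0] would raise IndexError on an empty frame).
first_dtype = df.dtypes.iloc[0] if len(df.columns) else None
print(first_dtype)  # Int64
```

The explicit None guard replaces the old `and dtypes` truthiness check, which is why the elif condition gains `first_dtype is not None`.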
@@ -5882,7 +5883,7 @@ def set_index(
         else:
             arrays.append(self.index)

-        to_remove: list[Hashable] = []
+        to_remove: set[Hashable] = set()
         for col in keys:
             if isinstance(col, MultiIndex):
                 arrays.extend(col._get_level_values(n) for n in range(col.nlevels))
@@ -5909,7 +5910,7 @@ def set_index(
                 arrays.append(frame[col])
                 names.append(col)
                 if drop:
-                    to_remove.append(col)
+                    to_remove.add(col)

             if len(arrays[-1]) != len(self):
                 # check newest element against length of calling frame, since
@@ -5926,7 +5927,7 @@ def set_index(
             raise ValueError(f"Index has duplicate keys: {duplicates}")

         # use set to handle duplicate column names gracefully in case of drop
-        for c in set(to_remove):
+        for c in to_remove:
             del frame[c]

         # clear up memory usage
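Typing to_remove as a set collapses duplicate keys as they are collected, so the deletion loop no longer needs the last-moment set(...) wrapper. A toy sketch of why that wrapper existed:

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]})

# Collecting labels in a set de-duplicates up front, so each column is
# deleted exactly once even if the same key was passed twice.
to_remove: set = set()
for col in ["a", "b", "a"]:  # "a" appears twice
    to_remove.add(col)

for c in to_remove:  # no set(...) wrapper needed at this point
    del df[c]
print(df.columns.tolist())  # ['c']
```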

pandas/core/generic.py (+1 −1)
@@ -2045,7 +2045,7 @@ def __setstate__(self, state) -> None:
                 # e.g. say fill_value needing _mgr to be
                 # defined
                 meta = set(self._internal_names + self._metadata)
-                for k in list(meta):
+                for k in meta:
                     if k in state and k != "_flags":
                         v = state[k]
                         object.__setattr__(self, k, v)
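A list(meta) copy is only required when the set is mutated while being iterated; this loop only reads, so iterating the set directly saves one throwaway list per unpickled object. A minimal sketch of the shape of the loop, with illustrative values:

```python
# Illustrative stand-ins for _internal_names + _metadata and the
# pickled state dict.
meta = {"_name", "_flags", "attrs"}
state = {"_name": "s", "attrs": {}}

# Safe to iterate the set directly because the body never adds to or
# removes from meta; previously this read `for k in list(meta):`.
for k in meta:
    if k in state and k != "_flags":
        print("restore", k, "=", state[k])
```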

pandas/core/internals/construction.py (+7 −8)
@@ -567,7 +567,7 @@ def _extract_index(data) -> Index:
     if len(data) == 0:
         return default_index(0)

-    raw_lengths = []
+    raw_lengths = set()
     indexes: list[list[Hashable] | Index] = []

     have_raw_arrays = False
@@ -583,7 +583,7 @@ def _extract_index(data) -> Index:
             indexes.append(list(val.keys()))
         elif is_list_like(val) and getattr(val, "ndim", 1) == 1:
             have_raw_arrays = True
-            raw_lengths.append(len(val))
+            raw_lengths.add(len(val))
         elif isinstance(val, np.ndarray) and val.ndim > 1:
             raise ValueError("Per-column arrays must each be 1-dimensional")

@@ -596,24 +596,23 @@ def _extract_index(data) -> Index:
         index = union_indexes(indexes, sort=False)

     if have_raw_arrays:
-        lengths = list(set(raw_lengths))
-        if len(lengths) > 1:
+        if len(raw_lengths) > 1:
             raise ValueError("All arrays must be of the same length")

         if have_dicts:
             raise ValueError(
                 "Mixing dicts with non-Series may lead to ambiguous ordering."
             )
-
+        raw_length = raw_lengths.pop()
         if have_series:
-            if lengths[0] != len(index):
+            if raw_length != len(index):
                 msg = (
-                    f"array length {lengths[0]} does not match index "
+                    f"array length {raw_length} does not match index "
                     f"length {len(index)}"
                 )
                 raise ValueError(msg)
         else:
-            index = default_index(lengths[0])
+            index = default_index(raw_length)

     return ensure_index(index)
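With raw_lengths as a set, equal lengths collapse on insertion: the mismatch check is simply len(raw_lengths) > 1, and the shared length is one pop(), with no list(set(...)) round-trip. A sketch of the same flow on toy data:

```python
# Toy columnar data; each value would be a raw array in _extract_index.
data = {"a": [1, 2, 3], "b": [4, 5, 6]}

raw_lengths = set()
for val in data.values():
    raw_lengths.add(len(val))  # duplicates collapse as they arrive

# More than one distinct length means the arrays disagree.
if len(raw_lengths) > 1:
    raise ValueError("All arrays must be of the same length")
raw_length = raw_lengths.pop()  # the single common length
print(raw_length)  # 3
```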

pandas/core/tools/datetimes.py (+4 −4)
@@ -1124,18 +1124,18 @@ def f(value):

     # we require at least Ymd
     required = ["year", "month", "day"]
-    req = sorted(set(required) - set(unit_rev.keys()))
+    req = set(required) - set(unit_rev.keys())
     if len(req):
-        _required = ",".join(req)
+        _required = ",".join(sorted(req))
         raise ValueError(
             "to assemble mappings requires at least that "
             f"[year, month, day] be specified: [{_required}] is missing"
         )

     # keys we don't recognize
-    excess = sorted(set(unit_rev.keys()) - set(_unit_map.values()))
+    excess = set(unit_rev.keys()) - set(_unit_map.values())
     if len(excess):
-        _excess = ",".join(excess)
+        _excess = ",".join(sorted(excess))
         raise ValueError(
             f"extra keys have been passed to the datetime assemblage: [{_excess}]"
         )
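Both sorted() calls here only ever existed to make the error messages deterministic, so they move inside the raising branches and the common path pays for plain set arithmetic only. A sketch under stated assumptions (the _unit_map_values set is an illustrative stand-in for set(_unit_map.values())):

```python
# Illustrative stand-in for set(_unit_map.values()).
_unit_map_values = {"year", "month", "day", "hour"}
unit_rev = {"year": "y", "month": "m", "day": "d"}

required = ["year", "month", "day"]
req = set(required) - set(unit_rev.keys())
if len(req):
    # sorted() now runs only on the path that raises
    raise ValueError(f"missing units: [{','.join(sorted(req))}]")

excess = set(unit_rev.keys()) - _unit_map_values
if len(excess):
    raise ValueError(f"extra units: [{','.join(sorted(excess))}]")
print("all required units present")
```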
