
REF: Clean up some iterator usages #58267


Merged (10 commits) on Apr 16, 2024
pandas/_libs/tslibs/offsets.pyx (12 changes: 5 additions & 7 deletions)
@@ -219,8 +219,7 @@ cdef _get_calendar(weekmask, holidays, calendar):
         holidays = holidays + calendar.holidays().tolist()
     except AttributeError:
         pass
-    holidays = [_to_dt64D(dt) for dt in holidays]
-    holidays = tuple(sorted(holidays))
+    holidays = tuple(sorted(_to_dt64D(dt) for dt in holidays))
 
     kwargs = {"weekmask": weekmask}
     if holidays:
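Note: the pattern here feeds a generator expression straight into sorted(), collapsing the convert-then-sort into a single statement and skipping one intermediate list. A minimal standalone sketch, using np.datetime64(..., "D") as a stand-in for the internal _to_dt64D helper and hypothetical dates:

```python
import numpy as np

holidays = ["2024-01-01", "2023-12-25"]  # hypothetical input

# Before: build a converted list, then sort it (extra intermediate list).
converted = [np.datetime64(d, "D") for d in holidays]
before = tuple(sorted(converted))

# After: sorted() consumes the generator directly (one fewer list).
after = tuple(sorted(np.datetime64(d, "D") for d in holidays))

assert before == after
```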
@@ -419,11 +418,10 @@ cdef class BaseOffset:

if "holidays" in all_paras and not all_paras["holidays"]:
all_paras.pop("holidays")
exclude = ["kwds", "name", "calendar"]
attrs = [(k, v) for k, v in all_paras.items()
if (k not in exclude) and (k[0] != "_")]
attrs = sorted(set(attrs))
params = tuple([str(type(self))] + attrs)
exclude = {"kwds", "name", "calendar"}
attrs = {(k, v) for k, v in all_paras.items()
if (k not in exclude) and (k[0] != "_")}
params = tuple([str(type(self))] + sorted(attrs))
return params

@property
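Note: two container swaps in this hunk. A set literal makes the `k not in exclude` test O(1) instead of a list scan, and building attrs as a set comprehension dedupes while filtering, so the old list -> set -> sorted round-trip becomes a single sorted() call at the end. A sketch with hypothetical parameters and a stand-in class:

```python
class Offset:  # hypothetical stand-in for BaseOffset
    pass

all_paras = {"n": 1, "normalize": False, "name": "x", "_cache": {}}

exclude = {"kwds", "name", "calendar"}  # set membership is O(1)
attrs = {(k, v) for k, v in all_paras.items()
         if (k not in exclude) and (k[0] != "_")}  # dedupes as it filters
params = tuple([str(Offset)] + sorted(attrs))
assert params == (str(Offset), ("n", 1), ("normalize", False))
```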
pandas/core/frame.py (27 changes: 14 additions & 13 deletions)
@@ -2301,8 +2301,8 @@ def maybe_reorder(
             exclude.update(index)
 
         if any(exclude):
-            arr_exclude = [x for x in exclude if x in arr_columns]
-            to_remove = [arr_columns.get_loc(col) for col in arr_exclude]
+            arr_exclude = (x for x in exclude if x in arr_columns)
+            to_remove = {arr_columns.get_loc(col) for col in arr_exclude}
             arrays = [v for i, v in enumerate(arrays) if i not in to_remove]
 
             columns = columns.drop(exclude)
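Note: arr_exclude is consumed exactly once on the following line, so a generator avoids materializing it, and to_remove becomes a set so the `i not in to_remove` check inside the comprehension is O(1) per element. A sketch using list.index as a stand-in for Index.get_loc, with hypothetical columns:

```python
arr_columns = ["a", "b", "c", "d"]        # hypothetical columns
exclude = {"b", "d", "z"}                 # "z" not present, filtered out
arrays = ["col_a", "col_b", "col_c", "col_d"]

arr_exclude = (x for x in exclude if x in arr_columns)   # lazy, single use
to_remove = {arr_columns.index(col) for col in arr_exclude}
arrays = [v for i, v in enumerate(arrays) if i not in to_remove]
assert arrays == ["col_a", "col_c"]
```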
@@ -3705,7 +3705,7 @@ def transpose(
         nv.validate_transpose(args, {})
         # construct the args
 
-        dtypes = list(self.dtypes)
+        first_dtype = self.dtypes.iloc[0] if len(self.columns) else None
 
         if self._can_fast_transpose:
             # Note: tests pass without this, but this improves perf quite a bit.
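Note: the branches below only ever inspect the first dtype, so converting every column's dtype into a Python list was wasted work; the conditional also makes the empty-frame case explicit (None) rather than relying on an empty list being falsy. A sketch of the guard, with a hypothetical frame:

```python
import pandas as pd

df = pd.DataFrame({"a": pd.array([1, 2], dtype="Int64"),
                   "b": pd.array([3, 4], dtype="Int64")})
first_dtype = df.dtypes.iloc[0] if len(df.columns) else None
assert str(first_dtype) == "Int64"

empty = pd.DataFrame()
assert (empty.dtypes.iloc[0] if len(empty.columns) else None) is None
```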
@@ -3723,11 +3723,11 @@

         elif (
             self._is_homogeneous_type
-            and dtypes
-            and isinstance(dtypes[0], ExtensionDtype)
+            and first_dtype is not None
+            and isinstance(first_dtype, ExtensionDtype)
         ):
             new_values: list
-            if isinstance(dtypes[0], BaseMaskedDtype):
+            if isinstance(first_dtype, BaseMaskedDtype):
                 # We have masked arrays with the same dtype. We can transpose faster.
                 from pandas.core.arrays.masked import (
                     transpose_homogeneous_masked_arrays,
@@ -3736,7 +3736,7 @@
                 new_values = transpose_homogeneous_masked_arrays(
                     cast(Sequence[BaseMaskedArray], self._iter_column_arrays())
                 )
-            elif isinstance(dtypes[0], ArrowDtype):
+            elif isinstance(first_dtype, ArrowDtype):
                 # We have arrow EAs with the same dtype. We can transpose faster.
                 from pandas.core.arrays.arrow.array import (
                     ArrowExtensionArray,
@@ -3748,10 +3748,11 @@
                 )
             else:
                 # We have other EAs with the same dtype. We preserve dtype in transpose.
-                dtyp = dtypes[0]
-                arr_typ = dtyp.construct_array_type()
+                arr_typ = first_dtype.construct_array_type()
                 values = self.values
-                new_values = [arr_typ._from_sequence(row, dtype=dtyp) for row in values]
+                new_values = [
+                    arr_typ._from_sequence(row, dtype=first_dtype) for row in values
+                ]
 
         result = type(self)._from_arrays(
             new_values,
@@ -5882,7 +5883,7 @@ def set_index(
         else:
             arrays.append(self.index)
 
-        to_remove: list[Hashable] = []
+        to_remove: set[Hashable] = set()
         for col in keys:
             if isinstance(col, MultiIndex):
                 arrays.extend(col._get_level_values(n) for n in range(col.nlevels))
@@ -5909,7 +5910,7 @@
                 arrays.append(frame[col])
                 names.append(col)
                 if drop:
-                    to_remove.append(col)
+                    to_remove.add(col)
 
             if len(arrays[-1]) != len(self):
                 # check newest element against length of calling frame, since
@@ -5926,7 +5927,7 @@
raise ValueError(f"Index has duplicate keys: {duplicates}")

# use set to handle duplicate column names gracefully in case of drop
for c in set(to_remove):
for c in to_remove:
del frame[c]

# clear up memory usage
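Note: collecting the columns to drop in a set gives O(1) adds and dedupes repeated keys at insertion time, which is why the final loop no longer needs the defensive set(to_remove) wrapper. A sketch with a hypothetical frame and a duplicated key:

```python
import pandas as pd

frame = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
to_remove: set[str] = set()
for col in ["a", "a"]:        # duplicate key is stored only once
    to_remove.add(col)
for c in to_remove:           # no set() wrapper needed anymore
    del frame[c]
assert list(frame.columns) == ["b"]
```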
pandas/core/generic.py (2 changes: 1 addition & 1 deletion)
@@ -2045,7 +2045,7 @@ def __setstate__(self, state) -> None:
                 # e.g. say fill_value needing _mgr to be
                 # defined
                 meta = set(self._internal_names + self._metadata)
-                for k in list(meta):
+                for k in meta:
                     if k in state and k != "_flags":
                         v = state[k]
                         object.__setattr__(self, k, v)
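Note: the list(meta) copy is only ever needed as protection against mutating a collection while iterating it; this loop only reads meta, so iterating the set directly is safe and skips the copy. A self-contained sketch with hypothetical names:

```python
meta = {"_mgr", "_flags", "attrs"}                  # already a set
state = {"attrs": {"source": "demo"}, "_flags": object()}

restored = {}
for k in meta:                  # no list() copy: nothing mutates meta here
    if k in state and k != "_flags":
        restored[k] = state[k]
assert restored == {"attrs": {"source": "demo"}}
```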
pandas/core/internals/construction.py (15 changes: 7 additions & 8 deletions)
@@ -567,7 +567,7 @@ def _extract_index(data) -> Index:
     if len(data) == 0:
         return default_index(0)
 
-    raw_lengths = []
+    raw_lengths = set()
     indexes: list[list[Hashable] | Index] = []
 
     have_raw_arrays = False
@@ -583,7 +583,7 @@
             indexes.append(list(val.keys()))
         elif is_list_like(val) and getattr(val, "ndim", 1) == 1:
             have_raw_arrays = True
-            raw_lengths.append(len(val))
+            raw_lengths.add(len(val))
         elif isinstance(val, np.ndarray) and val.ndim > 1:
             raise ValueError("Per-column arrays must each be 1-dimensional")
@@ -596,24 +596,23 @@
         index = union_indexes(indexes, sort=False)
 
     if have_raw_arrays:
-        lengths = list(set(raw_lengths))
-        if len(lengths) > 1:
+        if len(raw_lengths) > 1:
             raise ValueError("All arrays must be of the same length")
 
         if have_dicts:
             raise ValueError(
                 "Mixing dicts with non-Series may lead to ambiguous ordering."
             )
 
+        raw_length = raw_lengths.pop()
         if have_series:
-            if lengths[0] != len(index):
+            if raw_length != len(index):
                 msg = (
-                    f"array length {lengths[0]} does not match index "
+                    f"array length {raw_length} does not match index "
                     f"length {len(index)}"
                 )
                 raise ValueError(msg)
         else:
-            index = default_index(lengths[0])
+            index = default_index(raw_length)
 
     return ensure_index(index)
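Note: accumulating the observed lengths in a set makes the all-equal check a plain len(raw_lengths) > 1, and once that passes, set.pop() yields the single shared length, replacing the old list(set(...)) round-trip and the lengths[0] indexing. A sketch with hypothetical columns:

```python
data = [[1, 2, 3], [4, 5, 6]]             # hypothetical per-column arrays

raw_lengths: set[int] = set()
for val in data:
    raw_lengths.add(len(val))

if len(raw_lengths) > 1:
    raise ValueError("All arrays must be of the same length")
raw_length = raw_lengths.pop()            # the one shared length
assert raw_length == 3
```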

pandas/core/tools/datetimes.py (8 changes: 4 additions & 4 deletions)
@@ -1124,18 +1124,18 @@ def f(value):

     # we require at least Ymd
     required = ["year", "month", "day"]
-    req = sorted(set(required) - set(unit_rev.keys()))
+    req = set(required) - set(unit_rev.keys())
     if len(req):
-        _required = ",".join(req)
+        _required = ",".join(sorted(req))
         raise ValueError(
             "to assemble mappings requires at least that "
             f"[year, month, day] be specified: [{_required}] is missing"
         )
 
     # keys we don't recognize
-    excess = sorted(set(unit_rev.keys()) - set(_unit_map.values()))
+    excess = set(unit_rev.keys()) - set(_unit_map.values())
     if len(excess):
-        _excess = ",".join(excess)
+        _excess = ",".join(sorted(excess))
         raise ValueError(
             f"extra keys have been passed to the datetime assemblage: [{_excess}]"
         )
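Note: both set differences are computed on every call, but sorting only matters for producing a deterministic error message, so sorted() moves into the failure branch and the common success path skips it. A sketch of the missing-key case, with hypothetical names (it prints rather than raises so it runs through):

```python
_required_units = ["year", "month", "day"]           # hypothetical names
unit_rev = {"year": "y", "month": "m"}               # "day" is missing

req = set(_required_units) - set(unit_rev.keys())    # no sort on success path
if len(req):
    _required = ",".join(sorted(req))                # sort only when failing
    print(f"[{_required}] is missing")               # -> [day] is missing
```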