Skip to content

Commit 7868a58

Browse files
authored
REF: Use more lazy iterators (#58808)
1 parent 2aa155a commit 7868a58

File tree

6 files changed

+43
-44
lines changed

6 files changed

+43
-44
lines changed

pandas/core/apply.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -664,7 +664,7 @@ def _apply_str(self, obj, func: str, *args, **kwargs):
664664
# people may aggregate on a non-callable attribute
665665
# but don't let them think they can pass args to it
666666
assert len(args) == 0
667-
assert len([kwarg for kwarg in kwargs if kwarg not in ["axis"]]) == 0
667+
assert not any(kwarg == "axis" for kwarg in kwargs)
668668
return f
669669
elif hasattr(np, func) and hasattr(obj, "__array__"):
670670
# in particular exclude Window

pandas/core/generic.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -1750,19 +1750,25 @@ def _get_label_or_level_values(self, key: Level, axis: AxisInt = 0) -> ArrayLike
17501750
if `key` matches multiple labels
17511751
"""
17521752
axis = self._get_axis_number(axis)
1753-
other_axes = [ax for ax in range(self._AXIS_LEN) if ax != axis]
1753+
first_other_axes = next(
1754+
(ax for ax in range(self._AXIS_LEN) if ax != axis), None
1755+
)
17541756

17551757
if self._is_label_reference(key, axis=axis):
17561758
self._check_label_or_level_ambiguity(key, axis=axis)
1757-
values = self.xs(key, axis=other_axes[0])._values
1759+
if first_other_axes is None:
1760+
raise ValueError("axis matched all axes")
1761+
values = self.xs(key, axis=first_other_axes)._values
17581762
elif self._is_level_reference(key, axis=axis):
17591763
values = self.axes[axis].get_level_values(key)._values
17601764
else:
17611765
raise KeyError(key)
17621766

17631767
# Check for duplicates
17641768
if values.ndim > 1:
1765-
if other_axes and isinstance(self._get_axis(other_axes[0]), MultiIndex):
1769+
if first_other_axes is not None and isinstance(
1770+
self._get_axis(first_other_axes), MultiIndex
1771+
):
17661772
multi_message = (
17671773
"\n"
17681774
"For a multi-index, the label must be a "

pandas/io/excel/_base.py

+9-10
Original file line numberDiff line numberDiff line change
@@ -857,24 +857,23 @@ def _parse_sheet(
857857
# a row containing just the index name(s)
858858
has_index_names = False
859859
if is_list_header and not is_len_one_list_header and index_col is not None:
860-
index_col_list: Sequence[int]
860+
index_col_set: set[int]
861861
if isinstance(index_col, int):
862-
index_col_list = [index_col]
862+
index_col_set = {index_col}
863863
else:
864864
assert isinstance(index_col, Sequence)
865-
index_col_list = index_col
865+
index_col_set = set(index_col)
866866

867867
# We have to handle mi without names. If any of the entries in the data
868868
# columns are not empty, this is a regular row
869869
assert isinstance(header, Sequence)
870870
if len(header) < len(data):
871871
potential_index_names = data[len(header)]
872-
potential_data = [
873-
x
872+
has_index_names = all(
873+
x == "" or x is None
874874
for i, x in enumerate(potential_index_names)
875-
if not control_row[i] and i not in index_col_list
876-
]
877-
has_index_names = all(x == "" or x is None for x in potential_data)
875+
if not control_row[i] and i not in index_col_set
876+
)
878877

879878
if is_list_like(index_col):
880879
# Forward fill values for MultiIndex index.
@@ -1457,9 +1456,9 @@ def inspect_excel_format(
14571456
with zipfile.ZipFile(stream) as zf:
14581457
# Workaround for some third party files that use forward slashes and
14591458
# lower case names.
1460-
component_names = [
1459+
component_names = {
14611460
name.replace("\\", "/").lower() for name in zf.namelist()
1462-
]
1461+
}
14631462

14641463
if "xl/workbook.xml" in component_names:
14651464
return "xlsx"

pandas/io/excel/_odfreader.py

+16-20
Original file line numberDiff line numberDiff line change
@@ -122,29 +122,25 @@ def get_sheet_data(
122122
table: list[list[Scalar | NaTType]] = []
123123

124124
for sheet_row in sheet_rows:
125-
sheet_cells = [
126-
x
127-
for x in sheet_row.childNodes
128-
if hasattr(x, "qname") and x.qname in cell_names
129-
]
130125
empty_cells = 0
131126
table_row: list[Scalar | NaTType] = []
132127

133-
for sheet_cell in sheet_cells:
134-
if sheet_cell.qname == table_cell_name:
135-
value = self._get_cell_value(sheet_cell)
136-
else:
137-
value = self.empty_value
138-
139-
column_repeat = self._get_column_repeat(sheet_cell)
140-
141-
# Queue up empty values, writing only if content succeeds them
142-
if value == self.empty_value:
143-
empty_cells += column_repeat
144-
else:
145-
table_row.extend([self.empty_value] * empty_cells)
146-
empty_cells = 0
147-
table_row.extend([value] * column_repeat)
128+
for sheet_cell in sheet_row.childNodes:
129+
if hasattr(sheet_cell, "qname") and sheet_cell.qname in cell_names:
130+
if sheet_cell.qname == table_cell_name:
131+
value = self._get_cell_value(sheet_cell)
132+
else:
133+
value = self.empty_value
134+
135+
column_repeat = self._get_column_repeat(sheet_cell)
136+
137+
# Queue up empty values, writing only if content succeeds them
138+
if value == self.empty_value:
139+
empty_cells += column_repeat
140+
else:
141+
table_row.extend([self.empty_value] * empty_cells)
142+
empty_cells = 0
143+
table_row.extend([value] * column_repeat)
148144

149145
if max_row_len < len(table_row):
150146
max_row_len = len(table_row)

pandas/io/excel/_xlrd.py

+4-7
Original file line numberDiff line numberDiff line change
@@ -128,16 +128,13 @@ def _parse_cell(cell_contents, cell_typ):
128128
cell_contents = val
129129
return cell_contents
130130

131-
data = []
132-
133131
nrows = sheet.nrows
134132
if file_rows_needed is not None:
135133
nrows = min(nrows, file_rows_needed)
136-
for i in range(nrows):
137-
row = [
134+
return [
135+
[
138136
_parse_cell(value, typ)
139137
for value, typ in zip(sheet.row_values(i), sheet.row_types(i))
140138
]
141-
data.append(row)
142-
143-
return data
139+
for i in range(nrows)
140+
]

pandas/io/sql.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ def _convert_arrays_to_dataframe(
157157
dtype_backend: DtypeBackend | Literal["numpy"] = "numpy",
158158
) -> DataFrame:
159159
content = lib.to_object_array_tuples(data)
160+
idx_len = content.shape[0]
160161
arrays = convert_object_array(
161162
list(content.T),
162163
dtype=None,
@@ -177,9 +178,9 @@ def _convert_arrays_to_dataframe(
177178
result_arrays.append(ArrowExtensionArray(pa_array))
178179
arrays = result_arrays # type: ignore[assignment]
179180
if arrays:
180-
df = DataFrame(dict(zip(range(len(columns)), arrays)))
181-
df.columns = columns
182-
return df
181+
return DataFrame._from_arrays(
182+
arrays, columns=columns, index=range(idx_len), verify_integrity=False
183+
)
183184
else:
184185
return DataFrame(columns=columns)
185186

0 commit comments

Comments (0)