-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
CLN/TYP: pandas/io/formats/excel.py #37862
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 12 commits
8d5d4bb
b066f5f
3484f53
5c2f160
ee884ca
dab9e15
1ff8cb8
ee09d4d
c5e1d9c
7d27a71
2f02811
a63b7d1
9fcfb32
aa48fb6
f4abe27
f468397
4cf8f83
084e953
8a9a40a
2ee661c
c88aff9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,7 +5,7 @@ | |
from functools import reduce | ||
import itertools | ||
import re | ||
from typing import Callable, Dict, Mapping, Optional, Sequence, Union | ||
from typing import Callable, Dict, Iterator, Mapping, Optional, Sequence, Union, cast | ||
import warnings | ||
|
||
import numpy as np | ||
|
@@ -14,7 +14,6 @@ | |
|
||
from pandas.core.dtypes import missing | ||
from pandas.core.dtypes.common import is_float, is_scalar | ||
from pandas.core.dtypes.generic import ABCIndex | ||
|
||
from pandas import DataFrame, Index, MultiIndex, PeriodIndex | ||
import pandas.core.common as com | ||
|
@@ -29,7 +28,13 @@ class ExcelCell: | |
__slots__ = __fields__ | ||
|
||
def __init__( | ||
self, row: int, col: int, val, style=None, mergestart=None, mergeend=None | ||
self, | ||
row: int, | ||
col: int, | ||
val, | ||
style=None, | ||
mergestart: Optional[int] = None, | ||
mergeend: Optional[int] = None, | ||
): | ||
self.row = row | ||
self.col = col | ||
|
@@ -522,16 +527,15 @@ def _format_value(self, val): | |
) | ||
return val | ||
|
||
def _format_header_mi(self): | ||
def _format_header_mi(self) -> Iterator[ExcelCell]: | ||
if self.columns.nlevels > 1: | ||
if not self.index: | ||
raise NotImplementedError( | ||
"Writing to Excel with MultiIndex columns and no " | ||
"index ('index'=False) is not yet implemented." | ||
) | ||
|
||
has_aliases = isinstance(self.header, (tuple, list, np.ndarray, ABCIndex)) | ||
if not (has_aliases or self.header): | ||
if not (self._has_aliases or self.header): | ||
return | ||
|
||
columns = self.columns | ||
|
@@ -547,28 +551,27 @@ def _format_header_mi(self): | |
|
||
if self.merge_cells: | ||
# Format multi-index as a merged cells. | ||
for lnum in range(len(level_lengths)): | ||
name = columns.names[lnum] | ||
yield ExcelCell(lnum, coloffset, name, self.header_style) | ||
for lnum, name in enumerate(columns.names): | ||
yield ExcelCell( | ||
row=lnum, | ||
col=coloffset, | ||
val=name, | ||
style=self.header_style, | ||
) | ||
|
||
for lnum, (spans, levels, level_codes) in enumerate( | ||
zip(level_lengths, columns.levels, columns.codes) | ||
): | ||
values = levels.take(level_codes) | ||
for i in spans: | ||
if spans[i] > 1: | ||
yield ExcelCell( | ||
lnum, | ||
coloffset + i + 1, | ||
values[i], | ||
self.header_style, | ||
lnum, | ||
coloffset + i + spans[i], | ||
) | ||
else: | ||
yield ExcelCell( | ||
lnum, coloffset + i + 1, values[i], self.header_style | ||
) | ||
yield ExcelCell( | ||
row=lnum, | ||
col=coloffset + i + 1, | ||
val=values[i], | ||
style=self.header_style, | ||
mergestart=lnum if spans[i] > 1 else None, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am a bit confused what There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we are always multilevel in this function. maybe There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These are good names indeed. Thank you! |
||
mergeend=coloffset + i + spans[i] if spans[i] > 1 else None, | ||
) | ||
else: | ||
# Format in legacy format with dots to indicate levels. | ||
for i, values in enumerate(zip(*level_strs)): | ||
|
@@ -577,9 +580,8 @@ def _format_header_mi(self): | |
|
||
self.rowcounter = lnum | ||
|
||
def _format_header_regular(self): | ||
has_aliases = isinstance(self.header, (tuple, list, np.ndarray, ABCIndex)) | ||
if has_aliases or self.header: | ||
def _format_header_regular(self) -> Iterator[ExcelCell]: | ||
if self._has_aliases or self.header: | ||
coloffset = 0 | ||
|
||
if self.index: | ||
|
@@ -588,17 +590,11 @@ def _format_header_regular(self): | |
coloffset = len(self.df.index[0]) | ||
|
||
colnames = self.columns | ||
if has_aliases: | ||
# pandas\io\formats\excel.py:593: error: Argument 1 to "len" | ||
# has incompatible type "Union[Sequence[Optional[Hashable]], | ||
# bool]"; expected "Sized" [arg-type] | ||
if len(self.header) != len(self.columns): # type: ignore[arg-type] | ||
# pandas\io\formats\excel.py:602: error: Argument 1 to | ||
# "len" has incompatible type | ||
# "Union[Sequence[Optional[Hashable]], bool]"; expected | ||
# "Sized" [arg-type] | ||
if self._has_aliases: | ||
self.header = cast(Sequence, self.header) | ||
if len(self.header) != len(self.columns): | ||
raise ValueError( | ||
f"Writing {len(self.columns)} cols " # type: ignore[arg-type] | ||
f"Writing {len(self.columns)} cols " | ||
f"but got {len(self.header)} aliases" | ||
) | ||
else: | ||
|
@@ -609,7 +605,7 @@ def _format_header_regular(self): | |
self.rowcounter, colindex + coloffset, colname, self.header_style | ||
) | ||
|
||
def _format_header(self): | ||
def _format_header(self) -> Iterator[ExcelCell]: | ||
if isinstance(self.columns, MultiIndex): | ||
gen = self._format_header_mi() | ||
else: | ||
|
@@ -631,15 +627,14 @@ def _format_header(self): | |
self.rowcounter += 1 | ||
return itertools.chain(gen, gen2) | ||
|
||
def _format_body(self): | ||
def _format_body(self) -> Iterator[ExcelCell]: | ||
if isinstance(self.df.index, MultiIndex): | ||
return self._format_hierarchical_rows() | ||
else: | ||
return self._format_regular_rows() | ||
|
||
def _format_regular_rows(self): | ||
has_aliases = isinstance(self.header, (tuple, list, np.ndarray, ABCIndex)) | ||
if has_aliases or self.header: | ||
def _format_regular_rows(self) -> Iterator[ExcelCell]: | ||
if self._has_aliases or self.header: | ||
self.rowcounter += 1 | ||
|
||
# output index and index_label? | ||
|
@@ -676,9 +671,8 @@ def _format_regular_rows(self): | |
|
||
yield from self._generate_body(coloffset) | ||
|
||
def _format_hierarchical_rows(self): | ||
has_aliases = isinstance(self.header, (tuple, list, np.ndarray, ABCIndex)) | ||
if has_aliases or self.header: | ||
def _format_hierarchical_rows(self) -> Iterator[ExcelCell]: | ||
if self._has_aliases or self.header: | ||
self.rowcounter += 1 | ||
|
||
gcolidx = 0 | ||
|
@@ -722,39 +716,40 @@ def _format_hierarchical_rows(self): | |
) | ||
|
||
for i in spans: | ||
if spans[i] > 1: | ||
yield ExcelCell( | ||
self.rowcounter + i, | ||
gcolidx, | ||
values[i], | ||
self.header_style, | ||
self.rowcounter + i + spans[i] - 1, | ||
gcolidx, | ||
) | ||
else: | ||
yield ExcelCell( | ||
self.rowcounter + i, | ||
gcolidx, | ||
values[i], | ||
self.header_style, | ||
) | ||
yield ExcelCell( | ||
row=self.rowcounter + i, | ||
col=gcolidx, | ||
val=values[i], | ||
style=self.header_style, | ||
mergestart=( | ||
self.rowcounter + i + spans[i] - 1 | ||
ivanovmg marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if spans[i] > 1 | ||
else None | ||
), | ||
mergeend=gcolidx if spans[i] > 1 else None, | ||
) | ||
gcolidx += 1 | ||
|
||
else: | ||
# Format hierarchical rows with non-merged values. | ||
for indexcolvals in zip(*self.df.index): | ||
for idx, indexcolval in enumerate(indexcolvals): | ||
yield ExcelCell( | ||
self.rowcounter + idx, | ||
gcolidx, | ||
indexcolval, | ||
self.header_style, | ||
row=self.rowcounter + idx, | ||
col=gcolidx, | ||
val=indexcolval, | ||
style=self.header_style, | ||
) | ||
gcolidx += 1 | ||
|
||
yield from self._generate_body(gcolidx) | ||
|
||
def _generate_body(self, coloffset: int): | ||
@property | ||
def _has_aliases(self) -> bool: | ||
"""Whether the aliases for column names are present.""" | ||
return not isinstance(self.header, bool) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. thinking some more, maybe best not to use bool. we've never really had a discussion on use of bool in type annotations, but sometimes we are actually accepting any truthy value and other times we explicitly check if x is False or if x is True. the header parameter of the docstring of ExcelFormatter should probably be updated to match. It currently is
even though (tuple, list, np.ndarray, ABCIndex) is the current accepted types on master. and the current mypy issue is that header is annotated as perhaps use is_list_like here. (ignoring that a str is a Sequence) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I replaced with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. there is still a discrepancy. Can a sequence of any hashable be passed or does it have to be a sequence of strings. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I also looked through the test base and noticed that header is either bool or some sequence of strings (on some occasions). I am not aware of other use cases, when header is a sequence of any hashables rather than strings. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok can leave as is for now. The docstrings of public methods document the public api, the type annotations should normally be as permissible as possible for argument positions and the docstring and type annotations should match. we don't want to change the public api without discussion, so can circle back to this. |
||
|
||
def _generate_body(self, coloffset: int) -> Iterator[ExcelCell]: | ||
if self.styler is None: | ||
styles = None | ||
else: | ||
|
@@ -771,7 +766,7 @@ def _generate_body(self, coloffset: int): | |
xlstyle = self.style_converter(";".join(styles[i, colidx])) | ||
yield ExcelCell(self.rowcounter + i, colidx + coloffset, val, xlstyle) | ||
|
||
def get_formatted_cells(self): | ||
def get_formatted_cells(self) -> Iterator[ExcelCell]: | ||
for cell in itertools.chain(self._format_header(), self._format_body()): | ||
cell.val = self._format_value(cell.val) | ||
yield cell | ||
|
Uh oh!
There was an error while loading. Please reload this page.