Skip to content

CLN/TYP: pandas/io/formats/excel.py #37862

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 21 commits into from
Nov 24, 2020
Merged
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 58 additions & 59 deletions pandas/io/formats/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from functools import reduce
import itertools
import re
from typing import Callable, Dict, Mapping, Optional, Sequence, Union
from typing import Callable, Dict, Iterator, Mapping, Optional, Sequence, Sized, Union
import warnings

import numpy as np
Expand Down Expand Up @@ -522,16 +522,15 @@ def _format_value(self, val):
)
return val

def _format_header_mi(self):
def _format_header_mi(self) -> Iterator[ExcelCell]:
if self.columns.nlevels > 1:
if not self.index:
raise NotImplementedError(
"Writing to Excel with MultiIndex columns and no "
"index ('index'=False) is not yet implemented."
)

has_aliases = isinstance(self.header, (tuple, list, np.ndarray, ABCIndex))
if not (has_aliases or self.header):
if not (self._has_aliases or self.header):
return

columns = self.columns
Expand All @@ -547,28 +546,34 @@ def _format_header_mi(self):

if self.merge_cells:
# Format multi-index as a merged cells.
for lnum in range(len(level_lengths)):
name = columns.names[lnum]
yield ExcelCell(lnum, coloffset, name, self.header_style)
for lnum, name in enumerate(columns.names):
yield ExcelCell(
row=lnum,
col=coloffset,
val=name,
style=self.header_style,
)

for lnum, (spans, levels, level_codes) in enumerate(
zip(level_lengths, columns.levels, columns.codes)
):
values = levels.take(level_codes)
for i in spans:
kwargs = dict(
row=lnum,
col=coloffset + i + 1,
val=values[i],
style=self.header_style,
)

if spans[i] > 1:
yield ExcelCell(
lnum,
coloffset + i + 1,
values[i],
self.header_style,
lnum,
coloffset + i + spans[i],
)
else:
yield ExcelCell(
lnum, coloffset + i + 1, values[i], self.header_style
kwargs.update(
{
"mergestart": lnum,
"mergeend": coloffset + i + spans[i],
}
)
yield ExcelCell(**kwargs)
else:
# Format in legacy format with dots to indicate levels.
for i, values in enumerate(zip(*level_strs)):
Expand All @@ -577,9 +582,8 @@ def _format_header_mi(self):

self.rowcounter = lnum

def _format_header_regular(self):
has_aliases = isinstance(self.header, (tuple, list, np.ndarray, ABCIndex))
if has_aliases or self.header:
def _format_header_regular(self) -> Iterator[ExcelCell]:
if self._has_aliases or self.header:
coloffset = 0

if self.index:
Expand All @@ -588,17 +592,11 @@ def _format_header_regular(self):
coloffset = len(self.df.index[0])

colnames = self.columns
if has_aliases:
# pandas\io\formats\excel.py:593: error: Argument 1 to "len"
# has incompatible type "Union[Sequence[Optional[Hashable]],
# bool]"; expected "Sized" [arg-type]
if len(self.header) != len(self.columns): # type: ignore[arg-type]
# pandas\io\formats\excel.py:602: error: Argument 1 to
# "len" has incompatible type
# "Union[Sequence[Optional[Hashable]], bool]"; expected
# "Sized" [arg-type]
if self._has_aliases:
assert isinstance(self.header, Sized)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

glad to see this. for some reason my local mypy diagrees with the CI about whether its L601 or L602 that has the problem

Copy link
Member Author

@ivanovmg ivanovmg Nov 15, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems that the problem was with len(self.header), so it should be line 602, but presumably one line was added on top since then.
EDIT: I guess I am not correct here...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

once we add a runtime isinstance, this sort of duplicates the _has_aliases property.

since self.header is a sequence or bool, maybe _has_aliases could reflect that instead of the explicit checks for the 4 classes.

if we have a guarantee from the is_* call, a cast would be more appropriate than the assert.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.
Used casting instead of asserting.
In _has_aliases check if not boolean rather than 4 sequence-like classes.
Is that what you meant?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the types are correct for the header parameter of ExcelFormatter then this should be fine.

also cast is a no-op, so no harm using self.header = cast(Sequence, self.header) and the other self.header -> header changes wouldn't be needed.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, changed the casting as you suggested.

if len(self.header) != len(self.columns):
raise ValueError(
f"Writing {len(self.columns)} cols " # type: ignore[arg-type]
f"Writing {len(self.columns)} cols "
f"but got {len(self.header)} aliases"
)
else:
Expand All @@ -609,7 +607,7 @@ def _format_header_regular(self):
self.rowcounter, colindex + coloffset, colname, self.header_style
)

def _format_header(self):
def _format_header(self) -> Iterator[ExcelCell]:
if isinstance(self.columns, MultiIndex):
gen = self._format_header_mi()
else:
Expand All @@ -631,15 +629,14 @@ def _format_header(self):
self.rowcounter += 1
return itertools.chain(gen, gen2)

def _format_body(self):
def _format_body(self) -> Iterator[ExcelCell]:
if isinstance(self.df.index, MultiIndex):
return self._format_hierarchical_rows()
else:
return self._format_regular_rows()

def _format_regular_rows(self):
has_aliases = isinstance(self.header, (tuple, list, np.ndarray, ABCIndex))
if has_aliases or self.header:
def _format_regular_rows(self) -> Iterator[ExcelCell]:
if self._has_aliases or self.header:
self.rowcounter += 1

# output index and index_label?
Expand Down Expand Up @@ -676,9 +673,8 @@ def _format_regular_rows(self):

yield from self._generate_body(coloffset)

def _format_hierarchical_rows(self):
has_aliases = isinstance(self.header, (tuple, list, np.ndarray, ABCIndex))
if has_aliases or self.header:
def _format_hierarchical_rows(self) -> Iterator[ExcelCell]:
if self._has_aliases or self.header:
self.rowcounter += 1

gcolidx = 0
Expand Down Expand Up @@ -722,39 +718,42 @@ def _format_hierarchical_rows(self):
)

for i in spans:
kwargs = dict(
row=self.rowcounter + i,
col=gcolidx,
val=values[i],
style=self.header_style,
)
if spans[i] > 1:
yield ExcelCell(
self.rowcounter + i,
gcolidx,
values[i],
self.header_style,
self.rowcounter + i + spans[i] - 1,
gcolidx,
)
else:
yield ExcelCell(
self.rowcounter + i,
gcolidx,
values[i],
self.header_style,
kwargs.update(
{
"mergestart": self.rowcounter + i + spans[i] - 1,
"mergeend": gcolidx,
}
)
yield ExcelCell(**kwargs)
gcolidx += 1

else:
# Format hierarchical rows with non-merged values.
for indexcolvals in zip(*self.df.index):
for idx, indexcolval in enumerate(indexcolvals):
yield ExcelCell(
self.rowcounter + idx,
gcolidx,
indexcolval,
self.header_style,
row=self.rowcounter + idx,
col=gcolidx,
val=indexcolval,
style=self.header_style,
)
gcolidx += 1

yield from self._generate_body(gcolidx)

def _generate_body(self, coloffset: int):
@property
def _has_aliases(self) -> bool:
"""Whether the aliases for column names are present."""
return bool(isinstance(self.header, (tuple, list, np.ndarray, ABCIndex)))

def _generate_body(self, coloffset: int) -> Iterator[ExcelCell]:
if self.styler is None:
styles = None
else:
Expand All @@ -771,7 +770,7 @@ def _generate_body(self, coloffset: int):
xlstyle = self.style_converter(";".join(styles[i, colidx]))
yield ExcelCell(self.rowcounter + i, colidx + coloffset, val, xlstyle)

def get_formatted_cells(self):
def get_formatted_cells(self) -> Iterator[ExcelCell]:
for cell in itertools.chain(self._format_header(), self._format_body()):
cell.val = self._format_value(cell.val)
yield cell
Expand Down