Skip to content

Commit 0d574a5

Browse files
ivanovmg authored and JulianWgs committed
REF: dataframe formatters/outputs (pandas-dev#36510)
1 parent fc3ea3e commit 0d574a5

File tree

8 files changed

+640
-538
lines changed

8 files changed

+640
-538
lines changed

pandas/_typing.py

+6
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,8 @@
3838
from pandas.core.indexes.base import Index
3939
from pandas.core.series import Series
4040

41+
from pandas.io.formats.format import EngFormatter
42+
4143
# array-like
4244

4345
AnyArrayLike = TypeVar("AnyArrayLike", "ExtensionArray", "Index", "Series", np.ndarray)
@@ -127,6 +129,10 @@
127129
EncodingVar = TypeVar("EncodingVar", str, None, Optional[str])
128130

129131

132+
# type of float formatter in DataFrameFormatter
133+
FloatFormatType = Union[str, Callable, "EngFormatter"]
134+
135+
130136
@dataclass
131137
class IOargs(Generic[ModeVar, EncodingVar]):
132138
"""

pandas/core/frame.py

+12-11
Original file line number | Diff line number | Diff line change
@@ -788,10 +788,8 @@ def _repr_html_(self) -> Optional[str]:
788788
max_cols=max_cols,
789789
show_dimensions=show_dimensions,
790790
decimal=".",
791-
table_id=None,
792-
render_links=False,
793791
)
794-
return formatter.to_html(notebook=True)
792+
return fmt.DataFrameRenderer(formatter).to_html(notebook=True)
795793
else:
796794
return None
797795

@@ -874,9 +872,12 @@ def to_string(
874872
max_cols=max_cols,
875873
show_dimensions=show_dimensions,
876874
decimal=decimal,
875+
)
876+
return fmt.DataFrameRenderer(formatter).to_string(
877+
buf=buf,
878+
encoding=encoding,
877879
line_width=line_width,
878880
)
879-
return formatter.to_string(buf=buf, encoding=encoding)
880881

881882
# ----------------------------------------------------------------------
882883

@@ -2476,29 +2477,29 @@ def to_html(
24762477
columns=columns,
24772478
col_space=col_space,
24782479
na_rep=na_rep,
2480+
header=header,
2481+
index=index,
24792482
formatters=formatters,
24802483
float_format=float_format,
2484+
bold_rows=bold_rows,
24812485
sparsify=sparsify,
24822486
justify=justify,
24832487
index_names=index_names,
2484-
header=header,
2485-
index=index,
2486-
bold_rows=bold_rows,
24872488
escape=escape,
2489+
decimal=decimal,
24882490
max_rows=max_rows,
24892491
max_cols=max_cols,
24902492
show_dimensions=show_dimensions,
2491-
decimal=decimal,
2492-
table_id=table_id,
2493-
render_links=render_links,
24942493
)
24952494
# TODO: a generic formatter would be in DataFrameFormatter
2496-
return formatter.to_html(
2495+
return fmt.DataFrameRenderer(formatter).to_html(
24972496
buf=buf,
24982497
classes=classes,
24992498
notebook=notebook,
25002499
border=border,
25012500
encoding=encoding,
2501+
table_id=table_id,
2502+
render_links=render_links,
25022503
)
25032504

25042505
# ----------------------------------------------------------------------

pandas/core/generic.py

+17-20
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44
from datetime import timedelta
55
import functools
66
import gc
7-
from io import StringIO
87
import json
98
import operator
109
import pickle
@@ -109,7 +108,11 @@
109108
from pandas.core.window import Expanding, ExponentialMovingWindow, Rolling, Window
110109

111110
from pandas.io.formats import format as fmt
112-
from pandas.io.formats.format import DataFrameFormatter, format_percentiles
111+
from pandas.io.formats.format import (
112+
DataFrameFormatter,
113+
DataFrameRenderer,
114+
format_percentiles,
115+
)
113116
from pandas.io.formats.printing import pprint_thing
114117

115118
if TYPE_CHECKING:
@@ -3149,7 +3152,7 @@ def to_latex(
31493152
escape=escape,
31503153
decimal=decimal,
31513154
)
3152-
return formatter.to_latex(
3155+
return DataFrameRenderer(formatter).to_latex(
31533156
buf=buf,
31543157
column_format=column_format,
31553158
longtable=longtable,
@@ -3182,7 +3185,7 @@ def to_csv(
31823185
date_format: Optional[str] = None,
31833186
doublequote: bool_t = True,
31843187
escapechar: Optional[str] = None,
3185-
decimal: Optional[str] = ".",
3188+
decimal: str = ".",
31863189
errors: str = "strict",
31873190
storage_options: StorageOptions = None,
31883191
) -> Optional[str]:
@@ -3340,39 +3343,33 @@ def to_csv(
33403343
"""
33413344
df = self if isinstance(self, ABCDataFrame) else self.to_frame()
33423345

3343-
from pandas.io.formats.csvs import CSVFormatter
3346+
formatter = DataFrameFormatter(
3347+
frame=df,
3348+
header=header,
3349+
index=index,
3350+
na_rep=na_rep,
3351+
float_format=float_format,
3352+
decimal=decimal,
3353+
)
33443354

3345-
formatter = CSVFormatter(
3346-
df,
3355+
return DataFrameRenderer(formatter).to_csv(
33473356
path_or_buf,
33483357
line_terminator=line_terminator,
33493358
sep=sep,
33503359
encoding=encoding,
33513360
errors=errors,
33523361
compression=compression,
33533362
quoting=quoting,
3354-
na_rep=na_rep,
3355-
float_format=float_format,
3356-
cols=columns,
3357-
header=header,
3358-
index=index,
3363+
columns=columns,
33593364
index_label=index_label,
33603365
mode=mode,
33613366
chunksize=chunksize,
33623367
quotechar=quotechar,
33633368
date_format=date_format,
33643369
doublequote=doublequote,
33653370
escapechar=escapechar,
3366-
decimal=decimal,
33673371
storage_options=storage_options,
33683372
)
3369-
formatter.save()
3370-
3371-
if path_or_buf is None:
3372-
assert isinstance(formatter.path_or_buf, StringIO)
3373-
return formatter.path_or_buf.getvalue()
3374-
3375-
return None
33763373

33773374
# ----------------------------------------------------------------------
33783375
# Lookup Caching

pandas/io/formats/csvs.py

+47-56
Original file line number | Diff line number | Diff line change
@@ -5,14 +5,15 @@
55
import csv as csvlib
66
from io import StringIO, TextIOWrapper
77
import os
8-
from typing import Any, Dict, Hashable, Iterator, List, Optional, Sequence, Union
8+
from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Sequence, Union
99

1010
import numpy as np
1111

1212
from pandas._libs import writers as libwriters
1313
from pandas._typing import (
1414
CompressionOptions,
1515
FilePathOrBuffer,
16+
FloatFormatType,
1617
IndexLabel,
1718
Label,
1819
StorageOptions,
@@ -30,18 +31,17 @@
3031

3132
from pandas.io.common import get_filepath_or_buffer, get_handle
3233

34+
if TYPE_CHECKING:
35+
from pandas.io.formats.format import DataFrameFormatter
36+
3337

3438
class CSVFormatter:
3539
def __init__(
3640
self,
37-
obj,
41+
formatter: "DataFrameFormatter",
3842
path_or_buf: Optional[FilePathOrBuffer[str]] = None,
3943
sep: str = ",",
40-
na_rep: str = "",
41-
float_format: Optional[str] = None,
4244
cols: Optional[Sequence[Label]] = None,
43-
header: Union[bool, Sequence[Hashable]] = True,
44-
index: bool = True,
4545
index_label: Optional[IndexLabel] = None,
4646
mode: str = "w",
4747
encoding: Optional[str] = None,
@@ -54,10 +54,11 @@ def __init__(
5454
date_format: Optional[str] = None,
5555
doublequote: bool = True,
5656
escapechar: Optional[str] = None,
57-
decimal=".",
5857
storage_options: StorageOptions = None,
5958
):
60-
self.obj = obj
59+
self.fmt = formatter
60+
61+
self.obj = self.fmt.frame
6162

6263
self.encoding = encoding or "utf-8"
6364

@@ -79,35 +80,45 @@ def __init__(
7980
self.mode = ioargs.mode
8081

8182
self.sep = sep
82-
self.na_rep = na_rep
83-
self.float_format = float_format
84-
self.decimal = decimal
85-
self.header = header
86-
self.index = index
87-
self.index_label = index_label
83+
self.index_label = self._initialize_index_label(index_label)
8884
self.errors = errors
8985
self.quoting = quoting or csvlib.QUOTE_MINIMAL
90-
self.quotechar = quotechar
86+
self.quotechar = self._initialize_quotechar(quotechar)
9187
self.doublequote = doublequote
9288
self.escapechar = escapechar
9389
self.line_terminator = line_terminator or os.linesep
9490
self.date_format = date_format
95-
self.cols = cols # type: ignore[assignment]
96-
self.chunksize = chunksize # type: ignore[assignment]
91+
self.cols = self._initialize_columns(cols)
92+
self.chunksize = self._initialize_chunksize(chunksize)
93+
94+
@property
95+
def na_rep(self) -> str:
96+
return self.fmt.na_rep
97+
98+
@property
99+
def float_format(self) -> Optional["FloatFormatType"]:
100+
return self.fmt.float_format
97101

98102
@property
99-
def index_label(self) -> IndexLabel:
100-
return self._index_label
103+
def decimal(self) -> str:
104+
return self.fmt.decimal
101105

102-
@index_label.setter
103-
def index_label(self, index_label: Optional[IndexLabel]) -> None:
106+
@property
107+
def header(self) -> Union[bool, Sequence[str]]:
108+
return self.fmt.header
109+
110+
@property
111+
def index(self) -> bool:
112+
return self.fmt.index
113+
114+
def _initialize_index_label(self, index_label: Optional[IndexLabel]) -> IndexLabel:
104115
if index_label is not False:
105116
if index_label is None:
106-
index_label = self._get_index_label_from_obj()
117+
return self._get_index_label_from_obj()
107118
elif not isinstance(index_label, (list, tuple, np.ndarray, ABCIndexClass)):
108119
# given a string for a DF with Index
109-
index_label = [index_label]
110-
self._index_label = index_label
120+
return [index_label]
121+
return index_label
111122

112123
def _get_index_label_from_obj(self) -> List[str]:
113124
if isinstance(self.obj.index, ABCMultiIndex):
@@ -122,30 +133,17 @@ def _get_index_label_flat(self) -> List[str]:
122133
index_label = self.obj.index.name
123134
return [""] if index_label is None else [index_label]
124135

125-
@property
126-
def quotechar(self) -> Optional[str]:
136+
def _initialize_quotechar(self, quotechar: Optional[str]) -> Optional[str]:
127137
if self.quoting != csvlib.QUOTE_NONE:
128138
# prevents crash in _csv
129-
return self._quotechar
139+
return quotechar
130140
return None
131141

132-
@quotechar.setter
133-
def quotechar(self, quotechar: Optional[str]) -> None:
134-
self._quotechar = quotechar
135-
136142
@property
137143
def has_mi_columns(self) -> bool:
138144
return bool(isinstance(self.obj.columns, ABCMultiIndex))
139145

140-
@property
141-
def cols(self) -> Sequence[Label]:
142-
return self._cols
143-
144-
@cols.setter
145-
def cols(self, cols: Optional[Sequence[Label]]) -> None:
146-
self._cols = self._refine_cols(cols)
147-
148-
def _refine_cols(self, cols: Optional[Sequence[Label]]) -> Sequence[Label]:
146+
def _initialize_columns(self, cols: Optional[Sequence[Label]]) -> Sequence[Label]:
149147
# validate mi options
150148
if self.has_mi_columns:
151149
if cols is not None:
@@ -161,12 +159,16 @@ def _refine_cols(self, cols: Optional[Sequence[Label]]) -> Sequence[Label]:
161159

162160
# update columns to include possible multiplicity of dupes
163161
# and make sure sure cols is just a list of labels
164-
cols = self.obj.columns
165-
if isinstance(cols, ABCIndexClass):
166-
return cols._format_native_types(**self._number_format)
162+
new_cols = self.obj.columns
163+
if isinstance(new_cols, ABCIndexClass):
164+
return new_cols._format_native_types(**self._number_format)
167165
else:
168-
assert isinstance(cols, Sequence)
169-
return list(cols)
166+
return list(new_cols)
167+
168+
def _initialize_chunksize(self, chunksize: Optional[int]) -> int:
169+
if chunksize is None:
170+
return (100000 // (len(self.cols) or 1)) or 1
171+
return int(chunksize)
170172

171173
@property
172174
def _number_format(self) -> Dict[str, Any]:
@@ -179,17 +181,6 @@ def _number_format(self) -> Dict[str, Any]:
179181
decimal=self.decimal,
180182
)
181183

182-
@property
183-
def chunksize(self) -> int:
184-
return self._chunksize
185-
186-
@chunksize.setter
187-
def chunksize(self, chunksize: Optional[int]) -> None:
188-
if chunksize is None:
189-
chunksize = (100000 // (len(self.cols) or 1)) or 1
190-
assert chunksize is not None
191-
self._chunksize = int(chunksize)
192-
193184
@property
194185
def data_index(self) -> Index:
195186
data_index = self.obj.index

0 commit comments

Comments
 (0)