diff --git a/pandas/core/series.py b/pandas/core/series.py index 3e9d3d5c04559..e12f926d2eda3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1586,6 +1586,129 @@ def __repr__(self): return result + def _repr_html_(self) -> Optional[str]: + """ + Return a html representation for a particular DataFrame. + + Mainly for IPython notebook. + """ + if get_option("display.notebook_repr_html"): + max_rows = get_option("display.max_rows") + min_rows = get_option("display.min_rows") + show_dimensions = get_option("display.show_dimensions") + + formatter = fmt.SeriesFormatter( + self, + name=self.name, + length=show_dimensions, + header=True, + index=True, + bold_rows=True, + escape=True, + dtype=True, + na_rep="NaN", + float_format=None, + min_rows=min_rows, + max_rows=max_rows, + show_dimensions=show_dimensions, + decimal=".", + series_id=None, + render_links=False, + ) + return formatter.to_html(notebook=True) + else: + return None + + @Substitution( + header_type="bool", + header="Whether to print column labels, default True", + col_space_type="str or int", + col_space="The minimum width of each column in CSS length " + "units. An int is assumed to be px units.\n\n" + " .. versionadded:: 0.25.0\n" + " Ability to use str", + ) + @Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring) + def to_html( + self, + buf=None, + col_space=None, + header=True, + index=True, + na_rep="NaN", + formatters=None, + float_format=None, + sparsify=None, + index_names=True, + justify=None, + max_rows=None, + show_dimensions=False, + decimal=".", + bold_rows=True, + classes=None, + escape=True, + notebook=False, + border=None, + series_id=None, + render_links=False, + encoding=None, + ): + """ + Render a Series as a single-column HTML table. + %(shared_params)s + bold_rows : bool, default True + Make the row labels bold in the output. + classes : str or list or tuple, default None + CSS class(es) to apply to the resulting html table. 
+ escape : bool, default True + Convert the characters <, >, and & to HTML-safe sequences. + notebook : {True, False}, default False + Whether the generated HTML is for IPython Notebook. + border : int + A ``border=border`` attribute is included in the opening + `<table>` tag. Default ``pd.options.display.html.border``. + encoding : str, default "utf-8" + Set character encoding. + series_id : str, optional + A css id is included in the opening `<table>
` tag if specified. + render_links : bool, default False + Convert URLs to HTML links. + %(returns)s + See Also + -------- + to_string : Convert Series to a string. + """ + + if justify is not None and justify not in fmt._VALID_JUSTIFY_PARAMETERS: + raise ValueError("Invalid value for justify parameter") + + formatter = fmt.SeriesFormatter( + self, + col_space=col_space, + na_rep=na_rep, + formatters=formatters, + float_format=float_format, + sparsify=sparsify, + justify=justify, + index_names=index_names, + header=header, + index=index, + bold_rows=bold_rows, + escape=escape, + max_rows=max_rows, + show_dimensions=show_dimensions, + decimal=decimal, + series_id=series_id, + render_links=render_links, + ) + return formatter.to_html( + buf=buf, + classes=classes, + notebook=notebook, + border=border, + encoding=encoding, + ) + def to_string( self, buf=None, diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 17603809c2ea6..e67655e84c726 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -226,7 +226,79 @@ def to_string(self) -> str: return str("\n".join(result)) -class SeriesFormatter: +class TableFormatter: + + show_dimensions = None # type: bool + is_truncated = None # type: bool + formatters = None # type: formatters_type + columns = None # type: Index + + @property + def should_show_dimensions(self) -> Optional[bool]: + return self.show_dimensions is True or ( + self.show_dimensions == "truncate" and self.is_truncated + ) + + def _get_formatter(self, i: Union[str, int]) -> Optional[Callable]: + if isinstance(self.formatters, (list, tuple)): + if is_integer(i): + i = cast(int, i) + return self.formatters[i] + else: + return None + else: + if is_integer(i) and i not in self.columns: + i = self.columns[i] + return self.formatters.get(i, None) + + @contextmanager + def get_buffer( + self, buf: Optional[FilePathOrBuffer[str]], encoding: Optional[str] = None + ): + """ + Context manager to open, yield and close buffer 
for filenames or Path-like + objects, otherwise yield buf unchanged. + """ + if buf is not None: + buf = _stringify_path(buf) + else: + buf = StringIO() + + if encoding is None: + encoding = "utf-8" + elif not isinstance(buf, str): + raise ValueError("buf is not a file name and encoding is specified.") + + if hasattr(buf, "write"): + yield buf + elif isinstance(buf, str): + with codecs.open(buf, "w", encoding=encoding) as f: + yield f + else: + raise TypeError("buf is not a file name and it has no write method") + + def write_result(self, buf: IO[str]) -> None: + """ + Write the result of serialization to buf. + """ + raise AbstractMethodError(self) + + def get_result( + self, + buf: Optional[FilePathOrBuffer[str]] = None, + encoding: Optional[str] = None, + ) -> Optional[str]: + """ + Perform serialization. Write to buf or return as string if buf is None. + """ + with self.get_buffer(buf, encoding=encoding) as f: + self.write_result(buf=f) + if buf is None: + return f.getvalue() + return None + + +class SeriesFormatter(TableFormatter): def __init__( self, series: "Series", @@ -240,10 +312,25 @@ def __init__( dtype: bool = True, max_rows: Optional[int] = None, min_rows: Optional[int] = None, + justify: Optional[str] = None, + sparsify: Optional[bool] = None, + formatters: Optional[formatters_type] = None, + show_dimensions: bool = False, + col_space: Optional[Union[str, int]] = None, + decimal: str = ".", + index_names: bool = True, + series_id: Optional[str] = None, + render_links: bool = False, + bold_rows: bool = False, + escape: bool = True, ): self.series = series self.buf = buf if buf is not None else StringIO() self.name = name + if formatters is None: + self.formatters = {} + else: + self.formatters = formatters self.na_rep = na_rep self.header = header self.length = length @@ -251,6 +338,25 @@ def __init__( self.max_rows = max_rows self.min_rows = min_rows + self.show_dimensions = show_dimensions + self.col_space = col_space + self.decimal = decimal + 
self.show_index_names = index_names + self.series_id = series_id + self.render_links = render_links + self.bold_rows = bold_rows + self.escape = escape + + if sparsify is None: + sparsify = get_option("display.multi_sparse") + + self.sparsify = sparsify + + if justify is None: + justify = get_option("display.colheader_justify") + + self.justify = justify + if float_format is None: float_format = get_option("display.float_format") self.float_format = float_format @@ -285,6 +391,7 @@ def _chk_truncate(self) -> None: self.tr_row_num = None self.tr_series = series self.truncate_v = truncate_v + self.is_truncated = self.truncate_v def _get_footer(self) -> str: name = self.series.name @@ -324,6 +431,14 @@ def _get_footer(self) -> str: return str(footer) + @property + def has_index_names(self) -> bool: + return _has_names(self.series.index) + + @property + def show_row_idx_names(self) -> bool: + return all((self.has_index_names, self.index, self.show_index_names)) + def _get_formatted_index(self) -> Tuple[List[str], bool]: index = self.tr_series.index is_multi = isinstance(index, ABCMultiIndex) @@ -384,6 +499,46 @@ def to_string(self) -> str: return str("".join(result)) + def _format_col(self) -> List[str]: + series = self.tr_series + formatter = None + return format_array( + series._values, + formatter, + float_format=self.float_format, + na_rep=self.na_rep, + decimal=self.decimal, + ) + + def to_html( + self, + buf: Optional[FilePathOrBuffer[str]] = None, + encoding: Optional[str] = None, + classes: Optional[Union[str, List, Tuple]] = None, + notebook: bool = False, + border: Optional[int] = None, + ) -> Optional[str]: + """ + Render a Series to a html table. + + Parameters + ---------- + classes : str or list-like + classes to include in the `class` attribute of the opening + ``
<table>`` tag, in addition to the default "series". + notebook : {True, False}, optional, default False + Whether the generated HTML is for IPython Notebook. + border : int + A ``border=border`` attribute is included in the opening + ``<table>
`` tag. Default ``pd.options.display.html.border``. + """ + from pandas.io.formats.html import HTMLColumnFormatter, NotebookColumnFormatter + + Klass = NotebookColumnFormatter if notebook else HTMLColumnFormatter + return Klass(self, classes=classes, border=border).get_result( + buf=buf, encoding=encoding + ) + class TextAdjustment: def __init__(self): @@ -446,78 +601,6 @@ def _get_adjustment() -> TextAdjustment: return TextAdjustment() -class TableFormatter: - - show_dimensions = None # type: bool - is_truncated = None # type: bool - formatters = None # type: formatters_type - columns = None # type: Index - - @property - def should_show_dimensions(self) -> Optional[bool]: - return self.show_dimensions is True or ( - self.show_dimensions == "truncate" and self.is_truncated - ) - - def _get_formatter(self, i: Union[str, int]) -> Optional[Callable]: - if isinstance(self.formatters, (list, tuple)): - if is_integer(i): - i = cast(int, i) - return self.formatters[i] - else: - return None - else: - if is_integer(i) and i not in self.columns: - i = self.columns[i] - return self.formatters.get(i, None) - - @contextmanager - def get_buffer( - self, buf: Optional[FilePathOrBuffer[str]], encoding: Optional[str] = None - ): - """ - Context manager to open, yield and close buffer for filenames or Path-like - objects, otherwise yield buf unchanged. - """ - if buf is not None: - buf = _stringify_path(buf) - else: - buf = StringIO() - - if encoding is None: - encoding = "utf-8" - elif not isinstance(buf, str): - raise ValueError("buf is not a file name and encoding is specified.") - - if hasattr(buf, "write"): - yield buf - elif isinstance(buf, str): - with codecs.open(buf, "w", encoding=encoding) as f: - yield f - else: - raise TypeError("buf is not a file name and it has no write method") - - def write_result(self, buf: IO[str]) -> None: - """ - Write the result of serialization to buf. 
- """ - raise AbstractMethodError(self) - - def get_result( - self, - buf: Optional[FilePathOrBuffer[str]] = None, - encoding: Optional[str] = None, - ) -> Optional[str]: - """ - Perform serialization. Write to buf or return as string if buf is None. - """ - with self.get_buffer(buf, encoding=encoding) as f: - self.write_result(buf=f) - if buf is None: - return f.getvalue() - return None - - class DataFrameFormatter(TableFormatter): """ Render a DataFrame @@ -968,9 +1051,9 @@ def to_html( A ``border=border`` attribute is included in the opening ``
`` tag. Default ``pd.options.display.html.border``. """ - from pandas.io.formats.html import HTMLFormatter, NotebookFormatter + from pandas.io.formats.html import HTMLTableFormatter, NotebookTableFormatter - Klass = NotebookFormatter if notebook else HTMLFormatter + Klass = NotebookTableFormatter if notebook else HTMLTableFormatter return Klass(self, classes=classes, border=border).get_result( buf=buf, encoding=encoding ) diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index 38f2e332017f0..9fd2bee674048 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -15,6 +15,7 @@ from pandas.io.common import _is_url from pandas.io.formats.format import ( DataFrameFormatter, + SeriesFormatter, TableFormatter, buffer_put_lines, get_level_lengths, @@ -22,7 +23,7 @@ from pandas.io.formats.printing import pprint_thing -class HTMLFormatter(TableFormatter): +class HTMLTableFormatter(TableFormatter): """ Internal class for formatting output data in html. This class is intended for shared functionality between @@ -566,7 +567,7 @@ def _write_hierarchical_rows( ) -class NotebookFormatter(HTMLFormatter): +class NotebookTableFormatter(HTMLTableFormatter): """ Internal class for formatting output data in html for display in Jupyter Notebooks. This class is intended for functionality specific to @@ -613,3 +614,312 @@ def render(self) -> List[str]: super().render() self.write("") return self.elements + + +class HTMLColumnFormatter(HTMLTableFormatter): + """ + Internal class for formatting output data in html. + This class is intended for shared functionality between + Series.to_html() and Series._repr_html_(). + Any logic in common with other output formatting methods + should ideally be inherited from classes in format.py + and this class responsible for only producing html markup. 
+ """ + + indent_delta = 2 + + def __init__( + self, + formatter: SeriesFormatter, + classes: Optional[Union[str, List[str], Tuple[str, ...]]] = None, + border: Optional[int] = None, + ) -> None: + self.fmt = formatter + self.classes = classes + + self.series = self.fmt.series + self.series_id = self.fmt.series_id + self.elements = [] # type: List[str] + self.bold_rows = self.fmt.bold_rows + self.escape = self.fmt.escape + self.show_dimensions = self.fmt.show_dimensions + if border is None: + border = cast(int, get_option("display.html.border")) + self.border = border + self.render_links = self.fmt.render_links + + def render(self) -> List[str]: + self._write_column() + + info = [] + if self.series.name is not None: + info.append('Name: {name}'.format(name=self.series.name)) + if self.should_show_dimensions: + info.append('Length: {rows}'.format(rows=len(self.series))) + info.append('dtype: {dtype}'.format(dtype=self.series.dtype)) + + self.write("

{info}

".format(info=', '.join(info))) + + return self.elements + + def _get_formatted_values(self) -> Dict[int, List[str]]: + return {0: self.fmt._format_col()} + + def _write_column(self, indent: int = 0) -> None: + _classes = ["series"] # Default class. + use_mathjax = get_option("display.html.use_mathjax") + if not use_mathjax: + _classes.append("tex2jax_ignore") + if self.classes is not None: + if isinstance(self.classes, str): + self.classes = self.classes.split() + if not isinstance(self.classes, (list, tuple)): + raise TypeError( + "classes must be a string, list, or tuple, " + "not {typ}".format(typ=type(self.classes)) + ) + _classes.extend(self.classes) + + if self.series_id is None: + id_section = "" + else: + id_section = ' id="{series_id}"'.format(series_id=self.series_id) + + self.write( + '
'.format( + border=self.border, cls=" ".join(_classes), id_section=id_section + ), + indent, + ) + + if self.show_row_idx_names: + self._write_header(indent + self.indent_delta) + + self._write_body(indent + self.indent_delta) + + self.write("
", indent) + + def _write_body(self, indent: int) -> None: + self.write("", indent) + fmt_values = self._get_formatted_values() + + # write values + if self.fmt.index and isinstance(self.series.index, ABCMultiIndex): + self._write_hierarchical_rows(fmt_values, indent + self.indent_delta) + else: + self._write_regular_rows(fmt_values, indent + self.indent_delta) + + self.write("", indent) + + def _write_row_header(self, indent: int) -> None: + row = [x if x is not None else "" for x in self.series.index.names] + [""] + self.write_tr(row, indent, self.indent_delta, header=True) + + def _write_header(self, indent: int) -> None: + self.write("", indent) + + if self.show_row_idx_names: + self._write_row_header(indent + self.indent_delta) + + self.write("", indent) + + def _write_regular_rows( + self, fmt_values: Mapping[int, List[str]], indent: int + ) -> None: + truncate_v = self.fmt.truncate_v + + nrows = len(self.fmt.tr_series) + + if self.fmt.index: + fmt = self.fmt._get_formatter("__index__") + if fmt is not None: + index_values = self.fmt.tr_series.index.map(fmt) + else: + index_values = self.fmt.tr_series.index.format() + + row = [] # type: List[str] + for i in range(nrows): + + if truncate_v and i == (self.fmt.tr_row_num): + str_sep_row = ["..."] * len(row) + self.write_tr( + str_sep_row, + indent, + self.indent_delta, + tags=None, + nindex_levels=self.row_levels, + ) + + row = [] + if self.fmt.index: + row.append(index_values[i]) + row.extend(fmt_values[j][i] for j in range(self.ncols)) + + self.write_tr( + row, indent, self.indent_delta, tags=None, nindex_levels=self.row_levels + ) + + def _write_hierarchical_rows( + self, fmt_values: Mapping[int, List[str]], indent: int + ) -> None: + template = 'rowspan="{span}" valign="top"' + + truncate_v = self.fmt.truncate_v + series = self.fmt.tr_series + nrows = len(series) + + idx_values = series.index.format(sparsify=False, adjoin=False, names=False) + idx_values = list(zip(*idx_values)) + + if self.fmt.sparsify: + 
sentinel = object() + levels = series.index.format(sparsify=sentinel, adjoin=False, names=False) + + level_lengths = get_level_lengths(levels, sentinel) + inner_lvl = len(level_lengths) - 1 + if truncate_v: + # Insert ... row and adjust idx_values and + # level_lengths to take this into account. + ins_row = self.fmt.tr_row_num + # cast here since if truncate_v is True, self.fmt.tr_row_num is not None + ins_row = cast(int, ins_row) + inserted = False + for lnum, records in enumerate(level_lengths): + rec_new = {} + for tag, span in list(records.items()): + if tag >= ins_row: + rec_new[tag + 1] = span + elif tag + span > ins_row: + rec_new[tag] = span + 1 + + # GH 14882 - Make sure insertion done once + if not inserted: + dot_row = list(idx_values[ins_row - 1]) + dot_row[-1] = "..." + idx_values.insert(ins_row, tuple(dot_row)) + inserted = True + else: + dot_row = list(idx_values[ins_row]) + dot_row[inner_lvl - lnum] = "..." + idx_values[ins_row] = tuple(dot_row) + else: + rec_new[tag] = span + # If ins_row lies between tags, all cols idx cols + # receive ... + if tag + span == ins_row: + rec_new[ins_row] = 1 + if lnum == 0: + idx_values.insert( + ins_row, tuple(["..."] * len(level_lengths)) + ) + + # GH 14882 - Place ... in correct level + elif inserted: + dot_row = list(idx_values[ins_row]) + dot_row[inner_lvl - lnum] = "..." 
+ idx_values[ins_row] = tuple(dot_row) + level_lengths[lnum] = rec_new + + level_lengths[inner_lvl][ins_row] = 1 + for ix_col in range(len(fmt_values)): + fmt_values[ix_col].insert(ins_row, "...") + nrows += 1 + + for i in range(nrows): + row = [] + tags = {} + + sparse_offset = 0 + j = 0 + for records, v in zip(level_lengths, idx_values[i]): + if i in records: + if records[i] > 1: + tags[j] = template.format(span=records[i]) + else: + sparse_offset += 1 + continue + + j += 1 + row.append(v) + + row.append(fmt_values[0][i]) + self.write_tr( + row, + indent, + self.indent_delta, + tags=tags, + nindex_levels=len(levels) - sparse_offset, + ) + else: + row = [] + for i in range(len(series)): + if truncate_v and i == (self.fmt.tr_row_num): + str_sep_row = ["..."] * len(row) + self.write_tr( + str_sep_row, + indent, + self.indent_delta, + tags=None, + nindex_levels=self.row_levels, + ) + + idx_values = list( + zip(*series.index.format(sparsify=False, adjoin=False, names=False)) + ) + row = [] + row.extend(idx_values[i]) + row.append(fmt_values[0][i]) + self.write_tr( + row, + indent, + self.indent_delta, + tags=None, + nindex_levels=series.index.nlevels, + ) + + @property + def row_levels(self) -> int: + if self.fmt.index: + # showing (row) index + return self.series.index.nlevels + else: + return 0 + + @property + def ncols(self) -> int: + return 1 + + +class NotebookColumnFormatter(HTMLColumnFormatter): + """ + Internal class for formatting output data in html for display in Jupyter + Notebooks. This class is intended for functionality specific to + DataFrame._repr_html_() and DataFrame.to_html(notebook=True) + """ + + def write_style(self) -> None: + # We use the "scoped" attribute here so that the desired + # style properties for the data frame are not then applied + # throughout the entire notebook. 
+ template_first = """\ + """ + template_select = """\ + .dataframe %s { + %s: %s; + }""" + element_props = [ + ("tbody tr th:only-of-type", "vertical-align", "middle"), + ("tbody tr th", "vertical-align", "top"), + ] + template_mid = "\n\n".join(map(lambda t: template_select % t, element_props)) + template = dedent("\n".join((template_first, template_mid, template_last))) + self.write(template) + + def render(self) -> List[str]: + self.write("
") + self.write_style() + super().render() + self.write("
") + return self.elements diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index ca9db88ae7be4..8c5153c77521a 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -25,7 +25,7 @@ class LatexFormatter(TableFormatter): See Also -------- - HTMLFormatter + HTMLTableFormatter """ def __init__( diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index f0d5ef19c4468..bc9da78d1bb82 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -132,16 +132,15 @@ def test_publishes(self): df = pd.DataFrame({"A": [1, 2]}) objects = [df["A"], df, df] # dataframe / series - expected_keys = [ - {"text/plain", "application/vnd.dataresource+json"}, - {"text/plain", "text/html", "application/vnd.dataresource+json"}, - ] + expected_keys = { + "text/plain", "text/html", "application/vnd.dataresource+json" + } opt = pd.option_context("display.html.table_schema", True) - for obj, expected in zip(objects, expected_keys): + for obj in objects: with opt: formatted = self.display_formatter.format(obj) - assert set(formatted[0].keys()) == expected + assert set(formatted[0].keys()) == expected_keys with_latex = pd.option_context("display.latex.repr", True)