Skip to content

Commit 24a2155

Browse files
hein09 authored and jorisvandenbossche committed
ENH: Added multicolumn/multirow support for latex (pandas-dev#14184)
closes pandas-dev#13508 Print names of MultiIndex columns. Added "multicolumn" and "multirow" flags to to_latex which trigger the corresponding feature. "multicolumn_format" is used to select alignment. Multirow adds clines to visually separate sections.
1 parent 211ecd5 commit 24a2155

File tree

6 files changed

+433
-170
lines changed

6 files changed

+433
-170
lines changed

doc/source/options.rst

+150-145
Large diffs are not rendered by default.

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,7 @@ Other enhancements
182182
- ``Timedelta.isoformat`` method added for formatting Timedeltas as an `ISO 8601 duration`_. See the :ref:`Timedelta docs <timedeltas.isoformat>` (:issue:`15136`)
183183
- ``pandas.io.json.json_normalize()`` gained the option ``errors='ignore'|'raise'``; the default is ``errors='raise'`` which is backward compatible. (:issue:`14583`)
184184
- ``.select_dtypes()`` now allows the string 'datetimetz' to generically select datetimes with tz (:issue:`14910`)
185+
- The ``.to_latex()`` method will now accept ``multicolumn`` and ``multirow`` arguments to use the accompanying LaTeX enhancements (:issue:`13508`)
185186
- ``pd.merge_asof()`` gained the option ``direction='backward'|'forward'|'nearest'`` (:issue:`14887`)
186187
- ``Series/DataFrame.asfreq()`` have gained a ``fill_value`` parameter, to fill missing values (:issue:`3715`).
187188
- ``Series/DataFrame.resample.asfreq`` have gained a ``fill_value`` parameter, to fill missing values during resampling (:issue:`3715`).

pandas/core/config_init.py

+29-2
Original file line numberDiff line numberDiff line change
@@ -239,14 +239,35 @@
239239
: bool
240240
This specifies if the to_latex method of a Dataframe uses escapes special
241241
characters.
242-
method. Valid values: False,True
242+
Valid values: False,True
243243
"""
244244

245245
pc_latex_longtable = """
246246
:bool
247247
This specifies if the to_latex method of a Dataframe uses the longtable
248248
format.
249-
method. Valid values: False,True
249+
Valid values: False,True
250+
"""
251+
252+
pc_latex_multicolumn = """
253+
: bool
254+
This specifies if the to_latex method of a Dataframe uses multicolumns
255+
to pretty-print MultiIndex columns.
256+
Valid values: False,True
257+
"""
258+
259+
pc_latex_multicolumn_format = """
260+
: string
261+
This specifies the format for multicolumn headers.
262+
Can be surrounded with '|'.
263+
Valid values: 'l', 'c', 'r', 'p{<width>}'
264+
"""
265+
266+
pc_latex_multirow = """
267+
: bool
268+
This specifies if the to_latex method of a Dataframe uses multirows
269+
to pretty-print MultiIndex rows.
270+
Valid values: False,True
250271
"""
251272

252273
style_backup = dict()
@@ -339,6 +360,12 @@ def mpl_style_cb(key):
339360
validator=is_bool)
340361
cf.register_option('latex.longtable', False, pc_latex_longtable,
341362
validator=is_bool)
363+
# Register the LaTeX MultiIndex rendering options.
cf.register_option('latex.multicolumn', True, pc_latex_multicolumn,
                   validator=is_bool)
# The format option gets its own description (alignment values), not the
# description of the boolean ``latex.multicolumn`` switch.
cf.register_option('latex.multicolumn_format', 'l',
                   pc_latex_multicolumn_format, validator=is_text)
cf.register_option('latex.multirow', False, pc_latex_multirow,
                   validator=is_bool)
342369

343370
cf.deprecate_option('display.line_width',
344371
msg=pc_line_width_deprecation_warning,

pandas/core/frame.py

+38-8
Original file line numberDiff line numberDiff line change
@@ -1614,10 +1614,11 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True,
16141614
index=True, na_rep='NaN', formatters=None, float_format=None,
16151615
sparsify=None, index_names=True, bold_rows=True,
16161616
column_format=None, longtable=None, escape=None,
1617-
encoding=None, decimal='.'):
1618-
"""
1617+
encoding=None, decimal='.', multicolumn=None,
1618+
multicolumn_format=None, multirow=None):
1619+
r"""
16191620
Render a DataFrame to a tabular environment table. You can splice
1620-
this into a LaTeX document. Requires \\usepackage{booktabs}.
1621+
this into a LaTeX document. Requires \usepackage{booktabs}.
16211622
16221623
`to_latex`-specific options:
16231624
@@ -1628,27 +1629,54 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True,
16281629
<https://en.wikibooks.org/wiki/LaTeX/Tables>`__ e.g 'rcl' for 3
16291630
columns
16301631
longtable : boolean, default will be read from the pandas config module
1631-
default: False
1632+
Default: False.
16321633
Use a longtable environment instead of tabular. Requires adding
1633-
a \\usepackage{longtable} to your LaTeX preamble.
1634+
a \usepackage{longtable} to your LaTeX preamble.
16341635
escape : boolean, default will be read from the pandas config module
1635-
default: True
1636+
Default: True.
16361637
When set to False prevents from escaping latex special
16371638
characters in column names.
16381639
encoding : str, default None
16391640
A string representing the encoding to use in the output file,
16401641
defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.
16411642
decimal : string, default '.'
1642-
Character recognized as decimal separator, e.g. ',' in Europe
1643+
Character recognized as decimal separator, e.g. ',' in Europe.
16431644
16441645
.. versionadded:: 0.18.0
16451646
1647+
multicolumn : boolean, default True
1648+
Use \multicolumn to enhance MultiIndex columns.
1649+
The default will be read from the config module.
1650+
1651+
.. versionadded:: 0.20.0
1652+
1653+
multicolumn_format : str, default 'l'
1654+
The alignment for multicolumns, similar to `column_format`.
1655+
The default will be read from the config module.
1656+
1657+
.. versionadded:: 0.20.0
1658+
1659+
multirow : boolean, default False
1660+
Use \multirow to enhance MultiIndex rows.
1661+
Requires adding a \usepackage{multirow} to your LaTeX preamble.
1662+
Will print centered labels (instead of top-aligned)
1663+
across the contained rows, separating groups via clines.
1664+
The default will be read from the pandas config module.
1665+
1666+
.. versionadded:: 0.20.0
1667+
16461668
"""
16471669
# Get defaults from the pandas config
16481670
if longtable is None:
16491671
longtable = get_option("display.latex.longtable")
16501672
if escape is None:
16511673
escape = get_option("display.latex.escape")
1674+
if multicolumn is None:
1675+
multicolumn = get_option("display.latex.multicolumn")
1676+
if multicolumn_format is None:
1677+
multicolumn_format = get_option("display.latex.multicolumn_format")
1678+
if multirow is None:
1679+
multirow = get_option("display.latex.multirow")
16521680

16531681
formatter = fmt.DataFrameFormatter(self, buf=buf, columns=columns,
16541682
col_space=col_space, na_rep=na_rep,
@@ -1660,7 +1688,9 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True,
16601688
index_names=index_names,
16611689
escape=escape, decimal=decimal)
16621690
formatter.to_latex(column_format=column_format, longtable=longtable,
1663-
encoding=encoding)
1691+
encoding=encoding, multicolumn=multicolumn,
1692+
multicolumn_format=multicolumn_format,
1693+
multirow=multirow)
16641694

16651695
if buf is None:
16661696
return formatter.buf.getvalue()

pandas/formats/format.py

+109-6
Original file line numberDiff line numberDiff line change
@@ -650,13 +650,17 @@ def _join_multiline(self, *strcols):
650650
st = ed
651651
return '\n\n'.join(str_lst)
652652

653-
def to_latex(self, column_format=None, longtable=False, encoding=None):
653+
def to_latex(self, column_format=None, longtable=False, encoding=None,
654+
multicolumn=False, multicolumn_format=None, multirow=False):
654655
"""
655656
Render a DataFrame to a LaTeX tabular/longtable environment output.
656657
"""
657658

658659
latex_renderer = LatexFormatter(self, column_format=column_format,
659-
longtable=longtable)
660+
longtable=longtable,
661+
multicolumn=multicolumn,
662+
multicolumn_format=multicolumn_format,
663+
multirow=multirow)
660664

661665
if encoding is None:
662666
encoding = 'ascii' if compat.PY2 else 'utf-8'
@@ -824,11 +828,15 @@ class LatexFormatter(TableFormatter):
824828
HTMLFormatter
825829
"""
826830

827-
def __init__(self, formatter, column_format=None, longtable=False):
831+
def __init__(self, formatter, column_format=None, longtable=False,
832+
multicolumn=False, multicolumn_format=None, multirow=False):
828833
self.fmt = formatter
829834
self.frame = self.fmt.frame
830835
self.column_format = column_format
831836
self.longtable = longtable
837+
self.multicolumn = multicolumn
838+
self.multicolumn_format = multicolumn_format
839+
self.multirow = multirow
832840

833841
def write_result(self, buf):
834842
"""
@@ -850,14 +858,21 @@ def get_col_type(dtype):
850858
else:
851859
return 'l'
852860

861+
# reestablish the MultiIndex that has been joined by _to_str_column
853862
if self.fmt.index and isinstance(self.frame.index, MultiIndex):
854863
clevels = self.frame.columns.nlevels
855864
strcols.pop(0)
856865
name = any(self.frame.index.names)
866+
cname = any(self.frame.columns.names)
867+
lastcol = self.frame.index.nlevels - 1
857868
for i, lev in enumerate(self.frame.index.levels):
858869
lev2 = lev.format()
859870
blank = ' ' * len(lev2[0])
860-
lev3 = [blank] * clevels
871+
# display column names in last index-column
872+
if cname and i == lastcol:
873+
lev3 = [x if x else '{}' for x in self.frame.columns.names]
874+
else:
875+
lev3 = [blank] * clevels
861876
if name:
862877
lev3.append(lev.name)
863878
for level_idx, group in itertools.groupby(
@@ -885,10 +900,15 @@ def get_col_type(dtype):
885900
buf.write('\\begin{longtable}{%s}\n' % column_format)
886901
buf.write('\\toprule\n')
887902

888-
nlevels = self.frame.columns.nlevels
903+
ilevels = self.frame.index.nlevels
904+
clevels = self.frame.columns.nlevels
905+
nlevels = clevels
889906
if any(self.frame.index.names):
890907
nlevels += 1
891-
for i, row in enumerate(zip(*strcols)):
908+
strrows = list(zip(*strcols))
909+
self.clinebuf = []
910+
911+
for i, row in enumerate(strrows):
892912
if i == nlevels and self.fmt.header:
893913
buf.write('\\midrule\n') # End of header
894914
if self.longtable:
@@ -910,15 +930,98 @@ def get_col_type(dtype):
910930
if x else '{}') for x in row]
911931
else:
912932
crow = [x if x else '{}' for x in row]
933+
if i < clevels and self.fmt.header and self.multicolumn:
934+
# sum up columns to multicolumns
935+
crow = self._format_multicolumn(crow, ilevels)
936+
if (i >= nlevels and self.fmt.index and self.multirow and
937+
ilevels > 1):
938+
# sum up rows to multirows
939+
crow = self._format_multirow(crow, ilevels, i, strrows)
913940
buf.write(' & '.join(crow))
914941
buf.write(' \\\\\n')
942+
if self.multirow and i < len(strrows) - 1:
943+
self._print_cline(buf, i, len(strcols))
915944

916945
if not self.longtable:
917946
buf.write('\\bottomrule\n')
918947
buf.write('\\end{tabular}\n')
919948
else:
920949
buf.write('\\end{longtable}\n')
921950

951+
def _format_multicolumn(self, row, ilevels):
952+
"""
953+
Combine columns belonging to a group to a single multicolumn entry
954+
according to self.multicolumn_format
955+
956+
e.g.:
957+
a & & & b & c &
958+
will become
959+
\multicolumn{3}{l}{a} & b & \multicolumn{2}{l}{c}
960+
"""
961+
row2 = list(row[:ilevels])
962+
ncol = 1
963+
coltext = ''
964+
965+
def append_col():
966+
# write multicolumn if needed
967+
if ncol > 1:
968+
row2.append('\\multicolumn{{{0:d}}}{{{1:s}}}{{{2:s}}}'
969+
.format(ncol, self.multicolumn_format,
970+
coltext.strip()))
971+
# don't modify where not needed
972+
else:
973+
row2.append(coltext)
974+
for c in row[ilevels:]:
975+
# if next col has text, write the previous
976+
if c.strip():
977+
if coltext:
978+
append_col()
979+
coltext = c
980+
ncol = 1
981+
# if not, add it to the previous multicolumn
982+
else:
983+
ncol += 1
984+
# write last column name
985+
if coltext:
986+
append_col()
987+
return row2
988+
989+
def _format_multirow(self, row, ilevels, i, rows):
990+
"""
991+
Check following rows, whether row should be a multirow
992+
993+
e.g.: becomes:
994+
a & 0 & \multirow{2}{*}{a} & 0 &
995+
& 1 & & 1 &
996+
b & 0 & \cline{1-2}
997+
b & 0 &
998+
"""
999+
for j in range(ilevels):
1000+
if row[j].strip():
1001+
nrow = 1
1002+
for r in rows[i + 1:]:
1003+
if not r[j].strip():
1004+
nrow += 1
1005+
else:
1006+
break
1007+
if nrow > 1:
1008+
# overwrite non-multirow entry
1009+
row[j] = '\\multirow{{{0:d}}}{{*}}{{{1:s}}}'.format(
1010+
nrow, row[j].strip())
1011+
# save when to end the current block with \cline
1012+
self.clinebuf.append([i + nrow - 1, j + 1])
1013+
return row
1014+
1015+
def _print_cline(self, buf, i, icol):
1016+
"""
1017+
Print clines after multirow-blocks are finished
1018+
"""
1019+
for cl in self.clinebuf:
1020+
if cl[0] == i:
1021+
buf.write('\cline{{{0:d}-{1:d}}}\n'.format(cl[1], icol))
1022+
# remove entries that have been written to buffer
1023+
self.clinebuf = [x for x in self.clinebuf if x[0] != i]
1024+
9221025

9231026
class HTMLFormatter(TableFormatter):
9241027

0 commit comments

Comments
 (0)