diff --git a/doc/source/whatsnew/v0.21.1.txt b/doc/source/whatsnew/v0.21.1.txt index fcc9144bef9e3..0ad0cac0a4181 100644 --- a/doc/source/whatsnew/v0.21.1.txt +++ b/doc/source/whatsnew/v0.21.1.txt @@ -120,6 +120,7 @@ I/O - Bug in :meth:`DataFrame.to_msgpack` when serializing data of the numpy.bool_ datatype (:issue:`18390`) - Bug in :func:`read_json` not decoding when reading line deliminted JSON from S3 (:issue:`17200`) - Bug in :func:`pandas.io.json.json_normalize` to avoid modification of ``meta`` (:issue:`18610`) +- Bug in :meth:`DataFrame.to_latex` where repeated multi-index values were not printed even though a higher level index differed from the previous row (:issue:`14484`) Plotting ^^^^^^^^ diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 8f25eb3af70cd..97abf60a40a5b 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -46,7 +46,6 @@ import pandas as pd import numpy as np -import itertools import csv from functools import partial @@ -903,6 +902,7 @@ def get_col_type(dtype): name = any(self.frame.index.names) cname = any(self.frame.columns.names) lastcol = self.frame.index.nlevels - 1 + previous_lev3 = None for i, lev in enumerate(self.frame.index.levels): lev2 = lev.format() blank = ' ' * len(lev2[0]) @@ -913,11 +913,19 @@ def get_col_type(dtype): lev3 = [blank] * clevels if name: lev3.append(lev.name) - for level_idx, group in itertools.groupby( - self.frame.index.labels[i]): - count = len(list(group)) - lev3.extend([lev2[level_idx]] + [blank] * (count - 1)) + current_idx_val = None + for level_idx in self.frame.index.labels[i]: + if ((previous_lev3 is None or + previous_lev3[len(lev3)].isspace()) and + lev2[level_idx] == current_idx_val): + # same index as above row and left index was the same + lev3.append(blank) + else: + # different value than above or left index different + lev3.append(lev2[level_idx]) + current_idx_val = lev2[level_idx] strcols.insert(i, lev3) + previous_lev3 = lev3 column_format = 
self.column_format if column_format is None: diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 35ef5a1cf5c72..7d42ff20ea31e 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -221,6 +221,28 @@ def test_to_latex_multiindex(self): assert result == expected + def test_to_latex_multiindex_dupe_level(self): + # see gh-14484 + # + # If an index is repeated in subsequent rows, it should be + # replaced with a blank in the created table. This should + # ONLY happen if all higher order indices (to the left) are + # equal too. In this test, 'c' has to be printed both times + # because the higher order index 'A' != 'B'. + df = pd.DataFrame(index=pd.MultiIndex.from_tuples( + [('A', 'c'), ('B', 'c')]), columns=['col']) + result = df.to_latex() + expected = r"""\begin{tabular}{lll} +\toprule + & & col \\ +\midrule +A & c & NaN \\ +B & c & NaN \\ +\bottomrule +\end{tabular} +""" + assert result == expected + def test_to_latex_multicolumnrow(self): df = pd.DataFrame({ ('c1', 0): {x: x for x in range(5)},