Skip to content

Commit 7ec74e5

Browse files
tomneep authored and toobaz committed
BUG: fix to_latex() when using MultiIndex with NaN in (#20797)
closes #14249 closes #19981 closes #18326 closes #18669
1 parent d1ace10 commit 7ec74e5

File tree

3 files changed

+101
-29
lines changed

3 files changed

+101
-29
lines changed

doc/source/whatsnew/v0.23.0.txt

+4
Original file line number | Diff line number | Diff line change
@@ -1156,6 +1156,10 @@ I/O
11561156
- Bug in :func:`read_csv` causing heap corruption on 32-bit, big-endian architectures (:issue:`20785`)
11571157
- Bug in :func:`read_sas` where a file with 0 variables gave an ``AttributeError`` incorrectly. Now it gives an ``EmptyDataError`` (:issue:`18184`)
11581158
- Bug in :func:`DataFrame.to_latex()` where pairs of braces meant to serve as invisible placeholders were escaped (:issue:`18667`)
1159+
- Bug in :func:`DataFrame.to_latex()` where a ``NaN`` in a ``MultiIndex`` would cause an ``IndexError`` or incorrect output (:issue:`14249`)
1160+
- Bug in :func:`DataFrame.to_latex()` where a non-string index-level name would result in an ``AttributeError`` (:issue:`19981`)
1161+
- Bug in :func:`DataFrame.to_latex()` where the combination of an index name and the ``index_names=False`` option would result in incorrect output (:issue:`18326`)
1162+
- Bug in :func:`DataFrame.to_latex()` where a ``MultiIndex`` with an empty string as its name would result in incorrect output (:issue:`18669`)
11591163
- Bug in :func:`read_json` where large numeric values were causing an ``OverflowError`` (:issue:`18842`)
11601164
- Bug in :func:`DataFrame.to_parquet` where an exception was raised if the write destination is S3 (:issue:`19134`)
11611165
- :class:`Interval` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`)

pandas/io/formats/latex.py

+26-29
Original file line number | Diff line number | Diff line change
@@ -64,35 +64,32 @@ def get_col_type(dtype):
6464

6565
# reestablish the MultiIndex that has been joined by _to_str_column
6666
if self.fmt.index and isinstance(self.frame.index, MultiIndex):
67+
out = self.frame.index.format(
68+
adjoin=False, sparsify=self.fmt.sparsify,
69+
names=self.fmt.has_index_names, na_rep=self.fmt.na_rep
70+
)
71+
72+
# index.format will sparsify repeated entries with empty strings
73+
# so pad these with some empty space
74+
def pad_empties(x):
75+
for pad in reversed(x):
76+
if pad:
77+
break
78+
return [x[0]] + [i if i else ' ' * len(pad) for i in x[1:]]
79+
out = (pad_empties(i) for i in out)
80+
81+
# Add empty spaces for each column level
6782
clevels = self.frame.columns.nlevels
68-
strcols.pop(0)
69-
name = any(self.frame.index.names)
70-
cname = any(self.frame.columns.names)
71-
lastcol = self.frame.index.nlevels - 1
72-
previous_lev3 = None
73-
for i, lev in enumerate(self.frame.index.levels):
74-
lev2 = lev.format()
75-
blank = ' ' * len(lev2[0])
76-
# display column names in last index-column
77-
if cname and i == lastcol:
78-
lev3 = [x if x else '{}' for x in self.frame.columns.names]
79-
else:
80-
lev3 = [blank] * clevels
81-
if name:
82-
lev3.append(lev.name)
83-
current_idx_val = None
84-
for level_idx in self.frame.index.labels[i]:
85-
if ((previous_lev3 is None or
86-
previous_lev3[len(lev3)].isspace()) and
87-
lev2[level_idx] == current_idx_val):
88-
# same index as above row and left index was the same
89-
lev3.append(blank)
90-
else:
91-
# different value than above or left index different
92-
lev3.append(lev2[level_idx])
93-
current_idx_val = lev2[level_idx]
94-
strcols.insert(i, lev3)
95-
previous_lev3 = lev3
83+
out = [[' ' * len(i[-1])] * clevels + i for i in out]
84+
85+
# Add the column names to the last index column
86+
cnames = self.frame.columns.names
87+
if any(cnames):
88+
new_names = [i if i else '{}' for i in cnames]
89+
out[self.frame.index.nlevels - 1][:clevels] = new_names
90+
91+
# Get rid of old multiindex column and add new ones
92+
strcols = out + strcols[1:]
9693

9794
column_format = self.column_format
9895
if column_format is None:
@@ -118,7 +115,7 @@ def get_col_type(dtype):
118115
ilevels = self.frame.index.nlevels
119116
clevels = self.frame.columns.nlevels
120117
nlevels = clevels
121-
if any(self.frame.index.names):
118+
if self.fmt.has_index_names and self.fmt.show_index_names:
122119
nlevels += 1
123120
strrows = list(zip(*strcols))
124121
self.clinebuf = []

pandas/tests/io/formats/test_to_latex.py

+71
Original file line number | Diff line number | Diff line change
@@ -621,3 +621,74 @@ def test_to_latex_multiindex_names(self, name0, name1, axes):
621621
\end{tabular}
622622
""" % tuple(list(col_names) + [idx_names_row])
623623
assert observed == expected
624+
625+
@pytest.mark.parametrize('one_row', [True, False])
626+
def test_to_latex_multiindex_nans(self, one_row):
627+
# GH 14249
628+
df = pd.DataFrame({'a': [None, 1], 'b': [2, 3], 'c': [4, 5]})
629+
if one_row:
630+
df = df.iloc[[0]]
631+
observed = df.set_index(['a', 'b']).to_latex()
632+
expected = r"""\begin{tabular}{llr}
633+
\toprule
634+
& & c \\
635+
a & b & \\
636+
\midrule
637+
NaN & 2 & 4 \\
638+
"""
639+
if not one_row:
640+
expected += r"""1.0 & 3 & 5 \\
641+
"""
642+
expected += r"""\bottomrule
643+
\end{tabular}
644+
"""
645+
assert observed == expected
646+
647+
def test_to_latex_non_string_index(self):
648+
# GH 19981
649+
observed = pd.DataFrame([[1, 2, 3]] * 2).set_index([0, 1]).to_latex()
650+
expected = r"""\begin{tabular}{llr}
651+
\toprule
652+
& & 2 \\
653+
0 & 1 & \\
654+
\midrule
655+
1 & 2 & 3 \\
656+
& 2 & 3 \\
657+
\bottomrule
658+
\end{tabular}
659+
"""
660+
assert observed == expected
661+
662+
def test_to_latex_midrule_location(self):
663+
# GH 18326
664+
df = pd.DataFrame({'a': [1, 2]})
665+
df.index.name = 'foo'
666+
observed = df.to_latex(index_names=False)
667+
expected = r"""\begin{tabular}{lr}
668+
\toprule
669+
{} & a \\
670+
\midrule
671+
0 & 1 \\
672+
1 & 2 \\
673+
\bottomrule
674+
\end{tabular}
675+
"""
676+
677+
assert observed == expected
678+
679+
def test_to_latex_multiindex_empty_name(self):
680+
# GH 18669
681+
mi = pd.MultiIndex.from_product([[1, 2]], names=[''])
682+
df = pd.DataFrame(-1, index=mi, columns=range(4))
683+
observed = df.to_latex()
684+
expected = r"""\begin{tabular}{lrrrr}
685+
\toprule
686+
& 0 & 1 & 2 & 3 \\
687+
{} & & & & \\
688+
\midrule
689+
1 & -1 & -1 & -1 & -1 \\
690+
2 & -1 & -1 & -1 & -1 \\
691+
\bottomrule
692+
\end{tabular}
693+
"""
694+
assert observed == expected

0 commit comments

Comments
 (0)