Skip to content

Commit c0daa15

Browse files
committed
Refactor LatexFormatter.write_result() multiindex handling to be much cleaner
1 parent cad6dc7 commit c0daa15

File tree

3 files changed

+49
-29
lines changed

3 files changed

+49
-29
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -966,6 +966,7 @@ I/O
966966
- Bug in :func:`read_csv` where missing values were not being handled properly when ``keep_default_na=False`` with dictionary ``na_values`` (:issue:`19227`)
967967
- Bug in :func:`read_sas` where a file with 0 variables gave an ``AttributeError`` incorrectly. Now it gives an ``EmptyDataError`` (:issue:`18184`)
968968
- Bug in :func:`DataFrame.to_latex()` where pairs of braces meant to serve as invisible placeholders were escaped (:issue:`18667`)
969+
- Bug in :func:`DataFrame.to_latex()` where a ``NaN`` in a ``MultiIndex`` would cause an ``IndexError`` or incorrect output (:issue:`14249`)
969970
- Bug in :func:`read_json` where large numeric values were causing an ``OverflowError`` (:issue:`18842`)
970971
- Bug in :func:`DataFrame.to_parquet` where an exception was raised if the write destination is S3 (:issue:`19134`)
971972
- :class:`Interval` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`)

pandas/io/formats/latex.py

+26-29
Original file line number | Diff line number | Diff line change
@@ -64,35 +64,32 @@ def get_col_type(dtype):
6464

6565
# reestablish the MultiIndex that has been joined by _to_str_column
6666
if self.fmt.index and isinstance(self.frame.index, MultiIndex):
67+
out = self.frame.index.format(
68+
adjoin=False, sparsify=self.fmt.sparsify,
69+
names=self.fmt.has_index_names, na_rep=self.fmt.na_rep
70+
)
71+
72+
# index.format will sparsify repeated entries with empty strings
73+
# so pad these with some empty space
74+
def pad_empties(x):
75+
for pad in reversed(x):
76+
if pad:
77+
break
78+
return [x[0]] + [i if i else ' ' * len(pad) for i in x[1:]]
79+
out = (pad_empties(i) for i in out)
80+
81+
# Add empty spaces for each column level
6782
clevels = self.frame.columns.nlevels
68-
strcols.pop(0)
69-
name = any(self.frame.index.names)
70-
cname = any(self.frame.columns.names)
71-
lastcol = self.frame.index.nlevels - 1
72-
previous_lev3 = None
73-
for i, lev in enumerate(self.frame.index.levels):
74-
lev2 = lev.format()
75-
blank = ' ' * len(lev2[0])
76-
# display column names in last index-column
77-
if cname and i == lastcol:
78-
lev3 = [x if x else '{}' for x in self.frame.columns.names]
79-
else:
80-
lev3 = [blank] * clevels
81-
if name:
82-
lev3.append(lev.name)
83-
current_idx_val = None
84-
for level_idx in self.frame.index.labels[i]:
85-
if ((previous_lev3 is None or
86-
previous_lev3[len(lev3)].isspace()) and
87-
lev2[level_idx] == current_idx_val):
88-
# same index as above row and left index was the same
89-
lev3.append(blank)
90-
else:
91-
# different value than above or left index different
92-
lev3.append(lev2[level_idx])
93-
current_idx_val = lev2[level_idx]
94-
strcols.insert(i, lev3)
95-
previous_lev3 = lev3
83+
out = [[' ' * len(i[-1])] * clevels + i for i in out]
84+
85+
# Add the column names to the last index column
86+
cnames = self.frame.columns.names
87+
if any(cnames):
88+
new_names = [i if i else '{}' for i in cnames]
89+
out[self.frame.index.nlevels - 1][:clevels] = new_names
90+
91+
# Get rid of old multiindex column and add new ones
92+
strcols = out + strcols[1:]
9693

9794
column_format = self.column_format
9895
if column_format is None:
@@ -118,7 +115,7 @@ def get_col_type(dtype):
118115
ilevels = self.frame.index.nlevels
119116
clevels = self.frame.columns.nlevels
120117
nlevels = clevels
121-
if any(self.frame.index.names):
118+
if self.fmt.has_index_names and self.fmt.show_index_names:
122119
nlevels += 1
123120
strrows = list(zip(*strcols))
124121
self.clinebuf = []

pandas/tests/io/formats/test_to_latex.py

+22
Original file line numberDiff line numberDiff line change
@@ -621,3 +621,25 @@ def test_to_latex_multiindex_names(self, name0, name1, axes):
621621
\end{tabular}
622622
""" % tuple(list(col_names) + [idx_names_row])
623623
assert observed == expected
624+
625+
@pytest.mark.parametrize('one_row', [True, False])
626+
def test_to_latex_multiindex_nans(self, one_row):
627+
# GH 14249
628+
df = pd.DataFrame({'a': [None, 1], 'b': [2, 3], 'c': [4, 5]})
629+
if one_row:
630+
df = df.iloc[[0]]
631+
observed = df.set_index(['a', 'b']).to_latex()
632+
expected = r"""\begin{tabular}{llr}
633+
\toprule
634+
& & c \\
635+
a & b & \\
636+
\midrule
637+
NaN & 2 & 4 \\
638+
"""
639+
if not one_row:
640+
expected += r"""1.0 & 3 & 5 \\
641+
"""
642+
expected += r"""\bottomrule
643+
\end{tabular}
644+
"""
645+
assert observed == expected

0 commit comments

Comments
 (0)