From 2a72b6000fa915938933afc36fa322988a5f013e Mon Sep 17 00:00:00 2001 From: Tom Neep Date: Mon, 26 Feb 2018 16:23:38 +0100 Subject: [PATCH] BUG: fix to_latex() when a MultiIndex contains NaN (#14249) --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/io/formats/latex.py | 14 ++++++++------ pandas/tests/io/formats/test_to_latex.py | 22 ++++++++++++++++++++++ 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index c08e22af295f4..cbfc49d7fa3ab 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -966,6 +966,7 @@ I/O - Bug in :func:`read_csv` where missing values were not being handled properly when ``keep_default_na=False`` with dictionary ``na_values`` (:issue:`19227`) - Bug in :func:`read_sas` where a file with 0 variables gave an ``AttributeError`` incorrectly. Now it gives an ``EmptyDataError`` (:issue:`18184`) - Bug in :func:`DataFrame.to_latex()` where pairs of braces meant to serve as invisible placeholders were escaped (:issue:`18667`) +- Bug in :func:`DataFrame.to_latex()` where a ``NaN`` in a ``MultiIndex`` would cause an ``IndexError`` or incorrect output (:issue:`14249`) - Bug in :func:`read_json` where large numeric values were causing an ``OverflowError`` (:issue:`18842`) - Bug in :func:`DataFrame.to_parquet` where an exception was raised if the write destination is S3 (:issue:`19134`) - :class:`Interval` now supported in :func:`DataFrame.to_excel` for all Excel file types (:issue:`19242`) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 67b0a4f0e034e..e85fcf9edaff4 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -72,7 +72,8 @@ def get_col_type(dtype): previous_lev3 = None for i, lev in enumerate(self.frame.index.levels): lev2 = lev.format() - blank = ' ' * len(lev2[0]) + blank = (' ' * len(lev2[0]) if lev2 else + ' ' * len(self.fmt.na_rep)) # display column names in last index-column if 
cname and i == lastcol: lev3 = [x if x else '{}' for x in self.frame.columns.names] @@ -82,15 +83,16 @@ def get_col_type(dtype): lev3.append(lev.name) current_idx_val = None for level_idx in self.frame.index.labels[i]: + idx_val = (lev2[level_idx] if level_idx >= 0 else + self.fmt.na_rep) if ((previous_lev3 is None or - previous_lev3[len(lev3)].isspace()) and - lev2[level_idx] == current_idx_val): + previous_lev3[len(lev3)].isspace()) and + idx_val == current_idx_val): # same index as above row and left index was the same lev3.append(blank) else: - # different value than above or left index different - lev3.append(lev2[level_idx]) - current_idx_val = lev2[level_idx] + lev3.append(idx_val) + current_idx_val = idx_val strcols.insert(i, lev3) previous_lev3 = lev3 diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 5ebf196be094e..a0f1da855a3b3 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -621,3 +621,25 @@ def test_to_latex_multiindex_names(self, name0, name1, axes): \end{tabular} """ % tuple(list(col_names) + [idx_names_row]) assert observed == expected + + @pytest.mark.parametrize('one_row', [True, False]) + def test_to_latex_multiindex_nans(self, one_row): + # GH 14249 + df = pd.DataFrame({'a': [None, 1], 'b': [2, 3], 'c': [4, 5]}) + if one_row: + df = df.iloc[[0]] + observed = df.set_index(['a', 'b']).to_latex() + expected = r"""\begin{tabular}{llr} +\toprule + & & c \\ +a & b & \\ +\midrule +NaN & 2 & 4 \\ +""" + if not one_row: + expected += r"""1.0 & 3 & 5 \\ +""" + expected += r"""\bottomrule +\end{tabular} +""" + assert observed == expected