ENH: MultiIndex Structure for DataFrame.style

TomAugspurger · TomAugspurger · commit ecba615c2ffe · 2016-07-25T16:54:19.000-05:00
BUG: Fix index class level row

MVP

Columns too

tests
diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
@@ -314,6 +314,7 @@ Other enhancements
 - ``Series.append`` now supports the ``ignore_index`` option (:issue:`13677`)
 - ``.to_stata()`` and ``StataWriter`` can now write variable labels to Stata dta files using a dictionary to make column names to labels (:issue:`13535`, :issue:`13536`)
 - ``.to_stata()`` and ``StataWriter`` will automatically convert ``datetime64[ns]`` columns to Stata format ``%tc``, rather than raising a ``ValueError`` (:issue:`12259`)
+- ``DataFrame.style`` will now render sparsified MultiIndexes (:issue:`11655`)
 - ``DataFrame`` has gained support to re-order the columns based on the values
   in a row using ``df.sort_values(by='...', axis=1)`` (:issue:`10806`)
 
diff --git a/pandas/formats/style.py b/pandas/formats/style.py
@@ -21,7 +21,8 @@
 
 import numpy as np
 import pandas as pd
-from pandas.compat import lzip, range
+from pandas.compat import range
+import pandas.core.common as com
 from pandas.core.indexing import _maybe_numeric_slice, _non_reducing_slice
 try:
     import matplotlib.pyplot as plt
@@ -110,7 +111,9 @@ class Styler(object):
             {% for r in head %}
             <tr>
                 {% for c in r %}
-                <{{c.type}} class="{{c.class}}">{{c.value}}
+                {% if c.is_visible != False %}
+                <{{c.type}} class="{{c.class}}" {{ c.attributes|join(" ") }}>{{c.value}}
+                {% endif %}
                 {% endfor %}
             </tr>
             {% endfor %}
@@ -119,8 +122,10 @@ class Styler(object):
             {% for r in body %}
             <tr>
                 {% for c in r %}
-                <{{c.type}} id="T_{{uuid}}{{c.id}}" class="{{c.class}}">
+                {% if c.is_visible != False %}
+                <{{c.type}} id="T_{{uuid}}{{c.id}}" class="{{c.class}}" {{ c.attributes|join(" ") }}>
                     {{ c.display_value }}
+                {% endif %}
                 {% endfor %}
             </tr>
             {% endfor %}
@@ -181,17 +186,20 @@ def _translate(self):
         BLANK_CLASS = "blank"
         BLANK_VALUE = ""
 
+        def format_attr(pair):
+            return "{key}={value}".format(**pair)
+
+        # for sparsifying a MultiIndex
+        idx_lengths = _get_level_lengths(self.index)
+        col_lengths = _get_level_lengths(self.columns)
+
         cell_context = dict()
 
         n_rlvls = self.data.index.nlevels
         n_clvls = self.data.columns.nlevels
         rlabels = self.data.index.tolist()
         clabels = self.data.columns.tolist()
 
-        idx_values = self.data.index.format(sparsify=False, adjoin=False,
-                                            names=False)
-        idx_values = lzip(*idx_values)
-
         if n_rlvls == 1:
             rlabels = [[x] for x in rlabels]
         if n_clvls == 1:
@@ -213,7 +221,13 @@ def _translate(self):
                 row_es.append({"type": "th",
                                "value": value,
                                "display_value": value,
-                               "class": " ".join(cs)})
+                               "class": " ".join(cs),
+                               "is_visible": _is_visible(c, r, col_lengths),
+                               "attributes": [
+                                   format_attr({"key": "colspan",
+                                                "value": col_lengths.get(
+                                                    (r, c), 1)})
+                               ]})
             head.append(row_es)
 
         if self.data.index.names and self.data.index.names != [None]:
@@ -236,12 +250,17 @@ def _translate(self):
 
         body = []
         for r, idx in enumerate(self.data.index):
-            cs = [ROW_HEADING_CLASS, "level%s" % c, "row%s" % r]
-            cs.extend(
-                cell_context.get("row_headings", {}).get(r, {}).get(c, []))
+            #  cs.extend(
+            #    cell_context.get("row_headings", {}).get(r, {}).get(c, []))
             row_es = [{"type": "th",
+                       "is_visible": _is_visible(r, c, idx_lengths),
+                       "attributes": [
+                           format_attr({"key": "rowspan",
+                                        "value": idx_lengths.get((c, r), 1)})
+                       ],
                        "value": rlabels[r][c],
-                       "class": " ".join(cs),
+                       "class": " ".join([ROW_HEADING_CLASS, "level%s" % c,
+                                          "row%s" % r]),
                        "display_value": rlabels[r][c]}
                       for c in range(len(rlabels[r]))]
 
@@ -893,6 +912,38 @@ def _highlight_extrema(data, color='yellow', max_=True):
                                 index=data.index, columns=data.columns)
 
 
+def _is_visible(idx_row, idx_col, lengths):
+    """
+    Return whether ``(idx_col, idx_row)`` is a key of ``lengths`` (a bool).
+    """
+    return (idx_col, idx_row) in lengths
+
+
+def _get_level_lengths(index):
+    '''
+    Given an index, find the level length for each element.
+
+    Result is a dictionary of (level, initial_position): span
+    '''
+    sentinel = com.sentinel_factory()
+    levels = index.format(sparsify=sentinel, adjoin=False, names=False)
+
+    if index.nlevels == 1:
+        return {(0, i): 1 for i, value in enumerate(levels)}
+
+    lengths = {}
+
+    for i, lvl in enumerate(levels):
+        for j, row in enumerate(lvl):
+            if row != sentinel:
+                last_label = j
+                lengths[(i, last_label)] = 1
+            else:
+                lengths[(i, last_label)] += 1
+
+    return lengths
+
+
 def _maybe_wrap_formatter(formatter):
     if is_string_like(formatter):
         return lambda x: formatter.format(x)
diff --git a/pandas/tests/formats/test_style.py b/pandas/tests/formats/test_style.py
@@ -8,11 +8,6 @@
 from pandas.util.testing import TestCase
 import pandas.util.testing as tm
 
-# this is a mess. Getting failures on a python 2.7 build with
-# whenever we try to import jinja, whether it's installed or not.
-# so we're explicitly skipping that one *before* we try to import
-# jinja. We still need to export the imports as globals,
-# since importing Styler tries to import jinja2.
 job_name = os.environ.get('JOB_NAME', None)
 if job_name == '27_slow_nnet_LOCALE':
     raise SkipTest("No jinja")
@@ -22,7 +17,7 @@
     import jinja2  # noqa
 except ImportError:
     raise SkipTest("No Jinja2")
-from pandas.formats.style import Styler  # noqa
+from pandas.formats.style import Styler, _get_level_lengths  # noqa
 
 
 class TestStyler(TestCase):
@@ -152,15 +147,24 @@ def test_empty_index_name_doesnt_display(self):
                      {'class': 'col_heading level0 col0',
                       'display_value': 'A',
                       'type': 'th',
-                      'value': 'A'},
+                      'value': 'A',
+                      'is_visible': True,
+                      'attributes': ["colspan=1"],
+                      },
                      {'class': 'col_heading level0 col1',
                       'display_value': 'B',
                       'type': 'th',
-                      'value': 'B'},
+                      'value': 'B',
+                      'is_visible': True,
+                      'attributes': ["colspan=1"],
+                      },
                      {'class': 'col_heading level0 col2',
                       'display_value': 'C',
                       'type': 'th',
-                      'value': 'C'}]]
+                      'value': 'C',
+                      'is_visible': True,
+                      'attributes': ["colspan=1"],
+                      }]]
 
         self.assertEqual(result['head'], expected)
 
@@ -171,9 +175,11 @@ def test_index_name(self):
 
         expected = [[{'class': 'blank', 'type': 'th', 'value': ''},
                      {'class': 'col_heading level0 col0', 'type': 'th',
-                      'value': 'B', 'display_value': 'B'},
+                      'value': 'B', 'display_value': 'B',
+                      'is_visible': True, 'attributes': ['colspan=1']},
                      {'class': 'col_heading level0 col1', 'type': 'th',
-                      'value': 'C', 'display_value': 'C'}],
+                      'value': 'C', 'display_value': 'C',
+                      'is_visible': True, 'attributes': ['colspan=1']}],
                     [{'class': 'col_heading level2 col0', 'type': 'th',
                       'value': 'A'},
                      {'class': 'blank', 'type': 'th', 'value': ''},
@@ -189,7 +195,9 @@ def test_multiindex_name(self):
         expected = [[{'class': 'blank', 'type': 'th', 'value': ''},
                      {'class': 'blank', 'type': 'th', 'value': ''},
                      {'class': 'col_heading level0 col0', 'type': 'th',
-                      'value': 'C', 'display_value': 'C'}],
+                      'value': 'C', 'display_value': 'C',
+                      'is_visible': True, 'attributes': ['colspan=1'],
+                      }],
                     [{'class': 'col_heading level2 col0', 'type': 'th',
                       'value': 'A'},
                      {'class': 'col_heading level2 col1', 'type': 'th',
@@ -581,6 +589,32 @@ def f(x):
         with tm.assertRaises(ValueError):
             df.style._apply(f, axis=None)
 
+    def test_get_level_lengths(self):
+        index = pd.MultiIndex.from_product([['a', 'b'], [0, 1, 2]])
+        expected = {(0, 0): 3, (0, 3): 3, (1, 0): 1, (1, 1): 1, (1, 2): 1,
+                    (1, 3): 1, (1, 4): 1, (1, 5): 1}
+        result = _get_level_lengths(index)
+        self.assertDictEqual(result, expected)
+
+    def test_get_level_lengths_un_sorted(self):
+        index = pd.MultiIndex.from_arrays([
+            [1, 1, 2, 1],
+            ['a', 'b', 'b', 'd']
+        ])
+        expected = {(0, 0): 2, (0, 2): 1, (0, 3): 1,
+                    (1, 0): 1, (1, 1): 1, (1, 2): 1, (1, 3): 1}
+        result = _get_level_lengths(index)
+        self.assertDictEqual(result, expected)
+
+    def test_mi_sparse(self):
+        df = pd.DataFrame({'A': [1, 2, 3, 4]},
+                          index=pd.MultiIndex.from_product([['a', 'b'],
+                                                           [0, 1]]))
+        result = df.style.render()
+        assert 'rowspan' in result
+        result = df.T.style.render()
+        assert 'colspan' in result
+
 
 @tm.mplskip
 class TestStylerMatplotlibDep(TestCase):