diff --git a/pandas/formats/format.py b/pandas/formats/format.py index c3ffc018d1031..dd1ce1d446e88 100644 --- a/pandas/formats/format.py +++ b/pandas/formats/format.py @@ -1277,29 +1277,41 @@ def _write_hierarchical_rows(self, fmt_values, indent): def _get_level_lengths(levels, sentinel=''): - from itertools import groupby + """For each index in each level the function returns lengths of indexes. - def _make_grouper(): - record = {'count': 0} + Parameters + ---------- + levels : list of lists + List of values on for level. + sentinel : string, optional + Value which states that no new index starts on there. - def grouper(x): - if x != sentinel: - record['count'] += 1 - return record['count'] + Returns + ---------- + Returns list of maps. For each level returns map of indexes (key is index + in row and value is length of index). + """ + if len(levels) == 0: + return [] - return grouper + control = [True for x in levels[0]] result = [] - for lev in levels: - i = 0 - f = _make_grouper() - recs = {} - for key, gpr in groupby(lev, f): - values = list(gpr) - recs[i] = len(values) - i += len(values) + for level in levels: + last_index = 0 - result.append(recs) + lengths = {} + for i, key in enumerate(level): + if control[i] and key == sentinel: + pass + else: + control[i] = False + lengths[last_index] = i - last_index + last_index = i + + lengths[last_index] = len(level) - last_index + + result.append(lengths) return result @@ -1762,7 +1774,6 @@ def _format_value(self, val): return val def _format_header_mi(self): - if self.columns.nlevels > 1: if not self.index: raise NotImplementedError("Writing to Excel with MultiIndex" diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 4c26480a0f583..775465ea9372d 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -431,10 +431,13 @@ def _parse_cell(cell_contents, cell_typ): if header is not None: if com.is_list_like(header): header_names = [] + control_row = [True for x in data[0]] for row in header: if com.is_integer(skiprows): row += skiprows - data[row] = _fill_mi_header(data[row]) + + data[row], control_row = _fill_mi_header( + data[row], control_row) header_name, data[row] = _pop_header_name( data[row], index_col) header_names.append(header_name) @@ -511,16 +514,35 @@ def _trim_excel_header(row): return row -def _fill_mi_header(row): - # forward fill blanks entries - # from headers if parsing as MultiIndex +def _fill_mi_header(row, control_row): + """Forward fills blank entries in row, but only inside the same parent index + + Used for creating headers in Multiindex. + Parameters + ---------- + row : list + List of items in a single row. + constrol_row : list of boolean + Helps to determine if particular column is in same parent index as the + previous value. Used to stop propagation of empty cells between + different indexes. + + Returns + ---------- + Returns changed row and control_row + """ last = row[0] for i in range(1, len(row)): + if not control_row[i]: + last = row[i] + if row[i] == '' or row[i] is None: row[i] = last else: + control_row[i] = False last = row[i] - return row + + return row, control_row # fill blank if index_col not None diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index af053450d78c4..4b0bcd85b26a9 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -726,6 +726,46 @@ def test_read_excel_multiindex(self): header=[0, 1], skiprows=2) tm.assert_frame_equal(actual, expected) + def test_read_excel_multiindex_empty_level(self): + # GH 12453 + _skip_if_no_xlsxwriter() + with ensure_clean('.xlsx') as path: + df = DataFrame({ + ('Zero', ''): {0: 0}, + ('One', 'x'): {0: 1}, + ('Two', 'X'): {0: 3}, + ('Two', 'Y'): {0: 7} + }) + + expected = DataFrame({ + ('Zero', 'Unnamed: 3_level_1'): {0: 0}, + ('One', u'x'): {0: 1}, + ('Two', u'X'): {0: 3}, + ('Two', u'Y'): {0: 7} + }) + + df.to_excel(path) + actual = pd.read_excel(path, header=[0, 1]) + tm.assert_frame_equal(actual, expected) + + df = pd.DataFrame({ + ('Beg', ''): {0: 0}, + ('Middle', 'x'): {0: 1}, + ('Tail', 'X'): {0: 3}, + ('Tail', 'Y'): {0: 7} + }) + + expected = pd.DataFrame({ + ('Beg', 'Unnamed: 0_level_1'): {0: 0}, + ('Middle', u'x'): {0: 1}, + ('Tail', u'X'): {0: 3}, + ('Tail', u'Y'): {0: 7} + }) + + df.to_excel(path) + actual = pd.read_excel(path, header=[0, 1]) + tm.assert_frame_equal(actual, expected) + def test_excel_multindex_roundtrip(self): # GH 4679 _skip_if_no_xlsxwriter()