Skip to content

Commit 67b72e3

Browse files
brandys11jreback
authored and committed
[BUG] Reading multiindex, incorrectly names columns without name.
closes #12453 Author: Jozef Brandys <[email protected]> Closes #13115 from brandys11/excel_multiindex_empty_name and squashes the following commits: 7953aee [Jozef Brandys] [BUG] Reading multiindex, incorrectly names columns without name.
1 parent b1bfd2f commit 67b72e3

File tree

3 files changed

+96
-23
lines changed

3 files changed

+96
-23
lines changed

pandas/formats/format.py

+29-18
Original file line number | Diff line number | Diff line change
@@ -1277,29 +1277,41 @@ def _write_hierarchical_rows(self, fmt_values, indent):
12771277

12781278

12791279
def _get_level_lengths(levels, sentinel=''):
1280-
from itertools import groupby
1280+
"""For each index in each level the function returns lengths of indexes.
12811281
1282-
def _make_grouper():
1283-
record = {'count': 0}
1282+
Parameters
1283+
----------
1284+
levels : list of lists
1285+
List of values for each level.
1286+
sentinel : string, optional
1287+
Value which states that no new index starts there.
12841288
1285-
def grouper(x):
1286-
if x != sentinel:
1287-
record['count'] += 1
1288-
return record['count']
1289+
Returns
1290+
----------
1291+
Returns list of maps. For each level returns map of indexes (key is index
1292+
in row and value is length of index).
1293+
"""
1294+
if len(levels) == 0:
1295+
return []
12891296

1290-
return grouper
1297+
control = [True for x in levels[0]]
12911298

12921299
result = []
1293-
for lev in levels:
1294-
i = 0
1295-
f = _make_grouper()
1296-
recs = {}
1297-
for key, gpr in groupby(lev, f):
1298-
values = list(gpr)
1299-
recs[i] = len(values)
1300-
i += len(values)
1300+
for level in levels:
1301+
last_index = 0
13011302

1302-
result.append(recs)
1303+
lengths = {}
1304+
for i, key in enumerate(level):
1305+
if control[i] and key == sentinel:
1306+
pass
1307+
else:
1308+
control[i] = False
1309+
lengths[last_index] = i - last_index
1310+
last_index = i
1311+
1312+
lengths[last_index] = len(level) - last_index
1313+
1314+
result.append(lengths)
13031315

13041316
return result
13051317

@@ -1762,7 +1774,6 @@ def _format_value(self, val):
17621774
return val
17631775

17641776
def _format_header_mi(self):
1765-
17661777
if self.columns.nlevels > 1:
17671778
if not self.index:
17681779
raise NotImplementedError("Writing to Excel with MultiIndex"

pandas/io/excel.py

+27-5
Original file line number | Diff line number | Diff line change
@@ -431,10 +431,13 @@ def _parse_cell(cell_contents, cell_typ):
431431
if header is not None:
432432
if com.is_list_like(header):
433433
header_names = []
434+
control_row = [True for x in data[0]]
434435
for row in header:
435436
if com.is_integer(skiprows):
436437
row += skiprows
437-
data[row] = _fill_mi_header(data[row])
438+
439+
data[row], control_row = _fill_mi_header(
440+
data[row], control_row)
438441
header_name, data[row] = _pop_header_name(
439442
data[row], index_col)
440443
header_names.append(header_name)
@@ -511,16 +514,35 @@ def _trim_excel_header(row):
511514
return row
512515

513516

514-
def _fill_mi_header(row):
515-
# forward fill blanks entries
516-
# from headers if parsing as MultiIndex
517+
def _fill_mi_header(row, control_row):
518+
"""Forward fills blank entries in row, but only inside the same parent index
519+
520+
Used for creating headers in Multiindex.
521+
Parameters
522+
----------
523+
row : list
524+
List of items in a single row.
525+
control_row : list of boolean
526+
Helps to determine if particular column is in same parent index as the
527+
previous value. Used to stop propagation of empty cells between
528+
different indexes.
529+
530+
Returns
531+
----------
532+
Returns changed row and control_row
533+
"""
517534
last = row[0]
518535
for i in range(1, len(row)):
536+
if not control_row[i]:
537+
last = row[i]
538+
519539
if row[i] == '' or row[i] is None:
520540
row[i] = last
521541
else:
542+
control_row[i] = False
522543
last = row[i]
523-
return row
544+
545+
return row, control_row
524546

525547
# fill blank if index_col not None
526548

pandas/io/tests/test_excel.py

+40
Original file line number | Diff line number | Diff line change
@@ -725,6 +725,46 @@ def test_read_excel_multiindex(self):
725725
header=[0, 1], skiprows=2)
726726
tm.assert_frame_equal(actual, expected)
727727

728+
def test_read_excel_multiindex_empty_level(self):
729+
# GH 12453
730+
_skip_if_no_xlsxwriter()
731+
with ensure_clean('.xlsx') as path:
732+
df = DataFrame({
733+
('Zero', ''): {0: 0},
734+
('One', 'x'): {0: 1},
735+
('Two', 'X'): {0: 3},
736+
('Two', 'Y'): {0: 7}
737+
})
738+
739+
expected = DataFrame({
740+
('Zero', 'Unnamed: 3_level_1'): {0: 0},
741+
('One', u'x'): {0: 1},
742+
('Two', u'X'): {0: 3},
743+
('Two', u'Y'): {0: 7}
744+
})
745+
746+
df.to_excel(path)
747+
actual = pd.read_excel(path, header=[0, 1])
748+
tm.assert_frame_equal(actual, expected)
749+
750+
df = pd.DataFrame({
751+
('Beg', ''): {0: 0},
752+
('Middle', 'x'): {0: 1},
753+
('Tail', 'X'): {0: 3},
754+
('Tail', 'Y'): {0: 7}
755+
})
756+
757+
expected = pd.DataFrame({
758+
('Beg', 'Unnamed: 0_level_1'): {0: 0},
759+
('Middle', u'x'): {0: 1},
760+
('Tail', u'X'): {0: 3},
761+
('Tail', u'Y'): {0: 7}
762+
})
763+
764+
df.to_excel(path)
765+
actual = pd.read_excel(path, header=[0, 1])
766+
tm.assert_frame_equal(actual, expected)
767+
728768
def test_excel_multindex_roundtrip(self):
729769
# GH 4679
730770
_skip_if_no_xlsxwriter()

0 commit comments

Comments
 (0)