diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt index 543eea399f447..668873e838597 100755 --- a/doc/source/whatsnew/v0.17.1.txt +++ b/doc/source/whatsnew/v0.17.1.txt @@ -70,3 +70,7 @@ Bug Fixes - Bug in ``DataFrame.to_latex()`` produces an extra rule when ``header=False`` (:issue:`7124`) + + + +- Bugs in ``to_excel`` with duplicate columns (:issue:`11007`, :issue:`10982`, :issue:`10970`) diff --git a/pandas/core/format.py b/pandas/core/format.py index bc25d14be3960..22e8d6502b358 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -1683,12 +1683,12 @@ class ExcelFormatter(object): def __init__(self, df, na_rep='', float_format=None, cols=None, header=True, index=True, index_label=None, merge_cells=False, inf_rep='inf'): - self.df = df self.rowcounter = 0 self.na_rep = na_rep - self.columns = cols - if cols is None: - self.columns = df.columns + self.df = df + if cols is not None: + self.df = df.loc[:, cols] + self.columns = self.df.columns self.float_format = float_format self.index = index self.index_label = index_label @@ -1843,12 +1843,9 @@ def _format_regular_rows(self): for idx, idxval in enumerate(index_values): yield ExcelCell(self.rowcounter + idx, 0, idxval, header_style) - # Get a frame that will account for any duplicates in the column names. - col_mapped_frame = self.df.loc[:, self.columns] - # Write the body of the frame data series by series. for colidx in range(len(self.columns)): - series = col_mapped_frame.iloc[:, colidx] + series = self.df.iloc[:, colidx] for i, val in enumerate(series): yield ExcelCell(self.rowcounter + i, colidx + coloffset, val) @@ -1917,12 +1914,9 @@ def _format_hierarchical_rows(self): header_style) gcolidx += 1 - # Get a frame that will account for any duplicates in the column names. - col_mapped_frame = self.df.loc[:, self.columns] - # Write the body of the frame data series by series. for colidx in range(len(self.columns)): - series = col_mapped_frame.iloc[:, colidx] + series = self.df.iloc[:, colidx] for i, val in enumerate(series): yield ExcelCell(self.rowcounter + i, gcolidx + colidx, val) diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 13bb116638b98..40cbd97ea539f 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -1346,7 +1346,7 @@ def roundtrip2(df, header=True, parser_hdr=0, index=True): def test_duplicated_columns(self): - # Test for issue #5235. + # Test for issue #5235 _skip_if_no_xlrd() with ensure_clean(self.ext) as path: @@ -1358,7 +1358,20 @@ def test_duplicated_columns(self): read_frame = read_excel(path, 'test1') read_frame.columns = colnames + tm.assert_frame_equal(write_frame, read_frame) + + # 11007 / #10970 + write_frame = DataFrame([[1,2,3,4],[5,6,7,8]], + columns=['A','B','A','B']) + write_frame.to_excel(path, 'test1') + read_frame = read_excel(path, 'test1') + read_frame.columns = ['A','B','A','B'] + tm.assert_frame_equal(write_frame, read_frame) + # 10982 + write_frame.to_excel(path, 'test1', index=False, header=False) + read_frame = read_excel(path, 'test1', header=None) + write_frame.columns = [0, 1, 2, 3] tm.assert_frame_equal(write_frame, read_frame) def test_swapped_columns(self): @@ -1375,6 +1388,23 @@ def test_swapped_columns(self): tm.assert_series_equal(write_frame['A'], read_frame['A']) tm.assert_series_equal(write_frame['B'], read_frame['B']) + def test_invalid_columns(self): + # 10982 + _skip_if_no_xlrd() + + with ensure_clean(self.ext) as path: + write_frame = DataFrame({'A': [1, 1, 1], + 'B': [2, 2, 2]}) + + write_frame.to_excel(path, 'test1', columns=['B', 'C']) + expected = write_frame.loc[:, ['B','C']] + read_frame = read_excel(path, 'test1') + tm.assert_frame_equal(expected, read_frame) + + with tm.assertRaises(KeyError): + write_frame.to_excel(path, 'test1', columns=['C', 'D']) + + def test_datetimes(self): # Test writing and reading datetimes. For issue #9139. (xref #9185)