Merge pull request #11237 from chris-b1/excel-column-bugs

jreback · jreback · commit cac4ad295e16 · 2015-10-09T20:23:39.000-04:00
BUG: to_excel duplicate columns
diff --git a/doc/source/whatsnew/v0.17.1.txt b/doc/source/whatsnew/v0.17.1.txt
@@ -70,3 +70,7 @@ Bug Fixes
 
 
 - Bug in ``DataFrame.to_latex()`` produces an extra rule when ``header=False`` (:issue:`7124`)
+
+
+
+- Bugs in ``to_excel`` with duplicate columns (:issue:`11007`, :issue:`10982`, :issue:`10970`)
diff --git a/pandas/core/format.py b/pandas/core/format.py
@@ -1683,12 +1683,12 @@ class ExcelFormatter(object):
     def __init__(self, df, na_rep='', float_format=None, cols=None,
                  header=True, index=True, index_label=None, merge_cells=False,
                  inf_rep='inf'):
-        self.df = df
         self.rowcounter = 0
         self.na_rep = na_rep
-        self.columns = cols
-        if cols is None:
-            self.columns = df.columns
+        self.df = df
+        if cols is not None:
+            self.df = df.loc[:, cols]
+        self.columns = self.df.columns
         self.float_format = float_format
         self.index = index
         self.index_label = index_label
@@ -1843,12 +1843,9 @@ def _format_regular_rows(self):
             for idx, idxval in enumerate(index_values):
                 yield ExcelCell(self.rowcounter + idx, 0, idxval, header_style)
 
-        # Get a frame that will account for any duplicates in the column names.
-        col_mapped_frame = self.df.loc[:, self.columns]
-
         # Write the body of the frame data series by series.
         for colidx in range(len(self.columns)):
-            series = col_mapped_frame.iloc[:, colidx]
+            series = self.df.iloc[:, colidx]
             for i, val in enumerate(series):
                 yield ExcelCell(self.rowcounter + i, colidx + coloffset, val)
 
@@ -1917,12 +1914,9 @@ def _format_hierarchical_rows(self):
                                         header_style)
                     gcolidx += 1
 
-        # Get a frame that will account for any duplicates in the column names.
-        col_mapped_frame = self.df.loc[:, self.columns]
-
         # Write the body of the frame data series by series.
         for colidx in range(len(self.columns)):
-            series = col_mapped_frame.iloc[:, colidx]
+            series = self.df.iloc[:, colidx]
             for i, val in enumerate(series):
                 yield ExcelCell(self.rowcounter + i, gcolidx + colidx, val)
 
diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py
@@ -1346,7 +1346,7 @@ def roundtrip2(df, header=True, parser_hdr=0, index=True):
 
 
     def test_duplicated_columns(self):
-        # Test for issue #5235.
+        # Test for issue #5235
         _skip_if_no_xlrd()
 
         with ensure_clean(self.ext) as path:
@@ -1358,7 +1358,20 @@ def test_duplicated_columns(self):
 
             read_frame = read_excel(path, 'test1')
             read_frame.columns = colnames
+            tm.assert_frame_equal(write_frame, read_frame)
+
+            # Test for issues #11007 / #10970
+            write_frame = DataFrame([[1,2,3,4],[5,6,7,8]],
+                                    columns=['A','B','A','B'])
+            write_frame.to_excel(path, 'test1')
+            read_frame = read_excel(path, 'test1')
+            read_frame.columns = ['A','B','A','B']
+            tm.assert_frame_equal(write_frame, read_frame)
 
+            # 10982
+            write_frame.to_excel(path, 'test1', index=False, header=False)
+            read_frame = read_excel(path, 'test1', header=None)
+            write_frame.columns = [0, 1, 2, 3]
             tm.assert_frame_equal(write_frame, read_frame)
 
     def test_swapped_columns(self):
@@ -1375,6 +1388,23 @@ def test_swapped_columns(self):
             tm.assert_series_equal(write_frame['A'], read_frame['A'])
             tm.assert_series_equal(write_frame['B'], read_frame['B'])
 
+    def test_invalid_columns(self):
+        # Issue #10982: `columns` naming labels absent from the frame
+        _skip_if_no_xlrd()
+
+        with ensure_clean(self.ext) as path:
+            write_frame = DataFrame({'A': [1, 1, 1],
+                                     'B': [2, 2, 2]})
+
+            write_frame.to_excel(path, 'test1', columns=['B', 'C'])
+            expected = write_frame.loc[:, ['B','C']]
+            read_frame = read_excel(path, 'test1')
+            tm.assert_frame_equal(expected, read_frame)
+
+            with tm.assertRaises(KeyError):
+                write_frame.to_excel(path, 'test1', columns=['C', 'D'])
+
+
     def test_datetimes(self):
 
         # Test writing and reading datetimes. For issue #9139. (xref #9185)