Skip to content

Commit cac4ad2

Browse files
committed
Merge pull request #11237 from chris-b1/excel-column-bugs
BUG: to_excel duplicate columns
2 parents 26db172 + dba4d1f commit cac4ad2

File tree

3 files changed

+41
-13
lines changed

3 files changed

+41
-13
lines changed

doc/source/whatsnew/v0.17.1.txt

+4
Original file line numberDiff line numberDiff line change
@@ -70,3 +70,7 @@ Bug Fixes
7070

7171

7272
- Bug in ``DataFrame.to_latex()`` produces an extra rule when ``header=False`` (:issue:`7124`)
73+
74+
75+
76+
- Bugs in ``to_excel`` with duplicate columns (:issue:`11007`, :issue:`10982`, :issue:`10970`)

pandas/core/format.py

+6-12
Original file line numberDiff line numberDiff line change
@@ -1683,12 +1683,12 @@ class ExcelFormatter(object):
16831683
def __init__(self, df, na_rep='', float_format=None, cols=None,
16841684
header=True, index=True, index_label=None, merge_cells=False,
16851685
inf_rep='inf'):
1686-
self.df = df
16871686
self.rowcounter = 0
16881687
self.na_rep = na_rep
1689-
self.columns = cols
1690-
if cols is None:
1691-
self.columns = df.columns
1688+
self.df = df
1689+
if cols is not None:
1690+
self.df = df.loc[:, cols]
1691+
self.columns = self.df.columns
16921692
self.float_format = float_format
16931693
self.index = index
16941694
self.index_label = index_label
@@ -1843,12 +1843,9 @@ def _format_regular_rows(self):
18431843
for idx, idxval in enumerate(index_values):
18441844
yield ExcelCell(self.rowcounter + idx, 0, idxval, header_style)
18451845

1846-
# Get a frame that will account for any duplicates in the column names.
1847-
col_mapped_frame = self.df.loc[:, self.columns]
1848-
18491846
# Write the body of the frame data series by series.
18501847
for colidx in range(len(self.columns)):
1851-
series = col_mapped_frame.iloc[:, colidx]
1848+
series = self.df.iloc[:, colidx]
18521849
for i, val in enumerate(series):
18531850
yield ExcelCell(self.rowcounter + i, colidx + coloffset, val)
18541851

@@ -1917,12 +1914,9 @@ def _format_hierarchical_rows(self):
19171914
header_style)
19181915
gcolidx += 1
19191916

1920-
# Get a frame that will account for any duplicates in the column names.
1921-
col_mapped_frame = self.df.loc[:, self.columns]
1922-
19231917
# Write the body of the frame data series by series.
19241918
for colidx in range(len(self.columns)):
1925-
series = col_mapped_frame.iloc[:, colidx]
1919+
series = self.df.iloc[:, colidx]
19261920
for i, val in enumerate(series):
19271921
yield ExcelCell(self.rowcounter + i, gcolidx + colidx, val)
19281922

pandas/io/tests/test_excel.py

+31-1
Original file line numberDiff line numberDiff line change
@@ -1346,7 +1346,7 @@ def roundtrip2(df, header=True, parser_hdr=0, index=True):
13461346

13471347

13481348
def test_duplicated_columns(self):
1349-
# Test for issue #5235.
1349+
# Test for issue #5235
13501350
_skip_if_no_xlrd()
13511351

13521352
with ensure_clean(self.ext) as path:
@@ -1358,7 +1358,20 @@ def test_duplicated_columns(self):
13581358

13591359
read_frame = read_excel(path, 'test1')
13601360
read_frame.columns = colnames
1361+
tm.assert_frame_equal(write_frame, read_frame)
1362+
1363+
# Test for issues #11007 and #10970
1364+
write_frame = DataFrame([[1,2,3,4],[5,6,7,8]],
1365+
columns=['A','B','A','B'])
1366+
write_frame.to_excel(path, 'test1')
1367+
read_frame = read_excel(path, 'test1')
1368+
read_frame.columns = ['A','B','A','B']
1369+
tm.assert_frame_equal(write_frame, read_frame)
13611370

1371+
# Test for issue #10982
1372+
write_frame.to_excel(path, 'test1', index=False, header=False)
1373+
read_frame = read_excel(path, 'test1', header=None)
1374+
write_frame.columns = [0, 1, 2, 3]
13621375
tm.assert_frame_equal(write_frame, read_frame)
13631376

13641377
def test_swapped_columns(self):
@@ -1375,6 +1388,23 @@ def test_swapped_columns(self):
13751388
tm.assert_series_equal(write_frame['A'], read_frame['A'])
13761389
tm.assert_series_equal(write_frame['B'], read_frame['B'])
13771390

1391+
def test_invalid_columns(self):
1392+
# Test for issue #10982
1393+
_skip_if_no_xlrd()
1394+
1395+
with ensure_clean(self.ext) as path:
1396+
write_frame = DataFrame({'A': [1, 1, 1],
1397+
'B': [2, 2, 2]})
1398+
1399+
write_frame.to_excel(path, 'test1', columns=['B', 'C'])
1400+
expected = write_frame.loc[:, ['B','C']]
1401+
read_frame = read_excel(path, 'test1')
1402+
tm.assert_frame_equal(expected, read_frame)
1403+
1404+
with tm.assertRaises(KeyError):
1405+
write_frame.to_excel(path, 'test1', columns=['C', 'D'])
1406+
1407+
13781408
def test_datetimes(self):
13791409

13801410
# Test writing and reading datetimes. For issue #9139. (xref #9185)

0 commit comments

Comments
 (0)