Skip to content

Commit 5954940

Browse files
committed
DOC: Surface / doc mangle_dupe_cols in read_excel
xref pandas-dev/pandas gh-10523.
1 parent a197837 commit 5954940

File tree

2 files changed

+39
-21
lines changed

2 files changed

+39
-21
lines changed

pandas/io/excel.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -175,12 +175,16 @@
175175
convert integral floats to int (i.e., 1.0 --> 1). If False, all numeric
176176
data will be read in as floats: Excel stores all numbers as floats
177177
internally
178+
mangle_dupe_cols : boolean, default True
179+
Duplicate columns will be specified as 'X', 'X.1', ..., 'X.N', rather than
180+
'X', ..., 'X'. Passing in False (which would overwrite data for duplicate
181+
column names) is not supported yet and raises a ValueError.
178182
179183
Returns
180184
-------
181185
parsed : DataFrame or dict of DataFrames
182-
DataFrame from the passed in Excel file. See notes in sheet_name
183-
argument for more information on when a Dict of Dataframes is returned.
186+
DataFrame from the passed in Excel file. See notes in sheet_name
187+
argument for more information on when a dict of DataFrames is returned.
184188
185189
Examples
186190
--------
@@ -314,6 +318,7 @@ def read_excel(io,
314318
comment=None,
315319
skipfooter=0,
316320
convert_float=True,
321+
mangle_dupe_cols=True,
317322
**kwds):
318323

319324
# Can't use _deprecate_kwarg since sheetname=None has a special meaning
@@ -349,6 +354,7 @@ def read_excel(io,
349354
comment=comment,
350355
skipfooter=skipfooter,
351356
convert_float=convert_float,
357+
mangle_dupe_cols=mangle_dupe_cols,
352358
**kwds)
353359

354360

@@ -441,6 +447,7 @@ def parse(self,
441447
comment=None,
442448
skipfooter=0,
443449
convert_float=True,
450+
mangle_dupe_cols=True,
444451
**kwds):
445452
"""
446453
Parse specified sheet(s) into a DataFrame
@@ -476,6 +483,7 @@ def parse(self,
476483
comment=comment,
477484
skipfooter=skipfooter,
478485
convert_float=convert_float,
486+
mangle_dupe_cols=mangle_dupe_cols,
479487
**kwds)
480488

481489
def _parse_excel(self,
@@ -498,6 +506,7 @@ def _parse_excel(self,
498506
comment=None,
499507
skipfooter=0,
500508
convert_float=True,
509+
mangle_dupe_cols=True,
501510
**kwds):
502511

503512
_validate_header_arg(header)
@@ -667,6 +676,7 @@ def _parse_cell(cell_contents, cell_typ):
667676
comment=comment,
668677
skipfooter=skipfooter,
669678
usecols=usecols,
679+
mangle_dupe_cols=mangle_dupe_cols,
670680
**kwds)
671681

672682
output[asheetname] = parser.read(nrows=nrows)

pandas/tests/io/test_excel.py

+27-19
Original file line numberDiff line numberDiff line change
@@ -1846,33 +1846,41 @@ def roundtrip(data, header=True, parser_hdr=0, index=True):
18461846

18471847
def test_duplicated_columns(self, *_):
18481848
# see gh-5235
1849-
write_frame = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]])
1850-
col_names = ["A", "B", "B"]
1851-
1852-
write_frame.columns = col_names
1853-
write_frame.to_excel(self.path, "test1")
1849+
df = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]],
1850+
columns=["A", "B", "B"])
1851+
df.to_excel(self.path, "test1")
1852+
expected = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]],
1853+
columns=["A", "B", "B.1"])
18541854

1855-
read_frame = read_excel(self.path, "test1", index_col=0)
1856-
read_frame.columns = col_names
1855+
# By default, we mangle.
1856+
result = read_excel(self.path, "test1", index_col=0)
1857+
tm.assert_frame_equal(result, expected)
18571858

1858-
tm.assert_frame_equal(write_frame, read_frame)
1859+
# Explicitly, we pass in the parameter.
1860+
result = read_excel(self.path, "test1", index_col=0,
1861+
mangle_dupe_cols=True)
1862+
tm.assert_frame_equal(result, expected)
18591863

18601864
# see gh-11007, gh-10970
1861-
write_frame = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]],
1862-
columns=["A", "B", "A", "B"])
1863-
write_frame.to_excel(self.path, "test1")
1864-
1865-
read_frame = read_excel(self.path, "test1", index_col=0)
1866-
read_frame.columns = ["A", "B", "A", "B"]
1865+
df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]],
1866+
columns=["A", "B", "A", "B"])
1867+
df.to_excel(self.path, "test1")
18671868

1868-
tm.assert_frame_equal(write_frame, read_frame)
1869+
result = read_excel(self.path, "test1", index_col=0)
1870+
expected = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]],
1871+
columns=["A", "B", "A.1", "B.1"])
1872+
tm.assert_frame_equal(result, expected)
18691873

18701874
# see gh-10982
1871-
write_frame.to_excel(self.path, "test1", index=False, header=False)
1872-
read_frame = read_excel(self.path, "test1", header=None)
1875+
df.to_excel(self.path, "test1", index=False, header=False)
1876+
result = read_excel(self.path, "test1", header=None)
18731877

1874-
write_frame.columns = [0, 1, 2, 3]
1875-
tm.assert_frame_equal(write_frame, read_frame)
1878+
expected = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]])
1879+
tm.assert_frame_equal(result, expected)
1880+
1881+
msg = "Setting mangle_dupe_cols=False is not supported yet"
1882+
with pytest.raises(ValueError, match=msg):
1883+
read_excel(self.path, "test1", header=None, mangle_dupe_cols=False)
18761884

18771885
def test_swapped_columns(self, merge_cells, engine, ext):
18781886
# Test for issue #5427.

0 commit comments

Comments
 (0)