Skip to content

Commit 271f0d0

Browse files
Backport PR #42508: read_excel() modifies provided types dict when accessing file with duplicate column (#42893)
Co-authored-by: Shoham Debnath <[email protected]>
1 parent e7e93e3 commit 271f0d0

File tree

4 files changed

+13
-4
lines changed

4 files changed

+13
-4
lines changed

doc/source/whatsnew/v1.3.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ Fixed regressions
3030

3131
Bug fixes
3232
~~~~~~~~~
33+
- Bug in :meth:`pandas.read_excel` where the ``dtype`` dictionary passed by the caller was modified in place when reading a file with duplicate columns (:issue:`42462`)
3334
- 1D slices over extension types turn into N-dimensional slices over ExtensionArrays (:issue:`42430`)
3435
- :meth:`.Styler.hide_columns` now hides the index name header row as well as column headers (:issue:`42101`)
3536

pandas/io/parsers/python_parser.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
abc,
55
defaultdict,
66
)
7+
from copy import copy
78
import csv
89
from io import StringIO
910
import re
@@ -81,7 +82,7 @@ def __init__(self, f: FilePathOrBuffer | list, **kwds):
8182
self.verbose = kwds["verbose"]
8283
self.converters = kwds["converters"]
8384

84-
self.dtype = kwds["dtype"]
85+
self.dtype = copy(kwds["dtype"])
8586
self.thousands = kwds["thousands"]
8687
self.decimal = kwds["decimal"]
8788

@@ -432,7 +433,6 @@ def _infer_columns(self):
432433
and self.dtype.get(col) is None
433434
):
434435
self.dtype.update({col: self.dtype.get(old_col)})
435-
436436
this_columns[i] = col
437437
counts[col] = cur_count + 1
438438
elif have_mi_columns:

pandas/tests/io/excel/test_readers.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -576,8 +576,12 @@ def test_reader_dtype_str(self, read_ext, dtype, expected):
576576
def test_dtype_mangle_dup_cols(self, read_ext, dtypes, exp_value):
577577
# GH#35211
578578
basename = "df_mangle_dup_col_dtypes"
579-
result = pd.read_excel(basename + read_ext, dtype={"a": str, **dtypes})
579+
dtype_dict = {"a": str, **dtypes}
580+
dtype_dict_copy = dtype_dict.copy()
581+
# GH#42462
582+
result = pd.read_excel(basename + read_ext, dtype=dtype_dict)
580583
expected = DataFrame({"a": ["1"], "a.1": [exp_value]})
584+
assert dtype_dict == dtype_dict_copy, "dtype dict changed"
581585
tm.assert_frame_equal(result, expected)
582586

583587
def test_reader_spaces(self, read_ext):

pandas/tests/io/parser/dtypes/test_dtypes_basic.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -245,8 +245,12 @@ def test_dtype_mangle_dup_cols(all_parsers, dtypes, exp_value):
245245
# GH#35211
246246
parser = all_parsers
247247
data = """a,a\n1,1"""
248-
result = parser.read_csv(StringIO(data), dtype={"a": str, **dtypes})
248+
dtype_dict = {"a": str, **dtypes}
249+
# GH#42462
250+
dtype_dict_copy = dtype_dict.copy()
251+
result = parser.read_csv(StringIO(data), dtype=dtype_dict)
249252
expected = DataFrame({"a": ["1"], "a.1": [exp_value]})
253+
assert dtype_dict == dtype_dict_copy, "dtype dict changed"
250254
tm.assert_frame_equal(result, expected)
251255

252256

0 commit comments

Comments
 (0)