Skip to content

Commit 4419146

Browse files
committed
suggested edits
1 parent c10b931 commit 4419146

9 files changed

+12
-33
lines changed

pandas/io/parsers/base_parser.py

+3
Original file line numberDiff line numberDiff line change
@@ -532,6 +532,9 @@ def _convert_to_ndarrays(
532532
conv_f = None if converters is None else converters.get(c, None)
533533
if isinstance(dtypes, dict):
534534
cast_type = dtypes.get(c, None)
535+
if cast_type is None and c.split(".")[-1].isnumeric:
536+
orig_c = ".".join(c.split(".")[:-1])
537+
cast_type = dtypes.get(orig_c, None)
535538
else:
536539
# single dtype or None
537540
cast_type = dtypes

pandas/io/parsers/python_parser.py

+1-13
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
)
2626

2727
from pandas.core.dtypes.common import is_integer
28-
from pandas.core.dtypes.inference import is_dict_like
2928

3029
from pandas.io.parsers.base_parser import (
3130
ParserBase,
@@ -81,10 +80,7 @@ def __init__(self, f: FilePathOrBuffer | list, **kwds):
8180
self.verbose = kwds["verbose"]
8281
self.converters = kwds["converters"]
8382

84-
if isinstance(kwds["dtype"], dict):
85-
self.dtype = kwds["dtype"].copy()
86-
else:
87-
self.dtype = kwds["dtype"]
83+
self.dtype = kwds["dtype"]
8884
self.thousands = kwds["thousands"]
8985
self.decimal = kwds["decimal"]
9086

@@ -420,21 +416,13 @@ def _infer_columns(self):
420416
counts: DefaultDict = defaultdict(int)
421417

422418
for i, col in enumerate(this_columns):
423-
old_col = col
424419
cur_count = counts[col]
425420

426421
if cur_count > 0:
427422
while cur_count > 0:
428423
counts[col] = cur_count + 1
429424
col = f"{col}.{cur_count}"
430425
cur_count = counts[col]
431-
if (
432-
self.dtype is not None
433-
and is_dict_like(self.dtype)
434-
and self.dtype.get(old_col) is not None
435-
and self.dtype.get(col) is None
436-
):
437-
self.dtype.update({col: self.dtype.get(old_col)})
438426

439427
this_columns[i] = col
440428
counts[col] = cur_count + 1
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

pandas/tests/io/excel/test_readers.py

+4-19
Original file line numberDiff line numberDiff line change
@@ -576,8 +576,11 @@ def test_reader_dtype_str(self, read_ext, dtype, expected):
576576
def test_dtype_mangle_dup_cols(self, read_ext, dtypes, exp_value):
577577
# GH#35211
578578
basename = "df_mangle_dup_col_dtypes"
579-
result = pd.read_excel(basename + read_ext, dtype={"a": str, **dtypes})
579+
dtype_dict = {"a": str, **dtypes}
580+
dtype_dict_copy = dtype_dict.copy()
581+
result = pd.read_excel(basename + read_ext, dtype=dtype_dict)
580582
expected = DataFrame({"a": ["1"], "a.1": [exp_value]})
583+
assert dtype_dict == dtype_dict_copy, "dtype dict changed" # GH 42462
581584
tm.assert_frame_equal(result, expected)
582585

583586
def test_reader_spaces(self, read_ext):
@@ -1278,24 +1281,6 @@ def test_ignore_chartsheets_by_int(self, request, read_ext):
12781281
):
12791282
pd.read_excel("chartsheet" + read_ext, sheet_name=1)
12801283

1281-
def test_dtype_dict_unchanged_with_duplicate_columns(self, read_ext):
1282-
# GH 42462
1283-
1284-
filename = "test_common_headers" + read_ext
1285-
dtype_dict = {"a": str, "b": str, "c": str}
1286-
dtype_dict_copy = dtype_dict.copy()
1287-
result = pd.read_excel(filename, dtype=dtype_dict)
1288-
expected = DataFrame(
1289-
{
1290-
"a": ["1", "2", "3"],
1291-
"a.1": ["1", "2", "3"],
1292-
"b": ["b1", "b2", "b3"],
1293-
"c": ["c1", "c2", "c3"],
1294-
}
1295-
)
1296-
assert dtype_dict == dtype_dict_copy, "dtype dict changed"
1297-
tm.assert_frame_equal(result, expected)
1298-
12991284

13001285
class TestExcelFileRead:
13011286
@pytest.fixture(autouse=True)

pandas/tests/io/parser/dtypes/test_dtypes_basic.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -245,8 +245,11 @@ def test_dtype_mangle_dup_cols(all_parsers, dtypes, exp_value):
245245
# GH#35211
246246
parser = all_parsers
247247
data = """a,a\n1,1"""
248-
result = parser.read_csv(StringIO(data), dtype={"a": str, **dtypes})
248+
dtype_dict = {"a": str, **dtypes}
249+
dtype_dict_copy = dtype_dict.copy()
250+
result = parser.read_csv(StringIO(data), dtype=dtype_dict)
249251
expected = DataFrame({"a": ["1"], "a.1": [exp_value]})
252+
assert dtype_dict == dtype_dict_copy, "dtype dict changed" # GH 42462
250253
tm.assert_frame_equal(result, expected)
251254

252255

0 commit comments

Comments
 (0)