Bug in read_csv and read_excel not applying dtype to second col with dup cols #41411

Merged: 3 commits, May 12, 2021

1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
@@ -837,6 +837,7 @@ I/O
 - Bug in :func:`read_excel` raising ``AttributeError`` with ``MultiIndex`` header followed by two empty rows and no index, and bug affecting :func:`read_excel`, :func:`read_csv`, :func:`read_table`, :func:`read_fwf`, and :func:`read_clipboard` where one blank row after a ``MultiIndex`` header with no index would be dropped (:issue:`40442`)
 - Bug in :meth:`DataFrame.to_string` misplacing the truncation column when ``index=False`` (:issue:`40907`)
 - Bug in :func:`read_orc` always raising ``AttributeError`` (:issue:`40918`)
+- Bug in :func:`read_csv` and :func:`read_excel` not respecting dtype for duplicated column name when ``mangle_dupe_cols`` is set to ``True`` (:issue:`35211`)
 - Bug in :func:`read_csv` and :func:`read_table` misinterpreting arguments when ``sys.setprofile`` had been previously called (:issue:`41069`)
 - Bug in the conversion from pyarrow to pandas (e.g. for reading Parquet) with nullable dtypes and a pyarrow array whose data buffer size is not a multiple of dtype size (:issue:`40896`)

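For context, a minimal reproduction of the behaviour described in the new whatsnew entry; the input is the same two-duplicate-column CSV used in the PR's tests, and the printed dtypes reflect a pandas build that includes this fix:

```python
from io import StringIO

import pandas as pd

# Two columns share the name "a"; mangle_dupe_cols (the default) renames the
# second one to "a.1".
data = "a,a\n1,1"

df = pd.read_csv(StringIO(data), dtype={"a": str})

# Before the fix only the first "a" honoured dtype=str and "a.1" was inferred
# as int64; with the fix the dtype of "a" is propagated to "a.1" as well.
print(df.dtypes)  # both "a" and "a.1" come back as object (str)
```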
15 changes: 11 additions & 4 deletions pandas/_libs/parsers.pyx
@@ -685,10 +685,17 @@ cdef class TextReader:
             count = counts.get(name, 0)

             if not self.has_mi_columns and self.mangle_dupe_cols:
-                while count > 0:
-                    counts[name] = count + 1
-                    name = f'{name}.{count}'
-                    count = counts.get(name, 0)
+                if count > 0:

Contributor:
Would be nice to unify this code between here and the Python parser (follow-on).

Member Author:
Yes, definitely, but that will require refactoring the PythonParser quite a bit and splitting it into two classes, so that it can inherit from TextReader, or rather from a generic Cython class from which both TextReader and something like PythonTextReader could inherit.

I am planning to do this in the (probably medium-term) future.

Contributor:
Sounds great!

Feel free to open an issue for tracking.

Member Author:
Thought about using #39345 for this.

+                    while count > 0:
+                        counts[name] = count + 1
+                        name = f'{name}.{count}'
+                        count = counts.get(name, 0)
+                    if (
+                        self.dtype is not None
+                        and self.dtype.get(old_name) is not None
+                        and self.dtype.get(name) is None
+                    ):
+                        self.dtype.update({name: self.dtype.get(old_name)})

             if old_name == '':
                 unnamed_cols.add(name)
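The renaming-plus-dtype-propagation logic above, written out as a standalone plain-Python sketch. The helper name and signature are illustrative only; the real code lives inside the Cython ``TextReader`` and tracks more state:

```python
def mangle_dupe_cols_with_dtype(names, dtype=None):
    # Rename duplicated column names to "name.1", "name.2", ... and, when the
    # caller supplied a dtype for the original name but not for the mangled
    # alias, copy that dtype over so both columns are parsed the same way.
    counts: dict = {}
    mangled = []
    for name in names:
        old_name = name
        count = counts.get(name, 0)
        if count > 0:
            while count > 0:
                counts[name] = count + 1
                name = f"{name}.{count}"
                count = counts.get(name, 0)
            if (
                dtype is not None
                and dtype.get(old_name) is not None
                and dtype.get(name) is None
            ):
                dtype[name] = dtype[old_name]
        counts[name] = count + 1
        mangled.append(name)
    return mangled, dtype


# Returns (["a", "a.1"], {"a": str, "a.1": str}): the dtype of "a" is copied
# onto the mangled alias "a.1".
print(mangle_dupe_cols_with_dtype(["a", "a"], {"a": str}))
```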
16 changes: 12 additions & 4 deletions pandas/io/parsers/python_parser.py
@@ -421,12 +421,20 @@ def _infer_columns(self):
                 counts: DefaultDict = defaultdict(int)

                 for i, col in enumerate(this_columns):
+                    old_col = col
                     cur_count = counts[col]

-                    while cur_count > 0:
-                        counts[col] = cur_count + 1
-                        col = f"{col}.{cur_count}"
-                        cur_count = counts[col]
+                    if cur_count > 0:
+                        while cur_count > 0:
+                            counts[col] = cur_count + 1
+                            col = f"{col}.{cur_count}"
+                            cur_count = counts[col]
+                        if (
+                            self.dtype is not None
+                            and self.dtype.get(old_col) is not None
+                            and self.dtype.get(col) is None
+                        ):
+                            self.dtype.update({col: self.dtype.get(old_col)})

                     this_columns[i] = col
                     counts[col] = cur_count + 1
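Since the C engine goes through parsers.pyx and the Python engine through python_parser.py, a quick sanity check (not part of the PR's test suite) that both engines now agree could look like this:

```python
from io import StringIO

import pandas as pd

data = "a,a\n1,1"

# After this change both engines propagate dtype={"a": str} to the mangled
# "a.1" column, so the second value is read as the string "1" in both cases.
for engine in ("c", "python"):
    df = pd.read_csv(StringIO(data), dtype={"a": str}, engine=engine)
    assert df["a.1"].iloc[0] == "1", engine
```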
Binary files not shown (5 files, presumably the new ``df_mangle_dup_col_dtypes`` Excel test fixtures).
8 changes: 8 additions & 0 deletions pandas/tests/io/excel/test_readers.py
@@ -555,6 +555,14 @@ def test_reader_dtype_str(self, read_ext, dtype, expected):
         actual = pd.read_excel(basename + read_ext, dtype=dtype)
         tm.assert_frame_equal(actual, expected)

+    @pytest.mark.parametrize("dtypes, exp_value", [({}, "1"), ({"a.1": "int64"}, 1)])
+    def test_dtype_mangle_dup_cols(self, read_ext, dtypes, exp_value):
+        # GH#35211
+        basename = "df_mangle_dup_col_dtypes"
+        result = pd.read_excel(basename + read_ext, dtype={"a": str, **dtypes})
+        expected = DataFrame({"a": ["1"], "a.1": [exp_value]})
+        tm.assert_frame_equal(result, expected)
+
     def test_reader_spaces(self, read_ext):
         # see gh-32207
         basename = "test_spaces"
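The binary fixtures used by this test are not shown in the diff; something along these lines could produce an equivalent file (a sketch only, using the basename from the test; the committed fixtures may have been created differently and exist in each supported Excel format):

```python
import pandas as pd

# A single row with two columns both named "a"; when read back with
# mangle_dupe_cols the second column becomes "a.1".
df = pd.DataFrame([[1, 1]], columns=["a", "a"])
df.to_excel("df_mangle_dup_col_dtypes.xlsx", index=False)  # needs openpyxl
```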
10 changes: 10 additions & 0 deletions pandas/tests/io/parser/dtypes/test_dtypes_basic.py
@@ -238,3 +238,13 @@ def test_true_values_cast_to_bool(all_parsers):
     )
     expected["a"] = expected["a"].astype("boolean")
     tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize("dtypes, exp_value", [({}, "1"), ({"a.1": "int64"}, 1)])
+def test_dtype_mangle_dup_cols(all_parsers, dtypes, exp_value):
+    # GH#35211
+    parser = all_parsers
+    data = """a,a\n1,1"""
+    result = parser.read_csv(StringIO(data), dtype={"a": str, **dtypes})
+    expected = DataFrame({"a": ["1"], "a.1": [exp_value]})
+    tm.assert_frame_equal(result, expected)
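The second parametrization above also documents the precedence rule: an explicit dtype for the mangled name wins over the one propagated from the original column. Illustrated outside the test harness, on a build with this fix:

```python
from io import StringIO

import pandas as pd

data = "a,a\n1,1"

# "a" is read as str, but the explicit entry for "a.1" takes precedence over
# the dtype propagated from "a", so "a.1" comes back as int64.
df = pd.read_csv(StringIO(data), dtype={"a": str, "a.1": "int64"})
print(df.dtypes)  # a: object, a.1: int64
```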