Backport PR #42030: Fix regression where read_csv raised an error for duplicate columns with a single dtype (#42045)

meeseeksmachine · phofl · web-flow · commit 0e11a00e0074 · 2021-06-16T09:02:38.000+01:00
Co-authored-by: Patrick Hoefler <61934744+phofl@users.noreply.github.com>
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
@@ -108,6 +108,7 @@ from pandas.core.dtypes.common import (
     is_object_dtype,
 )
 from pandas.core.dtypes.dtypes import CategoricalDtype
+from pandas.core.dtypes.inference import is_dict_like
 
 cdef:
     float64_t INF = <float64_t>np.inf
@@ -689,6 +690,7 @@ cdef class TextReader:
                                 count = counts.get(name, 0)
                             if (
                                 self.dtype is not None
+                                and is_dict_like(self.dtype)
                                 and self.dtype.get(old_name) is not None
                                 and self.dtype.get(name) is None
                             ):
diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
@@ -25,6 +25,7 @@
 )
 
 from pandas.core.dtypes.common import is_integer
+from pandas.core.dtypes.inference import is_dict_like
 
 from pandas.io.parsers.base_parser import (
     ParserBase,
@@ -424,6 +425,7 @@ def _infer_columns(self):
                                 cur_count = counts[col]
                             if (
                                 self.dtype is not None
+                                and is_dict_like(self.dtype)
                                 and self.dtype.get(old_col) is not None
                                 and self.dtype.get(col) is None
                             ):
diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
@@ -248,3 +248,12 @@ def test_dtype_mangle_dup_cols(all_parsers, dtypes, exp_value):
     result = parser.read_csv(StringIO(data), dtype={"a": str, **dtypes})
     expected = DataFrame({"a": ["1"], "a.1": [exp_value]})
     tm.assert_frame_equal(result, expected)
+
+
+def test_dtype_mangle_dup_cols_single_dtype(all_parsers):
+    # GH#42022: a single (non-dict) dtype with duplicate column names must parse
+    parser = all_parsers
+    data = """a,a\n1,1"""
+    result = parser.read_csv(StringIO(data), dtype=str)
+    expected = DataFrame({"a": ["1"], "a.1": ["1"]})
+    tm.assert_frame_equal(result, expected)