From 0287a25f2555c72aeca11dfd9efc71dbba9c7d92 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 15 Jun 2021 23:25:53 +0200 Subject: [PATCH 1/2] Regression raising Error when having dup cols with single dtype for read csv --- pandas/_libs/parsers.pyx | 1 + pandas/io/parsers/python_parser.py | 1 + pandas/tests/io/parser/dtypes/test_dtypes_basic.py | 9 +++++++++ 3 files changed, 11 insertions(+) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 7d7074988e5f0..122c7fd518332 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -689,6 +689,7 @@ cdef class TextReader: count = counts.get(name, 0) if ( self.dtype is not None + and isinstance(self.dtype, dict) and self.dtype.get(old_name) is not None and self.dtype.get(name) is None ): diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 670868c6f4261..8718f8ca14444 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -424,6 +424,7 @@ def _infer_columns(self): cur_count = counts[col] if ( self.dtype is not None + and isinstance(self.dtype, dict) and self.dtype.get(old_col) is not None and self.dtype.get(col) is None ): diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index 59fd3de60e0bf..bc20f1d1eea5f 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -248,3 +248,12 @@ def test_dtype_mangle_dup_cols(all_parsers, dtypes, exp_value): result = parser.read_csv(StringIO(data), dtype={"a": str, **dtypes}) expected = DataFrame({"a": ["1"], "a.1": [exp_value]}) tm.assert_frame_equal(result, expected) + + +def test_dtype_mangle_dup_cols_single_dtype(all_parsers): + # GH#42022 + parser = all_parsers + data = """a,a\n1,1""" + result = parser.read_csv(StringIO(data), dtype=str) + expected = DataFrame({"a": ["1"], "a.1": ["1"]}) + tm.assert_frame_equal(result, expected) From 72d1ed45fd148349ecde74b0725aa621f562daef Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 16 Jun 2021 00:31:01 +0200 Subject: [PATCH 2/2] Use is_dict_like --- pandas/_libs/parsers.pyx | 3 ++- pandas/io/parsers/python_parser.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 122c7fd518332..e5e61e409c320 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -108,6 +108,7 @@ from pandas.core.dtypes.common import ( is_object_dtype, ) from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.core.dtypes.inference import is_dict_like cdef: float64_t INF = np.inf @@ -689,7 +690,7 @@ cdef class TextReader: count = counts.get(name, 0) if ( self.dtype is not None - and isinstance(self.dtype, dict) + and is_dict_like(self.dtype) and self.dtype.get(old_name) is not None and self.dtype.get(name) is None ): diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index 8718f8ca14444..af25a4166d5a6 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -25,6 +25,7 @@ ) from pandas.core.dtypes.common import is_integer +from pandas.core.dtypes.inference import is_dict_like from pandas.io.parsers.base_parser import ( ParserBase, @@ -424,7 +425,7 @@ def _infer_columns(self): cur_count = counts[col] if ( self.dtype is not None - and isinstance(self.dtype, dict) + and is_dict_like(self.dtype) and self.dtype.get(old_col) is not None and self.dtype.get(col) is None ):