From e344a33b4dfaa25695e376814a65e94b02e4bc0d Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Fri, 18 Feb 2022 17:50:48 +0100
Subject: [PATCH 1/2] BUG: read_csv not respecting converter in all cases for
 index col

---
 doc/source/whatsnew/v1.5.0.rst            |  1 +
 pandas/io/parsers/base_parser.py          |  8 +++++++-
 pandas/io/parsers/python_parser.py        |  1 -
 pandas/tests/io/parser/test_converters.py | 20 ++++++++++++++++----
 4 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
index c8b2617ffc535..478337f4db24e 100644
--- a/doc/source/whatsnew/v1.5.0.rst
+++ b/doc/source/whatsnew/v1.5.0.rst
@@ -357,6 +357,7 @@ I/O
 - Bug in :meth:`DataFrame.info` where a new line at the end of the output is omitted when called on an empty :class:`DataFrame` (:issue:`45494`)
 - Bug in :func:`read_csv` not recognizing line break for ``on_bad_lines="warn"`` for ``engine="c"`` (:issue:`41710`)
 - Bug in :meth:`DataFrame.to_csv` not respecting ``float_format`` for ``Float64`` dtype (:issue:`45991`)
+- Bug in :func:`read_csv` not respecting a specified converter for index columns in all cases (:issue:`40589`)
 - Bug in :func:`read_parquet` when ``engine="pyarrow"`` which caused partial write to disk when column of unsupported datatype was passed (:issue:`44914`)
 - Bug in :func:`DataFrame.to_excel` and :class:`ExcelWriter` would raise when writing an empty DataFrame to a ``.ods`` file (:issue:`45793`)
 
diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index 7927439abb510..c76e40677ad78 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -103,6 +103,7 @@ def __init__(self, kwds):
         self.keep_default_na = kwds.get("keep_default_na", True)
 
         self.dtype = copy(kwds.get("dtype", None))
+        self.converters = kwds.get("converters")
 
         self.true_values = kwds.get("true_values")
         self.false_values = kwds.get("false_values")
@@ -476,6 +477,7 @@ def _clean_mapping(self, mapping):
     @final
     def _agg_index(self, index, try_parse_dates: bool = True) -> Index:
         arrays = []
+        converters = self._clean_mapping(self.converters)
 
         for i, arr in enumerate(index):
 
@@ -503,7 +505,11 @@ def _agg_index(self, index, try_parse_dates: bool = True) -> Index:
             if isinstance(clean_dtypes, dict) and self.index_names is not None:
                 cast_type = clean_dtypes.get(self.index_names[i], None)
 
-            try_num_bool = not (cast_type and is_string_dtype(cast_type))
+            conv = False
+            if isinstance(converters, dict) and self.index_names is not None:
+                conv = converters.get(self.index_names[i]) is not None
+
+            try_num_bool = not (cast_type and is_string_dtype(cast_type) or conv)
 
             arr, _ = self._infer_types(
                 arr, col_na_values | col_na_fvalues, try_num_bool
diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py
index 8a66a5c22caf5..92031cb04e768 100644
--- a/pandas/io/parsers/python_parser.py
+++ b/pandas/io/parsers/python_parser.py
@@ -95,7 +95,6 @@ def __init__(self, f: ReadCsvBuffer[str] | list, **kwds):
             self.has_index_names = kwds["has_index_names"]
 
         self.verbose = kwds["verbose"]
-        self.converters = kwds["converters"]
 
         self.thousands = kwds["thousands"]
         self.decimal = kwds["decimal"]
diff --git a/pandas/tests/io/parser/test_converters.py b/pandas/tests/io/parser/test_converters.py
index 21933d83ce3f4..85f3db0398080 100644
--- a/pandas/tests/io/parser/test_converters.py
+++ b/pandas/tests/io/parser/test_converters.py
@@ -152,16 +152,28 @@ def convert_score(x):
     tm.assert_frame_equal(results[0], results[1])
 
 
-def test_converter_index_col_bug(all_parsers):
-    # see gh-1835
+@pytest.mark.parametrize("conv_f", [lambda x: x, str])
+def test_converter_index_col_bug(all_parsers, conv_f):
+    # see gh-1835, GH#40589
     parser = all_parsers
     data = "A;B\n1;2\n3;4"
 
     rs = parser.read_csv(
-        StringIO(data), sep=";", index_col="A", converters={"A": lambda x: x}
+        StringIO(data), sep=";", index_col="A", converters={"A": conv_f}
     )
 
-    xp = DataFrame({"B": [2, 4]}, index=Index([1, 3], name="A"))
+    xp = DataFrame({"B": [2, 4]}, index=Index(["1", "3"], name="A", dtype="object"))
+    tm.assert_frame_equal(rs, xp)
+
+
+def test_converter_identity_object(all_parsers):
+    # GH#40589
+    parser = all_parsers
+    data = "A,B\n1,2\n3,4"
+
+    rs = parser.read_csv(StringIO(data), converters={"A": lambda x: x})
+
+    xp = DataFrame({"A": ["1", "3"], "B": [2, 4]})
     tm.assert_frame_equal(rs, xp)
 
 

From 6e5667c5b059341e6843922b6af9a687a3f22813 Mon Sep 17 00:00:00 2001
From: phofl <patrick_hoefler@gmx.net>
Date: Tue, 22 Feb 2022 23:51:02 +0100
Subject: [PATCH 2/2] Restructure index dtype/converter lookup in _agg_index

---
 pandas/io/parsers/base_parser.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py
index c76e40677ad78..e071e281d5a90 100644
--- a/pandas/io/parsers/base_parser.py
+++ b/pandas/io/parsers/base_parser.py
@@ -502,14 +502,17 @@ def _agg_index(self, index, try_parse_dates: bool = True) -> Index:
             clean_dtypes = self._clean_mapping(self.dtype)
 
             cast_type = None
-            if isinstance(clean_dtypes, dict) and self.index_names is not None:
-                cast_type = clean_dtypes.get(self.index_names[i], None)
+            index_converter = False
+            if self.index_names is not None:
+                if isinstance(clean_dtypes, dict):
+                    cast_type = clean_dtypes.get(self.index_names[i], None)
 
-            conv = False
-            if isinstance(converters, dict) and self.index_names is not None:
-                conv = converters.get(self.index_names[i]) is not None
+                if isinstance(converters, dict):
+                    index_converter = converters.get(self.index_names[i]) is not None
 
-            try_num_bool = not (cast_type and is_string_dtype(cast_type) or conv)
+            try_num_bool = not (
+                cast_type and is_string_dtype(cast_type) or index_converter
+            )
 
             arr, _ = self._infer_types(
                 arr, col_na_values | col_na_fvalues, try_num_bool