Skip to content

Commit 7ee8ab0

Browse files
authored
BUG: read_csv not respecting converter in all cases for index col (#46053)
1 parent c44871c commit 7ee8ab0

File tree

4 files changed

+29
-8
lines changed

4 files changed

+29
-8
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,7 @@ I/O
360360
- Bug in :meth:`DataFrame.info` where a new line at the end of the output is omitted when called on an empty :class:`DataFrame` (:issue:`45494`)
361361
- Bug in :func:`read_csv` not recognizing line break for ``on_bad_lines="warn"`` for ``engine="c"`` (:issue:`41710`)
362362
- Bug in :meth:`DataFrame.to_csv` not respecting ``float_format`` for ``Float64`` dtype (:issue:`45991`)
363+
- Bug in :func:`read_csv` not respecting a specified converter for index columns in all cases (:issue:`40589`)
363364
- Bug in :func:`read_parquet` when ``engine="pyarrow"`` which caused a partial write to disk when a column of an unsupported datatype was passed (:issue:`44914`)
364365
- Bug in :meth:`DataFrame.to_excel` and :class:`ExcelWriter` raising when writing an empty DataFrame to a ``.ods`` file (:issue:`45793`)
365366

pandas/io/parsers/base_parser.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ def __init__(self, kwds):
103103
self.keep_default_na = kwds.get("keep_default_na", True)
104104

105105
self.dtype = copy(kwds.get("dtype", None))
106+
self.converters = kwds.get("converters")
106107

107108
self.true_values = kwds.get("true_values")
108109
self.false_values = kwds.get("false_values")
@@ -476,6 +477,7 @@ def _clean_mapping(self, mapping):
476477
@final
477478
def _agg_index(self, index, try_parse_dates: bool = True) -> Index:
478479
arrays = []
480+
converters = self._clean_mapping(self.converters)
479481

480482
for i, arr in enumerate(index):
481483

@@ -500,10 +502,17 @@ def _agg_index(self, index, try_parse_dates: bool = True) -> Index:
500502
clean_dtypes = self._clean_mapping(self.dtype)
501503

502504
cast_type = None
503-
if isinstance(clean_dtypes, dict) and self.index_names is not None:
504-
cast_type = clean_dtypes.get(self.index_names[i], None)
505+
index_converter = False
506+
if self.index_names is not None:
507+
if isinstance(clean_dtypes, dict):
508+
cast_type = clean_dtypes.get(self.index_names[i], None)
509+
510+
if isinstance(converters, dict):
511+
index_converter = converters.get(self.index_names[i]) is not None
505512

506-
try_num_bool = not (cast_type and is_string_dtype(cast_type))
513+
try_num_bool = not (
514+
cast_type and is_string_dtype(cast_type) or index_converter
515+
)
507516

508517
arr, _ = self._infer_types(
509518
arr, col_na_values | col_na_fvalues, try_num_bool

pandas/io/parsers/python_parser.py

-1
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,6 @@ def __init__(self, f: ReadCsvBuffer[str] | list, **kwds):
9595
self.has_index_names = kwds["has_index_names"]
9696

9797
self.verbose = kwds["verbose"]
98-
self.converters = kwds["converters"]
9998

10099
self.thousands = kwds["thousands"]
101100
self.decimal = kwds["decimal"]

pandas/tests/io/parser/test_converters.py

+16-4
Original file line numberDiff line numberDiff line change
@@ -152,16 +152,28 @@ def convert_score(x):
152152
tm.assert_frame_equal(results[0], results[1])
153153

154154

155-
def test_converter_index_col_bug(all_parsers):
156-
# see gh-1835
155+
@pytest.mark.parametrize("conv_f", [lambda x: x, str])
156+
def test_converter_index_col_bug(all_parsers, conv_f):
157+
# see gh-1835, GH#40589
157158
parser = all_parsers
158159
data = "A;B\n1;2\n3;4"
159160

160161
rs = parser.read_csv(
161-
StringIO(data), sep=";", index_col="A", converters={"A": lambda x: x}
162+
StringIO(data), sep=";", index_col="A", converters={"A": conv_f}
162163
)
163164

164-
xp = DataFrame({"B": [2, 4]}, index=Index([1, 3], name="A"))
165+
xp = DataFrame({"B": [2, 4]}, index=Index(["1", "3"], name="A", dtype="object"))
166+
tm.assert_frame_equal(rs, xp)
167+
168+
169+
def test_converter_identity_object(all_parsers):
170+
# GH#40589
171+
parser = all_parsers
172+
data = "A,B\n1,2\n3,4"
173+
174+
rs = parser.read_csv(StringIO(data), converters={"A": lambda x: x})
175+
176+
xp = DataFrame({"A": ["1", "3"], "B": [2, 4]})
165177
tm.assert_frame_equal(rs, xp)
166178

167179

0 commit comments

Comments
 (0)