Skip to content

Commit cf32a23

Browse files
authored
Backport PR #54881 on branch 2.1.x (REGR: read_csv raising when dtypes is specified with usecols) (#54926)
1 parent 9c8800d commit cf32a23

File tree

3 files changed

+22
-4
lines changed

3 files changed

+22
-4
lines changed

doc/source/whatsnew/v2.1.1.rst

+1
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@ including other versions of pandas.
1313

1414
Fixed regressions
1515
~~~~~~~~~~~~~~~~~
16+
- Fixed regression in :func:`read_csv` when ``usecols`` is given and ``dtype`` is a dict for ``engine="python"`` (:issue:`54868`)
1617
- Fixed regression in :meth:`DataFrame.__setitem__` raising ``AssertionError`` when setting a :class:`Series` with a partial :class:`MultiIndex` (:issue:`54875`)
1718
- Fixed regression when comparing a :class:`Series` with ``datetime64`` dtype with ``None`` (:issue:`54870`)
1819

pandas/io/parsers/python_parser.py

+4-4
Original file line number | Diff line number | Diff line change
@@ -1176,17 +1176,17 @@ def _set_no_thousand_columns(self) -> set[int]:
11761176
)
11771177
if self.columns and self.dtype:
11781178
assert self._col_indices is not None
1179-
for i in self._col_indices:
1179+
for i, col in zip(self._col_indices, self.columns):
11801180
if not isinstance(self.dtype, dict) and not is_numeric_dtype(
11811181
self.dtype
11821182
):
11831183
no_thousands_columns.add(i)
11841184
if (
11851185
isinstance(self.dtype, dict)
1186-
and self.columns[i] in self.dtype
1186+
and col in self.dtype
11871187
and (
1188-
not is_numeric_dtype(self.dtype[self.columns[i]])
1189-
or is_bool_dtype(self.dtype[self.columns[i]])
1188+
not is_numeric_dtype(self.dtype[col])
1189+
or is_bool_dtype(self.dtype[col])
11901190
)
11911191
):
11921192
no_thousands_columns.add(i)

pandas/tests/io/parser/dtypes/test_dtypes_basic.py

+17
Original file line number | Diff line number | Diff line change
@@ -558,3 +558,20 @@ def test_string_inference(all_parsers):
558558
columns=pd.Index(["a", "b"], dtype=dtype),
559559
)
560560
tm.assert_frame_equal(result, expected)
561+
562+
563+
def test_dtypes_with_usecols(all_parsers):
564+
# GH#54868
565+
566+
parser = all_parsers
567+
data = """a,b,c
568+
1,2,3
569+
4,5,6"""
570+
571+
result = parser.read_csv(StringIO(data), usecols=["a", "c"], dtype={"a": object})
572+
if parser.engine == "pyarrow":
573+
values = [1, 4]
574+
else:
575+
values = ["1", "4"]
576+
expected = DataFrame({"a": pd.Series(values, dtype=object), "c": [3, 6]})
577+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)