BUG: read_csv not converting to float for python engine with decimal sep, usecols and parse_dates

phofl · phofl · commit 8c2e1caaf4a0 · 2020-12-06T21:49:18.000+01:00
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -724,6 +724,7 @@ I/O
 - Bug in :meth:`DataFrame.to_hdf` was not dropping missing rows with ``dropna=True`` (:issue:`35719`)
 - Bug in :func:`read_html` was raising a ``TypeError`` when supplying a ``pathlib.Path`` argument to the ``io`` parameter (:issue:`37705`)
 - :meth:`DataFrame.to_excel`, :meth:`Series.to_excel`, :meth:`DataFrame.to_markdown`, and :meth:`Series.to_markdown` now support writing to fsspec URLs such as S3 and Google Cloud Storage (:issue:`33987`)
+- Bug in :func:`read_csv` returning object dtype instead of float when ``decimal=","`` with ``usecols`` and ``parse_dates`` specified for ``engine="python"`` (:issue:`35873`)
 - Bug in :func:`read_fwf` with ``skip_blank_lines=True`` was not skipping blank lines (:issue:`37758`)
 - Parse missing values using :func:`read_json` with ``dtype=False`` to ``NaN`` instead of ``None`` (:issue:`28501`)
 - :meth:`read_fwf` was inferring compression with ``compression=None`` which was not consistent with the other :meth:``read_*`` functions (:issue:`37909`)
diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
@@ -2354,12 +2354,16 @@ def _set_no_thousands_columns(self):
         # Create a set of column ids that are not to be stripped of thousands
         # operators.
         noconvert_columns = set()
+        if self._col_indices is not None:
+            col_indices = sorted(self._col_indices)
+        else:
+            col_indices = list(range(len(self.columns)))
 
         def _set(x):
             if is_integer(x):
                 noconvert_columns.add(x)
             else:
-                noconvert_columns.add(self.columns.index(x))
+                noconvert_columns.add(col_indices[self.columns.index(x)])
 
         if isinstance(self.parse_dates, list):
             for val in self.parse_dates:
diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py
@@ -12,7 +12,7 @@
 
 from pandas.errors import ParserError
 
-from pandas import DataFrame, Index, MultiIndex
+from pandas import DataFrame, Index, MultiIndex, Timestamp
 import pandas._testing as tm
 
 
@@ -314,3 +314,19 @@ def test_malformed_skipfooter(python_parser_only):
     msg = "Expected 3 fields in line 4, saw 5"
     with pytest.raises(ParserError, match=msg):
         parser.read_csv(StringIO(data), header=1, comment="#", skipfooter=1)
+
+
+def test_delimiter_with_usecols_and_parse_dates(python_parser_only):
+    # GH#35873
+    result = python_parser_only.read_csv(
+        StringIO('"dump","-9,1","-9,1",20101010'),
+        engine="python",
+        names=["col", "col1", "col2", "col3"],
+        usecols=["col1", "col2", "col3"],
+        parse_dates=["col3"],
+        decimal=",",
+    )
+    expected = DataFrame(
+        {"col1": [-9.1], "col2": [-9.1], "col3": [Timestamp("2010-10-10")]}
+    )
+    tm.assert_frame_equal(result, expected)