diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
index c3355757350b9..61856e0a7538e 100644
--- a/doc/source/whatsnew/v2.1.0.rst
+++ b/doc/source/whatsnew/v2.1.0.rst
@@ -308,6 +308,7 @@ Conversion
 ^^^^^^^^^^
 - Bug in :meth:`ArrowDtype.numpy_dtype` returning nanosecond units for non-nanosecond ``pyarrow.timestamp`` and ``pyarrow.duration`` types (:issue:`51800`)
 - Bug in :meth:`DataFrame.info` raising ``ValueError`` when ``use_numba`` is set (:issue:`51922`)
+- Bug in :func:`read_csv` raising ``ValueError`` when ``,`` is used as the decimal separator for a :class:`Float64Dtype` column (:issue:`52086`)
 -
 
 Strings
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index ae1d20ca4e225..ed572513910c6 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -1201,6 +1201,9 @@ cdef class TextReader:
                         result, dtype=dtype, true_values=true_values,
                         false_values=false_values)
                 else:
+                    if chr(self.parser.decimal) != ".":
+                        for i in range(len(result)):
+                            result[i] = result[i].replace(chr(self.parser.decimal), ".")
                     result = array_type._from_sequence_of_strings(result, dtype=dtype)
             except NotImplementedError:
                 raise NotImplementedError(
diff --git a/pandas/tests/libs/test_lib.py b/pandas/tests/libs/test_lib.py
index 383e1b81e17a7..92a0778880995 100644
--- a/pandas/tests/libs/test_lib.py
+++ b/pandas/tests/libs/test_lib.py
@@ -1,3 +1,5 @@
+import io
+
 import numpy as np
 import pytest
 
@@ -7,6 +9,7 @@
     writers as libwriters,
 )
 
+import pandas as pd
 from pandas import Index
 import pandas._testing as tm
 
@@ -270,3 +273,15 @@ def test_no_default_pickle():
     # GH#40397
     obj = tm.round_trip_pickle(lib.no_default)
     assert obj is lib.no_default
+
+
+def test_pdfloat64_decimal_separator():
+    # GH#52086: "," as the decimal separator must parse like "." for Float64Dtype
+    dtype = {"id": pd.Float64Dtype()}
+    # exercise both a two-row and a single-row column
+    for data in ('id\n"1,5"\n"1,6"\n', 'id\n"1,5"\n'):
+        result = pd.read_csv(io.StringIO(data), dtype=dtype, sep=";", decimal=",")
+        expected = pd.read_csv(
+            io.StringIO(data.replace(",", ".")), dtype=dtype, sep=",", decimal="."
+        )
+        tm.assert_frame_equal(result, expected)