From 17349f497e8b48965d0cd374f5e64a533b693de3 Mon Sep 17 00:00:00 2001 From: hongyy Date: Sun, 16 Apr 2023 16:00:45 -0400 Subject: [PATCH 1/4] fix_for_pdfloat64_decimal --- pandas/_libs/parsers.pyx | 6 ++++++ pandas/tests/libs/test_lib.py | 12 ++++++++++++ 2 files changed, 18 insertions(+) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index ae1d20ca4e225..a45bfe609d5bc 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -1201,6 +1201,12 @@ cdef class TextReader: result, dtype=dtype, true_values=true_values, false_values=false_values) else: + if chr(self.parser.decimal) != '.': + for i in range(len(result)): + # print(result[i], type(result[i])) + #str(chr(self.parser.decimal)) + result[i] = result[i].replace(chr(self.parser.decimal), ".") + result = array_type._from_sequence_of_strings(result, dtype=dtype) except NotImplementedError: raise NotImplementedError( diff --git a/pandas/tests/libs/test_lib.py b/pandas/tests/libs/test_lib.py index 383e1b81e17a7..4e17edbe556b1 100644 --- a/pandas/tests/libs/test_lib.py +++ b/pandas/tests/libs/test_lib.py @@ -9,6 +9,8 @@ from pandas import Index import pandas._testing as tm +import pandas as pd +import io class TestMisc: @@ -270,3 +272,13 @@ def test_no_default_pickle(): # GH#40397 obj = tm.round_trip_pickle(lib.no_default) assert obj is lib.no_default + +def test_pdfloat64_decimal_separator(): + df1 = pd.read_csv(io.StringIO('id\n"1,5"\n"1,6"\n'), dtype={'id':pd.Float64Dtype()}, sep=';', decimal=',') + df2 = pd.read_csv(io.StringIO('id\n"1.5"\n"1.6"\n'), dtype={'id':pd.Float64Dtype()}, sep=',', decimal='.') + assert df1.equals(df2) + + df1 = pd.read_csv(io.StringIO('id\n"1,5"\n'), dtype={'id':pd.Float64Dtype()}, sep=';', decimal=',') + df2 = pd.read_csv(io.StringIO('id\n"1.5"\n'), dtype={'id':pd.Float64Dtype()}, sep=',', decimal='.') + assert df1.equals(df2) + From e06290a423c675d12f219dea1148d6e4691f322b Mon Sep 17 00:00:00 2001 From: hongyy Date: Sun, 16 Apr 2023 17:58:11 -0400 
Subject: [PATCH 2/4] fix_for_pdfloat64_decimal1 --- pandas/_libs/parsers.pyx | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index a45bfe609d5bc..00d08c5727a8b 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -1203,8 +1203,6 @@ cdef class TextReader: else: if chr(self.parser.decimal) != '.': for i in range(len(result)): - # print(result[i], type(result[i])) - #str(chr(self.parser.decimal)) result[i] = result[i].replace(chr(self.parser.decimal), ".") result = array_type._from_sequence_of_strings(result, dtype=dtype) From 681440d3e888b1edd3b6e4c1dd2d82ee71d553e1 Mon Sep 17 00:00:00 2001 From: hongyy Date: Mon, 17 Apr 2023 00:44:14 -0400 Subject: [PATCH 3/4] fix_for_pdfloat64_decimal2 --- doc/source/whatsnew/v2.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index c3355757350b9..61856e0a7538e 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -308,6 +308,7 @@ Conversion ^^^^^^^^^^ - Bug in :meth:`ArrowDtype.numpy_dtype` returning nanosecond units for non-nanosecond ``pyarrow.timestamp`` and ``pyarrow.duration`` types (:issue:`51800`) - Bug in :meth:`DataFrame.info` raising ``ValueError`` when ``use_numba`` is set (:issue:`51922`) +- Bug in :meth:`TextReader._convert_with_dtype` raising ``ValueError`` when ``,`` is used as the decimal separator (:issue:`52086`) - Strings From 3af131bb1e38658db5327bea329d40f7ed68dd45 Mon Sep 17 00:00:00 2001 From: hongyy Date: Mon, 17 Apr 2023 08:26:23 -0400 Subject: [PATCH 4/4] fix_for_pdfloat64_decimal3 --- pandas/_libs/parsers.pyx | 3 +-- pandas/tests/libs/test_lib.py | 35 ++++++++++++++++++++++++++++------- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 00d08c5727a8b..ed572513910c6 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -1201,10 +1201,9 @@ cdef class 
TextReader: result, dtype=dtype, true_values=true_values, false_values=false_values) else: - if chr(self.parser.decimal) != '.': + if chr(self.parser.decimal) != ".": for i in range(len(result)): result[i] = result[i].replace(chr(self.parser.decimal), ".") - result = array_type._from_sequence_of_strings(result, dtype=dtype) except NotImplementedError: raise NotImplementedError( diff --git a/pandas/tests/libs/test_lib.py b/pandas/tests/libs/test_lib.py index 4e17edbe556b1..92a0778880995 100644 --- a/pandas/tests/libs/test_lib.py +++ b/pandas/tests/libs/test_lib.py @@ -1,3 +1,5 @@ +import io + import numpy as np import pytest @@ -7,10 +9,9 @@ writers as libwriters, ) +import pandas as pd from pandas import Index import pandas._testing as tm -import pandas as pd -import io class TestMisc: @@ -273,12 +274,32 @@ def test_no_default_pickle(): obj = tm.round_trip_pickle(lib.no_default) assert obj is lib.no_default + def test_pdfloat64_decimal_separator(): - df1 = pd.read_csv(io.StringIO('id\n"1,5"\n"1,6"\n'), dtype={'id':pd.Float64Dtype()}, sep=';', decimal=',') - df2 = pd.read_csv(io.StringIO('id\n"1.5"\n"1.6"\n'), dtype={'id':pd.Float64Dtype()}, sep=',', decimal='.') + df1 = pd.read_csv( + io.StringIO('id\n"1,5"\n"1,6"\n'), + dtype={"id": pd.Float64Dtype()}, + sep=";", + decimal=",", + ) + df2 = pd.read_csv( + io.StringIO('id\n"1.5"\n"1.6"\n'), + dtype={"id": pd.Float64Dtype()}, + sep=",", + decimal=".", + ) assert df1.equals(df2) - df1 = pd.read_csv(io.StringIO('id\n"1,5"\n'), dtype={'id':pd.Float64Dtype()}, sep=';', decimal=',') - df2 = pd.read_csv(io.StringIO('id\n"1.5"\n'), dtype={'id':pd.Float64Dtype()}, sep=',', decimal='.') + df1 = pd.read_csv( + io.StringIO('id\n"1,5"\n'), + dtype={"id": pd.Float64Dtype()}, + sep=";", + decimal=",", + ) + df2 = pd.read_csv( + io.StringIO('id\n"1.5"\n'), + dtype={"id": pd.Float64Dtype()}, + sep=",", + decimal=".", + ) assert df1.equals(df2) -