diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 6fdffb4d78341..138be2457d718 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -139,7 +139,7 @@ Timezones Numeric ^^^^^^^ -- +- Bug in :func:`read_csv` with ``engine="pyarrow"`` causing rounding errors for large integers (:issue:`52505`) - Conversion diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py index 71bfb00a95b50..bb6bcd3c4d6a0 100644 --- a/pandas/io/parsers/arrow_parser_wrapper.py +++ b/pandas/io/parsers/arrow_parser_wrapper.py @@ -223,5 +223,8 @@ def read(self) -> DataFrame: elif using_pyarrow_string_dtype(): frame = table.to_pandas(types_mapper=arrow_string_types_mapper()) else: - frame = table.to_pandas() + if isinstance(self.kwds.get("dtype"), dict): + frame = table.to_pandas(types_mapper=self.kwds["dtype"].get) + else: + frame = table.to_pandas() return self._finalize_pandas_output(frame) diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py index 1c0f0939029ff..f797f6392d56c 100644 --- a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py +++ b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py @@ -558,3 +558,20 @@ def test_string_inference(all_parsers): columns=pd.Index(["a", "b"], dtype=dtype), ) tm.assert_frame_equal(result, expected) + + +def test_accurate_parsing_of_large_integers(all_parsers): + # GH#52505 + data = """SYMBOL,MOMENT,ID,ID_DEAL +AAPL,20230301181139587,1925036343869802844, +AAPL,20230301181139587,2023552585717889863,2023552585717263358 +NVDA,20230301181139587,2023552585717889863,2023552585717263359 +AMC,20230301181139587,2023552585717889863,2023552585717263360 +AMZN,20230301181139587,2023552585717889759,2023552585717263360 +MSFT,20230301181139587,2023552585717889863,2023552585717263361 +NVDA,20230301181139587,2023552585717889827,2023552585717263361""" + orders = pd.read_csv(StringIO(data), dtype={"ID_DEAL": pd.Int64Dtype()}) + assert len(orders.loc[orders["ID_DEAL"] == 2023552585717263358, "ID_DEAL"]) == 1 + assert len(orders.loc[orders["ID_DEAL"] == 2023552585717263359, "ID_DEAL"]) == 1 + assert len(orders.loc[orders["ID_DEAL"] == 2023552585717263360, "ID_DEAL"]) == 2 + assert len(orders.loc[orders["ID_DEAL"] == 2023552585717263361, "ID_DEAL"]) == 2