pandas-dev · mroeschke · Aug 24, 2023 · Aug 21, 2023 · Aug 23, 2023 · Aug 23, 2023
diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst
@@ -139,7 +139,7 @@ Timezones
 
 Numeric
 ^^^^^^^
--
+- Bug in :func:`read_csv` with ``engine="pyarrow"`` causing rounding errors for large integers (:issue:`52505`)
 -
 
 Conversion

diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py
@@ -223,5 +223,10 @@ def read(self) -> DataFrame:
         elif using_pyarrow_string_dtype():
             frame = table.to_pandas(types_mapper=arrow_string_types_mapper())
         else:
-            frame = table.to_pandas()
+            if self.kwds.get("dtype") is not None and isinstance(
+                type(self.kwds.get("dtype")), dict
-            if self.kwds.get("dtype") is not None and isinstance(
-                type(self.kwds.get("dtype")), dict
+            if isinstance(self.kwds.get("dtype"), dict):
-            if self.kwds.get("dtype") is not None and isinstance(
-                type(self.kwds.get("dtype")), dict
+            if isinstance(self.kwds.get("dtype"), dict):
+            ):
+                frame = table.to_pandas(types_mapper=self.kwds["dtype"].get)
+            else:
+                frame = table.to_pandas()
         return self._finalize_pandas_output(frame)
diff --git a/pandas/tests/io/parser/dtypes/test_dtypes_basic.py b/pandas/tests/io/parser/dtypes/test_dtypes_basic.py
@@ -558,3 +558,20 @@ def test_string_inference(all_parsers):
         columns=pd.Index(["a", "b"], dtype=dtype),
     )
     tm.assert_frame_equal(result, expected)
+
+
+def test_accurate_parsing_of_large_integers(all_parsers):
+    # GH#52505: each parser from the fixture must keep large integers exact
+    data = """SYMBOL,MOMENT,ID,ID_DEAL
+AAPL,20230301181139587,1925036343869802844,
+AAPL,20230301181139587,2023552585717889863,2023552585717263358
+NVDA,20230301181139587,2023552585717889863,2023552585717263359
+AMC,20230301181139587,2023552585717889863,2023552585717263360
+AMZN,20230301181139587,2023552585717889759,2023552585717263360
+MSFT,20230301181139587,2023552585717889863,2023552585717263361
+NVDA,20230301181139587,2023552585717889827,2023552585717263361"""
+    orders = all_parsers.read_csv(StringIO(data), dtype={"ID_DEAL": pd.Int64Dtype()})
+    assert len(orders.loc[orders["ID_DEAL"] == 2023552585717263358, "ID_DEAL"]) == 1
+    assert len(orders.loc[orders["ID_DEAL"] == 2023552585717263359, "ID_DEAL"]) == 1
+    assert len(orders.loc[orders["ID_DEAL"] == 2023552585717263360, "ID_DEAL"]) == 2
+    assert len(orders.loc[orders["ID_DEAL"] == 2023552585717263361, "ID_DEAL"]) == 2