Skip to content

Commit 766e2fc

Browse files
authored
Incorrect reading of CSV containing large integers Issue#52505 (#54679)
* mend * mmend * mmend * mmend
1 parent 8fe161c commit 766e2fc

File tree

3 files changed

+22
-2
lines changed

3 files changed

+22
-2
lines changed

doc/source/whatsnew/v2.2.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ Timezones
185185

186186
Numeric
187187
^^^^^^^
188-
-
188+
- Bug in :func:`read_csv` with ``engine="pyarrow"`` causing rounding errors for large integers (:issue:`52505`)
189189
-
190190

191191
Conversion

pandas/io/parsers/arrow_parser_wrapper.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -223,5 +223,8 @@ def read(self) -> DataFrame:
223223
elif using_pyarrow_string_dtype():
224224
frame = table.to_pandas(types_mapper=arrow_string_types_mapper())
225225
else:
226-
frame = table.to_pandas()
226+
if isinstance(self.kwds.get("dtype"), dict):
227+
frame = table.to_pandas(types_mapper=self.kwds["dtype"].get)
228+
else:
229+
frame = table.to_pandas()
227230
return self._finalize_pandas_output(frame)

pandas/tests/io/parser/dtypes/test_dtypes_basic.py

+17
Original file line numberDiff line numberDiff line change
@@ -558,3 +558,20 @@ def test_string_inference(all_parsers):
558558
columns=pd.Index(["a", "b"], dtype=dtype),
559559
)
560560
tm.assert_frame_equal(result, expected)
561+
562+
563+
def test_accurate_parsing_of_large_integers(all_parsers):
    """Check that 19-digit integer IDs are read from CSV without rounding.

    The ``ID_DEAL`` column holds values that differ only in their last
    digit, and its first row is empty, so it is read with the nullable
    ``Int64`` dtype. If any value were rounded while parsing, the per-value
    row counts asserted below would no longer match.

    Parameters
    ----------
    all_parsers
        Parser fixture whose ``read_csv`` method is exercised.
        NOTE(review): presumably parametrized over the available engines
        (including ``engine="pyarrow"``) -- confirm against conftest.
    """
    # GH#52505
    data = """SYMBOL,MOMENT,ID,ID_DEAL
AAPL,20230301181139587,1925036343869802844,
AAPL,20230301181139587,2023552585717889863,2023552585717263358
NVDA,20230301181139587,2023552585717889863,2023552585717263359
AMC,20230301181139587,2023552585717889863,2023552585717263360
AMZN,20230301181139587,2023552585717889759,2023552585717263360
MSFT,20230301181139587,2023552585717889863,2023552585717263361
NVDA,20230301181139587,2023552585717889827,2023552585717263361"""
    # Read through the fixture rather than ``pd.read_csv`` so the parser
    # under test is actually the one that parses the data.
    orders = all_parsers.read_csv(StringIO(data), dtype={"ID_DEAL": pd.Int64Dtype()})
    assert len(orders.loc[orders["ID_DEAL"] == 2023552585717263358, "ID_DEAL"]) == 1
    assert len(orders.loc[orders["ID_DEAL"] == 2023552585717263359, "ID_DEAL"]) == 1
    assert len(orders.loc[orders["ID_DEAL"] == 2023552585717263360, "ID_DEAL"]) == 2
    assert len(orders.loc[orders["ID_DEAL"] == 2023552585717263361, "ID_DEAL"]) == 2

0 commit comments

Comments
 (0)