From 7418a6706b39993769392780bcce728f29035816 Mon Sep 17 00:00:00 2001 From: vitorcf10 Date: Thu, 30 Nov 2023 02:47:25 -0300 Subject: [PATCH 1/3] FIX: Solving Int64 precision loss when read_csv(StringIO(data), dtype={a:Int64}, engine=pyarrow) --- pandas/io/parsers/arrow_parser_wrapper.py | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py index 5786073c9d9cc..15eebd07ac258 100644 --- a/pandas/io/parsers/arrow_parser_wrapper.py +++ b/pandas/io/parsers/arrow_parser_wrapper.py @@ -273,19 +273,6 @@ def read(self) -> DataFrame: dtype_backend = self.kwds["dtype_backend"] - # Convert all pa.null() cols -> float64 (non nullable) - # else Int64 (nullable case, see below) - if dtype_backend is lib.no_default: - new_schema = table.schema - new_type = pa.float64() - for i, arrow_type in enumerate(table.schema.types): - if pa.types.is_null(arrow_type): - new_schema = new_schema.set( - i, new_schema.field(i).with_type(new_type) - ) - - table = table.cast(new_schema) - if dtype_backend == "pyarrow": frame = table.to_pandas(types_mapper=pd.ArrowDtype) elif dtype_backend == "numpy_nullable": @@ -298,7 +285,7 @@ def read(self) -> DataFrame: frame = table.to_pandas(types_mapper=arrow_string_types_mapper()) else: if isinstance(self.kwds.get("dtype"), dict): - frame = table.to_pandas(types_mapper=self.kwds["dtype"].get) + frame = table.to_pandas(types_mapper=self.kwds["dtype"].get, integer_object_nulls=True) else: frame = table.to_pandas() return self._finalize_pandas_output(frame) From 750e8af565169da558f4bdc178dbbf57c36cda93 Mon Sep 17 00:00:00 2001 From: vitorcf10 Date: Thu, 30 Nov 2023 04:53:26 -0300 Subject: [PATCH 2/3] Changing for test approval. --- pandas/io/parsers/arrow_parser_wrapper.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py index 15eebd07ac258..4da18b01a775d 100644 --- a/pandas/io/parsers/arrow_parser_wrapper.py +++ b/pandas/io/parsers/arrow_parser_wrapper.py @@ -272,6 +272,19 @@ def read(self) -> DataFrame: raise ParserError(e) from e dtype_backend = self.kwds["dtype_backend"] + + # Convert all pa.null() cols -> float64 (non nullable) + # else Int64 (nullable case, see below) + if dtype_backend is lib.no_default: + new_schema = table.schema + new_type = pa.float64() + for i, arrow_type in enumerate(table.schema.types): + if pa.types.is_null(arrow_type): + new_schema = new_schema.set( + i, new_schema.field(i).with_type(new_type) + ) + + table = table.cast(new_schema) if dtype_backend == "pyarrow": frame = table.to_pandas(types_mapper=pd.ArrowDtype) From 23d37726b09a610a088a1e93334c5e3a3ddec0e1 Mon Sep 17 00:00:00 2001 From: vitorcf10 Date: Thu, 30 Nov 2023 06:15:29 -0300 Subject: [PATCH 3/3] testcommit --- pandas/io/parsers/arrow_parser_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py index 4da18b01a775d..4eec761ab76de 100644 --- a/pandas/io/parsers/arrow_parser_wrapper.py +++ b/pandas/io/parsers/arrow_parser_wrapper.py @@ -298,7 +298,7 @@ def read(self) -> DataFrame: frame = table.to_pandas(types_mapper=arrow_string_types_mapper()) else: if isinstance(self.kwds.get("dtype"), dict): - frame = table.to_pandas(types_mapper=self.kwds["dtype"].get, integer_object_nulls=True) + frame = table.to_pandas(types_mapper=self.kwds["dtype"].get) else: frame = table.to_pandas() return self._finalize_pandas_output(frame)