Skip to content

Commit d42fbed

Browse files
Backport PR pandas-dev#54768 on branch 2.1.x (Fix roundtripping with pyarrow schema) (pandas-dev#54773)
Backport PR pandas-dev#54768: Fix roundtripping with pyarrow schema

Co-authored-by: Patrick Hoefler <[email protected]>
1 parent 605aea2 commit d42fbed

File tree

2 files changed

+16
-1
lines changed

2 files changed

+16
-1
lines changed

pandas/core/arrays/string_.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
missing as libmissing,
1515
)
1616
from pandas._libs.arrays import NDArrayBacked
17+
from pandas._libs.lib import ensure_string_array
1718
from pandas.compat import pa_version_under7p0
1819
from pandas.compat.numpy import function as nv
1920
from pandas.util._decorators import doc
@@ -221,7 +222,7 @@ def __from_arrow__(
221222
arr = np.array([], dtype=object)
222223
else:
223224
arr = pyarrow.concat_arrays(chunks).to_numpy(zero_copy_only=False)
224-
arr = lib.convert_nans_to_NA(arr)
225+
arr = ensure_string_array(arr, na_value=libmissing.NA)
225226
# Bypass validation inside StringArray constructor, see GH#47781
226227
new_string_array = StringArray.__new__(StringArray)
227228
NDArrayBacked.__init__(

pandas/tests/io/test_parquet.py

+14
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
""" test parquet compat """
22
import datetime
3+
from decimal import Decimal
34
from io import BytesIO
45
import os
56
import pathlib
@@ -16,6 +17,7 @@
1617
from pandas.compat.pyarrow import (
1718
pa_version_under7p0,
1819
pa_version_under8p0,
20+
pa_version_under11p0,
1921
pa_version_under13p0,
2022
)
2123

@@ -1111,6 +1113,18 @@ def test_string_inference(self, tmp_path, pa):
11111113
)
11121114
tm.assert_frame_equal(result, expected)
11131115

1116+
@pytest.mark.skipif(pa_version_under11p0, reason="not supported before 11.0")
1117+
def test_roundtrip_decimal(self, tmp_path, pa):
1118+
# GH#54768
1119+
import pyarrow as pa
1120+
1121+
path = tmp_path / "decimal.p"
1122+
df = pd.DataFrame({"a": [Decimal("123.00")]}, dtype="string[pyarrow]")
1123+
df.to_parquet(path, schema=pa.schema([("a", pa.decimal128(5))]))
1124+
result = read_parquet(path)
1125+
expected = pd.DataFrame({"a": ["123"]}, dtype="string[python]")
1126+
tm.assert_frame_equal(result, expected)
1127+
11141128

11151129
class TestParquetFastParquet(Base):
11161130
def test_basic(self, fp, df_full):

0 commit comments

Comments
 (0)