Skip to content

Commit 267512f

Browse files
phofl authored and mroeschke committed
Fix roundtripping with pyarrow schema (pandas-dev#54768)
* Fix roundtripping with pyarrow schema
* Skip for lower versions
1 parent 8e3963b commit 267512f

File tree

2 files changed

+16
-1
lines changed

2 files changed

+16
-1
lines changed

pandas/core/arrays/string_.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
missing as libmissing,
1616
)
1717
from pandas._libs.arrays import NDArrayBacked
18+
from pandas._libs.lib import ensure_string_array
1819
from pandas.compat import pa_version_under7p0
1920
from pandas.compat.numpy import function as nv
2021
from pandas.util._decorators import doc
@@ -224,7 +225,7 @@ def __from_arrow__(
224225
arr = np.array([], dtype=object)
225226
else:
226227
arr = pyarrow.concat_arrays(chunks).to_numpy(zero_copy_only=False)
227-
arr = lib.convert_nans_to_NA(arr)
228+
arr = ensure_string_array(arr, na_value=libmissing.NA)
228229
# Bypass validation inside StringArray constructor, see GH#47781
229230
new_string_array = StringArray.__new__(StringArray)
230231
NDArrayBacked.__init__(

pandas/tests/io/test_parquet.py

+14
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
""" test parquet compat """
22
import datetime
3+
from decimal import Decimal
34
from io import BytesIO
45
import os
56
import pathlib
@@ -16,6 +17,7 @@
1617
from pandas.compat.pyarrow import (
1718
pa_version_under7p0,
1819
pa_version_under8p0,
20+
pa_version_under11p0,
1921
pa_version_under13p0,
2022
)
2123

@@ -1125,6 +1127,18 @@ def test_string_inference(self, tmp_path, pa):
11251127
)
11261128
tm.assert_frame_equal(result, expected)
11271129

1130+
@pytest.mark.skipif(pa_version_under11p0, reason="not supported before 11.0")
1131+
def test_roundtrip_decimal(self, tmp_path, pa):
1132+
# GH#54768
1133+
import pyarrow as pa
1134+
1135+
path = tmp_path / "decimal.p"
1136+
df = pd.DataFrame({"a": [Decimal("123.00")]}, dtype="string[pyarrow]")
1137+
df.to_parquet(path, schema=pa.schema([("a", pa.decimal128(5))]))
1138+
result = read_parquet(path)
1139+
expected = pd.DataFrame({"a": ["123"]}, dtype="string[python]")
1140+
tm.assert_frame_equal(result, expected)
1141+
11281142

11291143
class TestParquetFastParquet(Base):
11301144
def test_basic(self, fp, df_full):

0 commit comments

Comments
 (0)