Skip to content

Commit 205e637

Browse files
TST (string dtype): fix and clean up arrow roundtrip tests (#59678)
* TST (string dtype): fix and clean up arrow roundtrip tests
* fix using_infer_string
1 parent 4de4268 commit 205e637

File tree

2 files changed

+13
-12
lines changed

2 files changed

+13
-12
lines changed

pandas/tests/arrays/masked/test_arrow_compat.py

+3 -8
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,12 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
import pandas as pd
75
import pandas._testing as tm
86

9-
pytestmark = [
10-
pytest.mark.filterwarnings(
11-
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
12-
),
13-
pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
14-
]
7+
pytestmark = pytest.mark.filterwarnings(
8+
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
9+
)
1510

1611

1712
pa = pytest.importorskip("pyarrow")

pandas/tests/arrays/string_/test_string.py

+10 -4
Original file line number | Diff line number | Diff line change
@@ -524,7 +524,6 @@ def test_arrow_array(dtype):
524524
assert arr.equals(expected)
525525

526526

527-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
528527
@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
529528
def test_arrow_roundtrip(dtype, string_storage, using_infer_string):
530529
# roundtrip possible from arrow 1.0.0
@@ -543,13 +542,16 @@ def test_arrow_roundtrip(dtype, string_storage, using_infer_string):
543542
assert result["a"].dtype == "object"
544543
else:
545544
assert isinstance(result["a"].dtype, pd.StringDtype)
546-
expected = df.astype(f"string[{string_storage}]")
545+
expected = df.astype(pd.StringDtype(string_storage, na_value=dtype.na_value))
546+
if using_infer_string:
547+
expected.columns = expected.columns.astype(
548+
pd.StringDtype(string_storage, na_value=np.nan)
549+
)
547550
tm.assert_frame_equal(result, expected)
548551
# ensure the missing value is represented by NA and not np.nan or None
549552
assert result.loc[2, "a"] is result["a"].dtype.na_value
550553

551554

552-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
553555
@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
554556
def test_arrow_load_from_zero_chunks(dtype, string_storage, using_infer_string):
555557
# GH-41040
@@ -571,7 +573,11 @@ def test_arrow_load_from_zero_chunks(dtype, string_storage, using_infer_string):
571573
assert result["a"].dtype == "object"
572574
else:
573575
assert isinstance(result["a"].dtype, pd.StringDtype)
574-
expected = df.astype(f"string[{string_storage}]")
576+
expected = df.astype(pd.StringDtype(string_storage, na_value=dtype.na_value))
577+
if using_infer_string:
578+
expected.columns = expected.columns.astype(
579+
pd.StringDtype(string_storage, na_value=np.nan)
580+
)
575581
tm.assert_frame_equal(result, expected)
576582

577583

0 commit comments

Comments
 (0)