Skip to content

Commit 2c76740

Browse files
TST (string dtype): fix and clean up arrow roundtrip tests (#59678)
* TST (string dtype): fix and clean up arrow roundtrip tests
* fix using_infer_string
1 parent e07453e commit 2c76740

File tree

2 files changed

+13
-12
lines changed

2 files changed

+13
-12
lines changed

pandas/tests/arrays/masked/test_arrow_compat.py

+3 -8
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,12 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._config import using_string_dtype
5-
64
import pandas as pd
75
import pandas._testing as tm
86

9-
pytestmark = [
10-
pytest.mark.filterwarnings(
11-
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
12-
),
13-
pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
14-
]
7+
pytestmark = pytest.mark.filterwarnings(
8+
"ignore:Passing a BlockManager to DataFrame:DeprecationWarning"
9+
)
1510

1611

1712
pa = pytest.importorskip("pyarrow")

pandas/tests/arrays/string_/test_string.py

+10 -4
Original file line number | Diff line number | Diff line change
@@ -522,7 +522,6 @@ def test_arrow_array(dtype):
522522
assert arr.equals(expected)
523523

524524

525-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
526525
@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
527526
def test_arrow_roundtrip(dtype, string_storage, using_infer_string):
528527
# roundtrip possible from arrow 1.0.0
@@ -541,13 +540,16 @@ def test_arrow_roundtrip(dtype, string_storage, using_infer_string):
541540
assert result["a"].dtype == "object"
542541
else:
543542
assert isinstance(result["a"].dtype, pd.StringDtype)
544-
expected = df.astype(f"string[{string_storage}]")
543+
expected = df.astype(pd.StringDtype(string_storage, na_value=dtype.na_value))
544+
if using_infer_string:
545+
expected.columns = expected.columns.astype(
546+
pd.StringDtype(string_storage, na_value=np.nan)
547+
)
545548
tm.assert_frame_equal(result, expected)
546549
# ensure the missing value is represented by NA and not np.nan or None
547550
assert result.loc[2, "a"] is result["a"].dtype.na_value
548551

549552

550-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
551553
@pytest.mark.filterwarnings("ignore:Passing a BlockManager:DeprecationWarning")
552554
def test_arrow_load_from_zero_chunks(dtype, string_storage, using_infer_string):
553555
# GH-41040
@@ -569,7 +571,11 @@ def test_arrow_load_from_zero_chunks(dtype, string_storage, using_infer_string):
569571
assert result["a"].dtype == "object"
570572
else:
571573
assert isinstance(result["a"].dtype, pd.StringDtype)
572-
expected = df.astype(f"string[{string_storage}]")
574+
expected = df.astype(pd.StringDtype(string_storage, na_value=dtype.na_value))
575+
if using_infer_string:
576+
expected.columns = expected.columns.astype(
577+
pd.StringDtype(string_storage, na_value=np.nan)
578+
)
573579
tm.assert_frame_equal(result, expected)
574580

575581

0 commit comments

Comments
 (0)