Skip to content

Commit 27c326a

Browse files
TST (string dtype): add test build with future strings enabled without pyarrow (#59437)
* TST (string dtype): add test build with future strings enabled without pyarrow
* ensure the build doesn't override the default ones
* uninstall -> remove
* avoid jobs with same env being cancelled
* use different python version for both future jobs
* add some xfails
* fixup xfails
* less strict
1 parent 603f105 commit 27c326a

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

45 files changed

+324
-33
lines changed

.github/actions/setup-conda/action.yml

+6
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,9 @@ runs:
1414
condarc-file: ci/.condarc
1515
cache-environment: true
1616
cache-downloads: true
17+
18+
- name: Uninstall pyarrow
19+
if: ${{ env.REMOVE_PYARROW == '1' }}
20+
run: |
21+
micromamba remove -y pyarrow
22+
shell: bash -el {0}

.github/workflows/unit-tests.yml

+6-1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ jobs:
2929
env_file: [actions-310.yaml, actions-311.yaml, actions-312.yaml]
3030
# Prevent the include jobs from overriding other jobs
3131
pattern: [""]
32+
pandas_future_infer_string: ["0"]
3233
include:
3334
- name: "Downstream Compat"
3435
env_file: actions-311-downstream_compat.yaml
@@ -58,6 +59,9 @@ jobs:
5859
# It will be temporarily activated during tests with locale.setlocale
5960
extra_loc: "zh_CN"
6061
- name: "Future infer strings"
62+
env_file: actions-312.yaml
63+
pandas_future_infer_string: "1"
64+
- name: "Future infer strings (without pyarrow)"
6165
env_file: actions-311.yaml
6266
pandas_future_infer_string: "1"
6367
- name: "Pypy"
@@ -85,9 +89,10 @@ jobs:
8589
NPY_PROMOTION_STATE: ${{ matrix.env_file == 'actions-311-numpydev.yaml' && 'weak' || 'legacy' }}
8690
# Clipboard tests
8791
QT_QPA_PLATFORM: offscreen
92+
REMOVE_PYARROW: ${{ matrix.name == 'Future infer strings (without pyarrow)' && '1' || '0' }}
8893
concurrency:
8994
# https://github.community/t/concurrecy-not-work-for-push/183068/7
90-
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}}
95+
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_future_infer_string }}
9196
cancel-in-progress: true
9297

9398
services:

pandas/tests/apply/test_frame_apply.py

+5
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
from pandas._config import using_string_dtype
88

9+
from pandas.compat import HAS_PYARROW
10+
911
from pandas.core.dtypes.dtypes import CategoricalDtype
1012

1113
import pandas as pd
@@ -1245,6 +1247,9 @@ def test_agg_multiple_mixed():
12451247
tm.assert_frame_equal(result, expected)
12461248

12471249

1250+
@pytest.mark.xfail(
1251+
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
1252+
)
12481253
def test_agg_multiple_mixed_raises():
12491254
# GH 20909
12501255
mdf = DataFrame(

pandas/tests/apply/test_invalid_arg.py

+11
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
import numpy as np
1313
import pytest
1414

15+
from pandas._config import using_string_dtype
16+
17+
from pandas.compat import HAS_PYARROW
1518
from pandas.errors import SpecificationError
1619

1720
from pandas import (
@@ -209,6 +212,10 @@ def transform(row):
209212
data.apply(transform, axis=1)
210213

211214

215+
# we should raise a proper TypeError instead of propagating the pyarrow error
216+
@pytest.mark.xfail(
217+
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
218+
)
212219
@pytest.mark.parametrize(
213220
"df, func, expected",
214221
tm.get_cython_table_params(
@@ -229,6 +236,10 @@ def test_agg_cython_table_raises_frame(df, func, expected, axis, using_infer_str
229236
df.agg(func, axis=axis)
230237

231238

239+
# we should raise a proper TypeError instead of propagating the pyarrow error
240+
@pytest.mark.xfail(
241+
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
242+
)
232243
@pytest.mark.parametrize(
233244
"series, func, expected",
234245
chain(

pandas/tests/apply/test_numba.py

+1
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ def test_numba_nonunique_unsupported(apply_axis):
104104

105105

106106
def test_numba_unsupported_dtypes(apply_axis):
107+
pytest.importorskip("pyarrow")
107108
f = lambda x: x
108109
df = DataFrame({"a": [1, 2], "b": ["a", "b"], "c": [4, 5]})
109110
df["c"] = df["c"].astype("double[pyarrow]")

pandas/tests/arithmetic/test_object.py

+6
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
import numpy as np
99
import pytest
1010

11+
from pandas._config import using_string_dtype
12+
13+
from pandas.compat import HAS_PYARROW
1114
import pandas.util._test_decorators as td
1215

1316
import pandas as pd
@@ -315,6 +318,9 @@ def test_add(self):
315318
expected = pd.Index(["1a", "1b", "1c"])
316319
tm.assert_index_equal("1" + index, expected)
317320

321+
@pytest.mark.xfail(
322+
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
323+
)
318324
def test_sub_fail(self, using_infer_string):
319325
index = pd.Index([str(i) for i in range(10)])
320326

pandas/tests/arrays/boolean/test_arithmetic.py

+7
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
import numpy as np
44
import pytest
55

6+
from pandas._config import using_string_dtype
7+
8+
from pandas.compat import HAS_PYARROW
9+
610
import pandas as pd
711
import pandas._testing as tm
812

@@ -90,6 +94,9 @@ def test_op_int8(left_array, right_array, opname):
9094
# -----------------------------------------------------------------------------
9195

9296

97+
@pytest.mark.xfail(
98+
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
99+
)
93100
def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
94101
# invalid ops
95102

pandas/tests/arrays/categorical/test_analytics.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@
66

77
from pandas._config import using_string_dtype
88

9-
from pandas.compat import PYPY
9+
from pandas.compat import (
10+
HAS_PYARROW,
11+
PYPY,
12+
)
1013

1114
from pandas import (
1215
Categorical,
@@ -296,7 +299,9 @@ def test_nbytes(self):
296299
exp = 3 + 3 * 8 # 3 int8s for values + 3 int64s for categories
297300
assert cat.nbytes == exp
298301

299-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
302+
@pytest.mark.xfail(
303+
using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)"
304+
)
300305
def test_memory_usage(self):
301306
cat = Categorical([1, 2, 3])
302307

pandas/tests/arrays/categorical/test_constructors.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
from pandas._config import using_string_dtype
1010

11+
from pandas.compat import HAS_PYARROW
12+
1113
from pandas.core.dtypes.common import (
1214
is_float_dtype,
1315
is_integer_dtype,
@@ -442,7 +444,9 @@ def test_constructor_str_unknown(self):
442444
with pytest.raises(ValueError, match="Unknown dtype"):
443445
Categorical([1, 2], dtype="foo")
444446

445-
@pytest.mark.xfail(using_string_dtype(), reason="Can't be NumPy strings")
447+
@pytest.mark.xfail(
448+
using_string_dtype() and HAS_PYARROW, reason="Can't be NumPy strings"
449+
)
446450
def test_constructor_np_strs(self):
447451
# GH#31499 Hashtable.map_locations needs to work on np.str_ objects
448452
cat = Categorical(["1", "0", "1"], [np.str_("0"), np.str_("1")])

pandas/tests/arrays/integer/test_reduction.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
from pandas.compat import HAS_PYARROW
5+
46
import pandas as pd
57
from pandas import (
68
DataFrame,
@@ -102,9 +104,10 @@ def test_groupby_reductions(op, expected):
102104
["all", Series([True, True, True], index=["A", "B", "C"], dtype="boolean")],
103105
],
104106
)
105-
def test_mixed_reductions(op, expected, using_infer_string):
106-
if op in ["any", "all"] and using_infer_string:
107-
expected = expected.astype("bool")
107+
def test_mixed_reductions(request, op, expected, using_infer_string):
108+
if op in ["any", "all"] and using_infer_string and HAS_PYARROW:
109+
# TODO(infer_string) inconsistent result type
110+
request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
108111
df = DataFrame(
109112
{
110113
"A": ["a", "b", "b"],

pandas/tests/base/test_conversion.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
import numpy as np
22
import pytest
33

4+
from pandas._config import using_string_dtype
5+
6+
from pandas.compat import HAS_PYARROW
7+
48
from pandas.core.dtypes.dtypes import DatetimeTZDtype
59

610
import pandas as pd
@@ -20,6 +24,7 @@
2024
SparseArray,
2125
TimedeltaArray,
2226
)
27+
from pandas.core.arrays.string_ import StringArrayNumpySemantics
2328
from pandas.core.arrays.string_arrow import ArrowStringArrayNumpySemantics
2429

2530

@@ -218,7 +223,9 @@ def test_iter_box_period(self):
218223
)
219224
def test_values_consistent(arr, expected_type, dtype, using_infer_string):
220225
if using_infer_string and dtype == "object":
221-
expected_type = ArrowStringArrayNumpySemantics
226+
expected_type = (
227+
ArrowStringArrayNumpySemantics if HAS_PYARROW else StringArrayNumpySemantics
228+
)
222229
l_values = Series(arr)._values
223230
r_values = pd.Index(arr)._values
224231
assert type(l_values) is expected_type
@@ -355,6 +362,9 @@ def test_to_numpy(arr, expected, index_or_series_or_array, request):
355362
tm.assert_numpy_array_equal(result, expected)
356363

357364

365+
@pytest.mark.xfail(
366+
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
367+
)
358368
@pytest.mark.parametrize("as_series", [True, False])
359369
@pytest.mark.parametrize(
360370
"arr", [np.array([1, 2, 3], dtype="int64"), np.array(["a", "b", "c"], dtype=object)]

pandas/tests/copy_view/test_astype.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from pandas._config import using_string_dtype
77

8+
from pandas.compat import HAS_PYARROW
89
from pandas.compat.pyarrow import pa_version_under12p0
910
import pandas.util._test_decorators as td
1011

@@ -197,7 +198,7 @@ def test_astype_arrow_timestamp():
197198
assert np.shares_memory(get_array(df, "a"), get_array(result, "a")._pa_array)
198199

199200

200-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
201+
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
201202
def test_convert_dtypes_infer_objects():
202203
ser = Series(["a", "b", "c"])
203204
ser_orig = ser.copy()
@@ -213,7 +214,7 @@ def test_convert_dtypes_infer_objects():
213214
tm.assert_series_equal(ser, ser_orig)
214215

215216

216-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
217+
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
217218
def test_convert_dtypes():
218219
df = DataFrame({"a": ["a", "b"], "b": [1, 2], "c": [1.5, 2.5], "d": [True, False]})
219220
df_orig = df.copy()

pandas/tests/copy_view/test_functions.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33

44
from pandas._config import using_string_dtype
55

6+
from pandas.compat import HAS_PYARROW
7+
68
from pandas import (
79
DataFrame,
810
Index,
@@ -14,7 +16,7 @@
1416
from pandas.tests.copy_view.util import get_array
1517

1618

17-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
19+
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
1820
def test_concat_frames():
1921
df = DataFrame({"b": ["a"] * 3})
2022
df2 = DataFrame({"a": ["a"] * 3})
@@ -33,7 +35,7 @@ def test_concat_frames():
3335
tm.assert_frame_equal(df, df_orig)
3436

3537

36-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
38+
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
3739
def test_concat_frames_updating_input():
3840
df = DataFrame({"b": ["a"] * 3})
3941
df2 = DataFrame({"a": ["a"] * 3})
@@ -153,7 +155,7 @@ def test_concat_copy_keyword():
153155
assert np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
154156

155157

156-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
158+
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
157159
@pytest.mark.parametrize(
158160
"func",
159161
[
@@ -249,7 +251,7 @@ def test_merge_copy_keyword():
249251
assert np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
250252

251253

252-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
254+
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
253255
def test_join_on_key():
254256
df_index = Index(["a", "b", "c"], name="key")
255257

@@ -277,7 +279,7 @@ def test_join_on_key():
277279
tm.assert_frame_equal(df2, df2_orig)
278280

279281

280-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
282+
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
281283
def test_join_multiple_dataframes_on_key():
282284
df_index = Index(["a", "b", "c"], name="key")
283285

pandas/tests/copy_view/test_interp_fillna.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33

44
from pandas._config import using_string_dtype
55

6+
from pandas.compat import HAS_PYARROW
7+
68
from pandas import (
79
NA,
810
DataFrame,
@@ -121,7 +123,7 @@ def test_interpolate_cannot_with_object_dtype():
121123
df.interpolate()
122124

123125

124-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
126+
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
125127
def test_interpolate_object_convert_no_op():
126128
df = DataFrame({"a": ["a", "b", "c"], "b": 1})
127129
arr_a = get_array(df, "a")

pandas/tests/copy_view/test_methods.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33

44
from pandas._config import using_string_dtype
55

6+
from pandas.compat import HAS_PYARROW
7+
68
import pandas as pd
79
from pandas import (
810
DataFrame,
@@ -714,7 +716,7 @@ def test_head_tail(method):
714716
tm.assert_frame_equal(df, df_orig)
715717

716718

717-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
719+
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
718720
def test_infer_objects():
719721
df = DataFrame({"a": [1, 2], "b": "c", "c": 1, "d": "x"})
720722
df_orig = df.copy()
@@ -730,6 +732,9 @@ def test_infer_objects():
730732
tm.assert_frame_equal(df, df_orig)
731733

732734

735+
@pytest.mark.xfail(
736+
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
737+
)
733738
def test_infer_objects_no_reference():
734739
df = DataFrame(
735740
{
@@ -899,7 +904,7 @@ def test_sort_values_inplace(obj, kwargs):
899904
tm.assert_equal(view, obj_orig)
900905

901906

902-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
907+
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
903908
@pytest.mark.parametrize("decimals", [-1, 0, 1])
904909
def test_round(decimals):
905910
df = DataFrame({"a": [1, 2], "b": "c"})

0 commit comments

Comments
 (0)