Skip to content

Commit 0c9483f

Browse files
committed
TST (string dtype): add test build with future strings enabled without pyarrow (pandas-dev#59437)
* TST (string dtype): add test build with future strings enabled without pyarrow
* ensure the build doesn't override the default ones
* uninstall -> remove
* avoid jobs with same env being cancelled
* use different python version for both future jobs
* add some xfails
* fixup xfails
* less strict
1 parent 22a62e5 commit 0c9483f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+322
-33
lines changed

.github/actions/setup-conda/action.yml

+6
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,9 @@ runs:
1414
condarc-file: ci/.condarc
1515
cache-environment: true
1616
cache-downloads: true
17+
18+
- name: Uninstall pyarrow
19+
if: ${{ env.REMOVE_PYARROW == '1' }}
20+
run: |
21+
micromamba remove -y pyarrow
22+
shell: bash -el {0}

.github/workflows/unit-tests.yml

+6-1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ jobs:
2929
env_file: [actions-39.yaml, actions-310.yaml, actions-311.yaml, actions-312.yaml]
3030
# Prevent the include jobs from overriding other jobs
3131
pattern: [""]
32+
pandas_future_infer_string: ["0"]
3233
include:
3334
- name: "Downstream Compat"
3435
env_file: actions-311-downstream_compat.yaml
@@ -86,6 +87,9 @@ jobs:
8687
pattern: "not slow and not network and not single_cpu"
8788
pandas_copy_on_write: "warn"
8889
- name: "Future infer strings"
90+
env_file: actions-312.yaml
91+
pandas_future_infer_string: "1"
92+
- name: "Future infer strings (without pyarrow)"
8993
env_file: actions-311.yaml
9094
pandas_future_infer_string: "1"
9195
- name: "Pypy"
@@ -114,9 +118,10 @@ jobs:
114118
NPY_PROMOTION_STATE: ${{ matrix.env_file == 'actions-311-numpydev.yaml' && 'weak' || 'legacy' }}
115119
# Clipboard tests
116120
QT_QPA_PLATFORM: offscreen
121+
REMOVE_PYARROW: ${{ matrix.name == 'Future infer strings (without pyarrow)' && '1' || '0' }}
117122
concurrency:
118123
# https://github.community/t/concurrecy-not-work-for-push/183068/7
119-
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_copy_on_write || '' }}
124+
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_copy_on_write || '' }}-${{ matrix.pandas_future_infer_string }}
120125
cancel-in-progress: true
121126

122127
services:

pandas/tests/apply/test_frame_apply.py

+5
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
from pandas._config import using_string_dtype
88

9+
from pandas.compat import HAS_PYARROW
10+
911
from pandas.core.dtypes.dtypes import CategoricalDtype
1012

1113
import pandas as pd
@@ -1201,6 +1203,9 @@ def test_agg_multiple_mixed():
12011203
tm.assert_frame_equal(result, expected)
12021204

12031205

1206+
@pytest.mark.xfail(
1207+
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
1208+
)
12041209
def test_agg_multiple_mixed_raises():
12051210
# GH 20909
12061211
mdf = DataFrame(

pandas/tests/apply/test_invalid_arg.py

+11
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
import numpy as np
1313
import pytest
1414

15+
from pandas._config import using_string_dtype
16+
17+
from pandas.compat import HAS_PYARROW
1518
from pandas.errors import SpecificationError
1619

1720
from pandas import (
@@ -209,6 +212,10 @@ def transform(row):
209212
data.apply(transform, axis=1)
210213

211214

215+
# we should raise a proper TypeError instead of propagating the pyarrow error
216+
@pytest.mark.xfail(
217+
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
218+
)
212219
@pytest.mark.parametrize(
213220
"df, func, expected",
214221
tm.get_cython_table_params(
@@ -229,6 +236,10 @@ def test_agg_cython_table_raises_frame(df, func, expected, axis, using_infer_str
229236
df.agg(func, axis=axis)
230237

231238

239+
# we should raise a proper TypeError instead of propagating the pyarrow error
240+
@pytest.mark.xfail(
241+
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
242+
)
232243
@pytest.mark.parametrize(
233244
"series, func, expected",
234245
chain(

pandas/tests/apply/test_numba.py

+1
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@ def test_numba_nonunique_unsupported(apply_axis):
104104

105105

106106
def test_numba_unsupported_dtypes(apply_axis):
107+
pytest.importorskip("pyarrow")
107108
f = lambda x: x
108109
df = DataFrame({"a": [1, 2], "b": ["a", "b"], "c": [4, 5]})
109110
df["c"] = df["c"].astype("double[pyarrow]")

pandas/tests/arithmetic/test_object.py

+6
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
import numpy as np
99
import pytest
1010

11+
from pandas._config import using_string_dtype
12+
13+
from pandas.compat import HAS_PYARROW
1114
import pandas.util._test_decorators as td
1215

1316
import pandas as pd
@@ -315,6 +318,9 @@ def test_add(self):
315318
expected = pd.Index(["1a", "1b", "1c"])
316319
tm.assert_index_equal("1" + index, expected)
317320

321+
@pytest.mark.xfail(
322+
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
323+
)
318324
def test_sub_fail(self, using_infer_string):
319325
index = pd.Index([str(i) for i in range(10)])
320326

pandas/tests/arrays/boolean/test_arithmetic.py

+7
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
import numpy as np
44
import pytest
55

6+
from pandas._config import using_string_dtype
7+
8+
from pandas.compat import HAS_PYARROW
9+
610
import pandas as pd
711
import pandas._testing as tm
812

@@ -90,6 +94,9 @@ def test_op_int8(left_array, right_array, opname):
9094
# -----------------------------------------------------------------------------
9195

9296

97+
@pytest.mark.xfail(
98+
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
99+
)
93100
def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
94101
# invalid ops
95102

pandas/tests/arrays/categorical/test_analytics.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@
66

77
from pandas._config import using_string_dtype
88

9-
from pandas.compat import PYPY
9+
from pandas.compat import (
10+
HAS_PYARROW,
11+
PYPY,
12+
)
1013

1114
from pandas import (
1215
Categorical,
@@ -298,7 +301,9 @@ def test_nbytes(self):
298301
exp = 3 + 3 * 8 # 3 int8s for values + 3 int64s for categories
299302
assert cat.nbytes == exp
300303

301-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
304+
@pytest.mark.xfail(
305+
using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)"
306+
)
302307
def test_memory_usage(self):
303308
cat = Categorical([1, 2, 3])
304309

pandas/tests/arrays/categorical/test_constructors.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
from pandas._config import using_string_dtype
1010

11+
from pandas.compat import HAS_PYARROW
12+
1113
from pandas.core.dtypes.common import (
1214
is_float_dtype,
1315
is_integer_dtype,
@@ -449,7 +451,9 @@ def test_constructor_str_unknown(self):
449451
with pytest.raises(ValueError, match="Unknown dtype"):
450452
Categorical([1, 2], dtype="foo")
451453

452-
@pytest.mark.xfail(using_string_dtype(), reason="Can't be NumPy strings")
454+
@pytest.mark.xfail(
455+
using_string_dtype() and HAS_PYARROW, reason="Can't be NumPy strings"
456+
)
453457
def test_constructor_np_strs(self):
454458
# GH#31499 Hashtable.map_locations needs to work on np.str_ objects
455459
cat = Categorical(["1", "0", "1"], [np.str_("0"), np.str_("1")])

pandas/tests/arrays/integer/test_reduction.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
from pandas.compat import HAS_PYARROW
5+
46
import pandas as pd
57
from pandas import (
68
DataFrame,
@@ -102,9 +104,10 @@ def test_groupby_reductions(op, expected):
102104
["all", Series([True, True, True], index=["A", "B", "C"], dtype="boolean")],
103105
],
104106
)
105-
def test_mixed_reductions(op, expected, using_infer_string):
106-
if op in ["any", "all"] and using_infer_string:
107-
expected = expected.astype("bool")
107+
def test_mixed_reductions(request, op, expected, using_infer_string):
108+
if op in ["any", "all"] and using_infer_string and HAS_PYARROW:
109+
# TODO(infer_string) inconsistent result type
110+
request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
108111
df = DataFrame(
109112
{
110113
"A": ["a", "b", "b"],

pandas/tests/base/test_conversion.py

+11-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
import numpy as np
22
import pytest
33

4+
from pandas._config import using_string_dtype
5+
6+
from pandas.compat import HAS_PYARROW
7+
48
from pandas.core.dtypes.dtypes import DatetimeTZDtype
59

610
import pandas as pd
@@ -20,6 +24,7 @@
2024
SparseArray,
2125
TimedeltaArray,
2226
)
27+
from pandas.core.arrays.string_ import StringArrayNumpySemantics
2328
from pandas.core.arrays.string_arrow import ArrowStringArrayNumpySemantics
2429

2530

@@ -218,7 +223,9 @@ def test_iter_box_period(self):
218223
)
219224
def test_values_consistent(arr, expected_type, dtype, using_infer_string):
220225
if using_infer_string and dtype == "object":
221-
expected_type = ArrowStringArrayNumpySemantics
226+
expected_type = (
227+
ArrowStringArrayNumpySemantics if HAS_PYARROW else StringArrayNumpySemantics
228+
)
222229
l_values = Series(arr)._values
223230
r_values = pd.Index(arr)._values
224231
assert type(l_values) is expected_type
@@ -355,6 +362,9 @@ def test_to_numpy(arr, expected, index_or_series_or_array, request):
355362
tm.assert_numpy_array_equal(result, expected)
356363

357364

365+
@pytest.mark.xfail(
366+
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
367+
)
358368
@pytest.mark.parametrize("as_series", [True, False])
359369
@pytest.mark.parametrize(
360370
"arr", [np.array([1, 2, 3], dtype="int64"), np.array(["a", "b", "c"], dtype=object)]

pandas/tests/copy_view/test_astype.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
from pandas._config import using_string_dtype
77

8+
from pandas.compat import HAS_PYARROW
89
from pandas.compat.pyarrow import pa_version_under12p0
910
import pandas.util._test_decorators as td
1011

@@ -223,7 +224,7 @@ def test_astype_arrow_timestamp(using_copy_on_write):
223224
)
224225

225226

226-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
227+
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
227228
def test_convert_dtypes_infer_objects(using_copy_on_write):
228229
ser = Series(["a", "b", "c"])
229230
ser_orig = ser.copy()
@@ -243,7 +244,7 @@ def test_convert_dtypes_infer_objects(using_copy_on_write):
243244
tm.assert_series_equal(ser, ser_orig)
244245

245246

246-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
247+
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
247248
def test_convert_dtypes(using_copy_on_write):
248249
df = DataFrame({"a": ["a", "b"], "b": [1, 2], "c": [1.5, 2.5], "d": [True, False]})
249250
df_orig = df.copy()

pandas/tests/copy_view/test_functions.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33

44
from pandas._config import using_string_dtype
55

6+
from pandas.compat import HAS_PYARROW
7+
68
from pandas import (
79
DataFrame,
810
Index,
@@ -14,7 +16,7 @@
1416
from pandas.tests.copy_view.util import get_array
1517

1618

17-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
19+
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
1820
def test_concat_frames(using_copy_on_write):
1921
df = DataFrame({"b": ["a"] * 3})
2022
df2 = DataFrame({"a": ["a"] * 3})
@@ -39,7 +41,7 @@ def test_concat_frames(using_copy_on_write):
3941
tm.assert_frame_equal(df, df_orig)
4042

4143

42-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
44+
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
4345
def test_concat_frames_updating_input(using_copy_on_write):
4446
df = DataFrame({"b": ["a"] * 3})
4547
df2 = DataFrame({"a": ["a"] * 3})
@@ -201,7 +203,7 @@ def test_concat_copy_keyword(using_copy_on_write, copy):
201203
assert not np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
202204

203205

204-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
206+
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
205207
@pytest.mark.parametrize(
206208
"func",
207209
[
@@ -319,7 +321,7 @@ def test_merge_copy_keyword(using_copy_on_write, copy):
319321
assert not np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
320322

321323

322-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
324+
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
323325
def test_join_on_key(using_copy_on_write):
324326
df_index = Index(["a", "b", "c"], name="key")
325327

@@ -353,7 +355,7 @@ def test_join_on_key(using_copy_on_write):
353355
tm.assert_frame_equal(df2, df2_orig)
354356

355357

356-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
358+
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
357359
def test_join_multiple_dataframes_on_key(using_copy_on_write):
358360
df_index = Index(["a", "b", "c"], name="key")
359361

pandas/tests/copy_view/test_interp_fillna.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33

44
from pandas._config import using_string_dtype
55

6+
from pandas.compat import HAS_PYARROW
7+
68
from pandas import (
79
NA,
810
ArrowDtype,
@@ -159,7 +161,7 @@ def test_interpolate_cleaned_fill_method(using_copy_on_write):
159161
tm.assert_frame_equal(df, df_orig)
160162

161163

162-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
164+
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
163165
def test_interpolate_object_convert_no_op(using_copy_on_write):
164166
df = DataFrame({"a": ["a", "b", "c"], "b": 1})
165167
arr_a = get_array(df, "a")

pandas/tests/copy_view/test_methods.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
from pandas._config import using_string_dtype
55

6+
from pandas.compat import HAS_PYARROW
67
from pandas.errors import SettingWithCopyWarning
78

89
import pandas as pd
@@ -952,7 +953,7 @@ def test_head_tail(method, using_copy_on_write, warn_copy_on_write):
952953
tm.assert_frame_equal(df, df_orig)
953954

954955

955-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
956+
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
956957
def test_infer_objects(using_copy_on_write):
957958
df = DataFrame({"a": [1, 2], "b": "c", "c": 1, "d": "x"})
958959
df_orig = df.copy()
@@ -974,6 +975,9 @@ def test_infer_objects(using_copy_on_write):
974975
tm.assert_frame_equal(df, df_orig)
975976

976977

978+
@pytest.mark.xfail(
979+
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
980+
)
977981
def test_infer_objects_no_reference(using_copy_on_write):
978982
df = DataFrame(
979983
{
@@ -1180,7 +1184,7 @@ def test_sort_values_inplace(using_copy_on_write, obj, kwargs, warn_copy_on_writ
11801184
assert np.shares_memory(get_array(obj, "a"), get_array(view, "a"))
11811185

11821186

1183-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
1187+
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
11841188
@pytest.mark.parametrize("decimals", [-1, 0, 1])
11851189
def test_round(using_copy_on_write, warn_copy_on_write, decimals):
11861190
df = DataFrame({"a": [1, 2], "b": "c"})

0 commit comments

Comments
 (0)