Skip to content

TST (string dtype): add test build with future strings enabled without pyarrow #59437

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .github/actions/setup-conda/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,9 @@ runs:
condarc-file: ci/.condarc
cache-environment: true
cache-downloads: true

- name: Uninstall pyarrow
if: ${{ env.REMOVE_PYARROW == '1' }}
run: |
micromamba remove -y pyarrow
shell: bash -el {0}
7 changes: 6 additions & 1 deletion .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ jobs:
env_file: [actions-310.yaml, actions-311.yaml, actions-312.yaml]
# Prevent the include jobs from overriding other jobs
pattern: [""]
pandas_future_infer_string: ["0"]
include:
- name: "Downstream Compat"
env_file: actions-311-downstream_compat.yaml
Expand Down Expand Up @@ -58,6 +59,9 @@ jobs:
# It will be temporarily activated during tests with locale.setlocale
extra_loc: "zh_CN"
- name: "Future infer strings"
env_file: actions-312.yaml
pandas_future_infer_string: "1"
- name: "Future infer strings (without pyarrow)"
env_file: actions-311.yaml
pandas_future_infer_string: "1"
- name: "Pypy"
Expand Down Expand Up @@ -85,9 +89,10 @@ jobs:
NPY_PROMOTION_STATE: ${{ matrix.env_file == 'actions-311-numpydev.yaml' && 'weak' || 'legacy' }}
# Clipboard tests
QT_QPA_PLATFORM: offscreen
REMOVE_PYARROW: ${{ matrix.name == 'Future infer strings (without pyarrow)' && '1' || '0' }}
concurrency:
# https://github.community/t/concurrecy-not-work-for-push/183068/7
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}}
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_future_infer_string }}
cancel-in-progress: true

services:
Expand Down
5 changes: 5 additions & 0 deletions pandas/tests/apply/test_frame_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

from pandas._config import using_string_dtype

from pandas.compat import HAS_PYARROW

from pandas.core.dtypes.dtypes import CategoricalDtype

import pandas as pd
Expand Down Expand Up @@ -1245,6 +1247,9 @@ def test_agg_multiple_mixed():
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
)
def test_agg_multiple_mixed_raises():
# GH 20909
mdf = DataFrame(
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/apply/test_invalid_arg.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.compat import HAS_PYARROW
from pandas.errors import SpecificationError

from pandas import (
Expand Down Expand Up @@ -209,6 +212,10 @@ def transform(row):
data.apply(transform, axis=1)


# we should raise a proper TypeError instead of propagating the pyarrow error
@pytest.mark.xfail(
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
)
@pytest.mark.parametrize(
"df, func, expected",
tm.get_cython_table_params(
Expand All @@ -229,6 +236,10 @@ def test_agg_cython_table_raises_frame(df, func, expected, axis, using_infer_str
df.agg(func, axis=axis)


# we should raise a proper TypeError instead of propagating the pyarrow error
@pytest.mark.xfail(
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
)
@pytest.mark.parametrize(
"series, func, expected",
chain(
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/apply/test_numba.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def test_numba_nonunique_unsupported(apply_axis):


def test_numba_unsupported_dtypes(apply_axis):
pytest.importorskip("pyarrow")
f = lambda x: x
df = DataFrame({"a": [1, 2], "b": ["a", "b"], "c": [4, 5]})
df["c"] = df["c"].astype("double[pyarrow]")
Expand Down
6 changes: 6 additions & 0 deletions pandas/tests/arithmetic/test_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.compat import HAS_PYARROW
import pandas.util._test_decorators as td

import pandas as pd
Expand Down Expand Up @@ -315,6 +318,9 @@ def test_add(self):
expected = pd.Index(["1a", "1b", "1c"])
tm.assert_index_equal("1" + index, expected)

@pytest.mark.xfail(
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
)
def test_sub_fail(self, using_infer_string):
index = pd.Index([str(i) for i in range(10)])

Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/arrays/boolean/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.compat import HAS_PYARROW

import pandas as pd
import pandas._testing as tm

Expand Down Expand Up @@ -90,6 +94,9 @@ def test_op_int8(left_array, right_array, opname):
# -----------------------------------------------------------------------------


@pytest.mark.xfail(
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
)
def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
# invalid ops

Expand Down
9 changes: 7 additions & 2 deletions pandas/tests/arrays/categorical/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@

from pandas._config import using_string_dtype

from pandas.compat import PYPY
from pandas.compat import (
HAS_PYARROW,
PYPY,
)

from pandas import (
Categorical,
Expand Down Expand Up @@ -296,7 +299,9 @@ def test_nbytes(self):
exp = 3 + 3 * 8 # 3 int8s for values + 3 int64s for categories
assert cat.nbytes == exp

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.xfail(
using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)"
)
def test_memory_usage(self):
cat = Categorical([1, 2, 3])

Expand Down
6 changes: 5 additions & 1 deletion pandas/tests/arrays/categorical/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

from pandas._config import using_string_dtype

from pandas.compat import HAS_PYARROW

from pandas.core.dtypes.common import (
is_float_dtype,
is_integer_dtype,
Expand Down Expand Up @@ -442,7 +444,9 @@ def test_constructor_str_unknown(self):
with pytest.raises(ValueError, match="Unknown dtype"):
Categorical([1, 2], dtype="foo")

@pytest.mark.xfail(using_string_dtype(), reason="Can't be NumPy strings")
@pytest.mark.xfail(
using_string_dtype() and HAS_PYARROW, reason="Can't be NumPy strings"
)
def test_constructor_np_strs(self):
# GH#31499 Hashtable.map_locations needs to work on np.str_ objects
cat = Categorical(["1", "0", "1"], [np.str_("0"), np.str_("1")])
Expand Down
9 changes: 6 additions & 3 deletions pandas/tests/arrays/integer/test_reduction.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import numpy as np
import pytest

from pandas.compat import HAS_PYARROW

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -102,9 +104,10 @@ def test_groupby_reductions(op, expected):
["all", Series([True, True, True], index=["A", "B", "C"], dtype="boolean")],
],
)
def test_mixed_reductions(op, expected, using_infer_string):
if op in ["any", "all"] and using_infer_string:
expected = expected.astype("bool")
def test_mixed_reductions(request, op, expected, using_infer_string):
if op in ["any", "all"] and using_infer_string and HAS_PYARROW:
# TODO(infer_string) inconsistent result type
request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
df = DataFrame(
{
"A": ["a", "b", "b"],
Expand Down
12 changes: 11 additions & 1 deletion pandas/tests/base/test_conversion.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.compat import HAS_PYARROW

from pandas.core.dtypes.dtypes import DatetimeTZDtype

import pandas as pd
Expand All @@ -20,6 +24,7 @@
SparseArray,
TimedeltaArray,
)
from pandas.core.arrays.string_ import StringArrayNumpySemantics
from pandas.core.arrays.string_arrow import ArrowStringArrayNumpySemantics


Expand Down Expand Up @@ -218,7 +223,9 @@ def test_iter_box_period(self):
)
def test_values_consistent(arr, expected_type, dtype, using_infer_string):
if using_infer_string and dtype == "object":
expected_type = ArrowStringArrayNumpySemantics
expected_type = (
ArrowStringArrayNumpySemantics if HAS_PYARROW else StringArrayNumpySemantics
)
l_values = Series(arr)._values
r_values = pd.Index(arr)._values
assert type(l_values) is expected_type
Expand Down Expand Up @@ -355,6 +362,9 @@ def test_to_numpy(arr, expected, index_or_series_or_array, request):
tm.assert_numpy_array_equal(result, expected)


@pytest.mark.xfail(
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
)
@pytest.mark.parametrize("as_series", [True, False])
@pytest.mark.parametrize(
"arr", [np.array([1, 2, 3], dtype="int64"), np.array(["a", "b", "c"], dtype=object)]
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/copy_view/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from pandas._config import using_string_dtype

from pandas.compat import HAS_PYARROW
from pandas.compat.pyarrow import pa_version_under12p0
import pandas.util._test_decorators as td

Expand Down Expand Up @@ -197,7 +198,7 @@ def test_astype_arrow_timestamp():
assert np.shares_memory(get_array(df, "a"), get_array(result, "a")._pa_array)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
def test_convert_dtypes_infer_objects():
ser = Series(["a", "b", "c"])
ser_orig = ser.copy()
Expand All @@ -213,7 +214,7 @@ def test_convert_dtypes_infer_objects():
tm.assert_series_equal(ser, ser_orig)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
def test_convert_dtypes():
df = DataFrame({"a": ["a", "b"], "b": [1, 2], "c": [1.5, 2.5], "d": [True, False]})
df_orig = df.copy()
Expand Down
12 changes: 7 additions & 5 deletions pandas/tests/copy_view/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

from pandas._config import using_string_dtype

from pandas.compat import HAS_PYARROW

from pandas import (
DataFrame,
Index,
Expand All @@ -14,7 +16,7 @@
from pandas.tests.copy_view.util import get_array


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
def test_concat_frames():
df = DataFrame({"b": ["a"] * 3})
df2 = DataFrame({"a": ["a"] * 3})
Expand All @@ -33,7 +35,7 @@ def test_concat_frames():
tm.assert_frame_equal(df, df_orig)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
def test_concat_frames_updating_input():
df = DataFrame({"b": ["a"] * 3})
df2 = DataFrame({"a": ["a"] * 3})
Expand Down Expand Up @@ -153,7 +155,7 @@ def test_concat_copy_keyword():
assert np.shares_memory(get_array(df2, "b"), get_array(result, "b"))


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
@pytest.mark.parametrize(
"func",
[
Expand Down Expand Up @@ -249,7 +251,7 @@ def test_merge_copy_keyword():
assert np.shares_memory(get_array(df2, "b"), get_array(result, "b"))


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
def test_join_on_key():
df_index = Index(["a", "b", "c"], name="key")

Expand Down Expand Up @@ -277,7 +279,7 @@ def test_join_on_key():
tm.assert_frame_equal(df2, df2_orig)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
def test_join_multiple_dataframes_on_key():
df_index = Index(["a", "b", "c"], name="key")

Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/copy_view/test_interp_fillna.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

from pandas._config import using_string_dtype

from pandas.compat import HAS_PYARROW

from pandas import (
NA,
DataFrame,
Expand Down Expand Up @@ -121,7 +123,7 @@ def test_interpolate_cannot_with_object_dtype():
df.interpolate()


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
def test_interpolate_object_convert_no_op():
df = DataFrame({"a": ["a", "b", "c"], "b": 1})
arr_a = get_array(df, "a")
Expand Down
9 changes: 7 additions & 2 deletions pandas/tests/copy_view/test_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

from pandas._config import using_string_dtype

from pandas.compat import HAS_PYARROW

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -714,7 +716,7 @@ def test_head_tail(method):
tm.assert_frame_equal(df, df_orig)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
def test_infer_objects():
df = DataFrame({"a": [1, 2], "b": "c", "c": 1, "d": "x"})
df_orig = df.copy()
Expand All @@ -730,6 +732,9 @@ def test_infer_objects():
tm.assert_frame_equal(df, df_orig)


@pytest.mark.xfail(
using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
)
def test_infer_objects_no_reference():
df = DataFrame(
{
Expand Down Expand Up @@ -899,7 +904,7 @@ def test_sort_values_inplace(obj, kwargs):
tm.assert_equal(view, obj_orig)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
@pytest.mark.parametrize("decimals", [-1, 0, 1])
def test_round(decimals):
df = DataFrame({"a": [1, 2], "b": "c"})
Expand Down
Loading