more xfails specific for no pyarrow

jorisvandenbossche · jorisvandenbossche · commit 0bcbef282ae6 · 2024-08-08T14:37:08.000+02:00
diff --git a/pandas/tests/apply/test_numba.py b/pandas/tests/apply/test_numba.py
@@ -104,6 +104,7 @@ def test_numba_nonunique_unsupported(apply_axis):
 
 
 def test_numba_unsupported_dtypes(apply_axis):
+    pytest.importorskip("pyarrow")
     f = lambda x: x
     df = DataFrame({"a": [1, 2], "b": ["a", "b"], "c": [4, 5]})
     df["c"] = df["c"].astype("double[pyarrow]")
diff --git a/pandas/tests/arrays/boolean/test_arithmetic.py b/pandas/tests/arrays/boolean/test_arithmetic.py
@@ -3,6 +3,10 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_string_dtype
+
+from pandas.compat import HAS_PYARROW
+
 import pandas as pd
 import pandas._testing as tm
 
@@ -90,6 +94,9 @@ def test_op_int8(left_array, right_array, opname):
 # -----------------------------------------------------------------------------
 
 
+@pytest.mark.xfail(
+    using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
+)
 def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string):
     # invalid ops
 
diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py
@@ -6,6 +6,8 @@
 
 from pandas._config import using_string_dtype
 
+from pandas.compat import HAS_PYARROW
+
 from pandas.core.dtypes.common import is_scalar
 
 import pandas as pd
@@ -1018,6 +1020,9 @@ def test_where_producing_ea_cond_for_np_dtype():
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.xfail(
+    using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False
+)
 @pytest.mark.parametrize(
     "replacement", [0.001, True, "snake", None, datetime(2022, 5, 4)]
 )
diff --git a/pandas/tests/frame/methods/test_info.py b/pandas/tests/frame/methods/test_info.py
@@ -10,6 +10,7 @@
 from pandas._config import using_string_dtype
 
 from pandas.compat import (
+    HAS_PYARROW,
     IS64,
     PYPY,
 )
@@ -520,7 +521,7 @@ def test_info_int_columns():
     assert result == expected
 
 
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
+@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)")
 def test_memory_usage_empty_no_warning():
     # GH#50066
     df = DataFrame(index=["a", "b"])
diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py
@@ -6,10 +6,13 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_string_dtype
+
 from pandas._libs.algos import (
     Infinity,
     NegInfinity,
 )
+from pandas.compat import HAS_PYARROW
 
 from pandas import (
     DataFrame,
@@ -464,9 +467,18 @@ def test_rank_inf_nans_na_option(
         ],
     )
     def test_rank_object_first(
-        self, frame_or_series, na_option, ascending, expected, using_infer_string
+        self,
+        request,
+        frame_or_series,
+        na_option,
+        ascending,
+        expected,
+        using_infer_string,
     ):
         obj = frame_or_series(["foo", "foo", None, "foo"])
+        if using_string_dtype() and not HAS_PYARROW and isinstance(obj, Series):
+            request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
+
         result = obj.rank(method="first", na_option=na_option, ascending=ascending)
         expected = frame_or_series(expected)
         if using_infer_string and isinstance(obj, Series):
diff --git a/pandas/tests/frame/methods/test_value_counts.py b/pandas/tests/frame/methods/test_value_counts.py
@@ -1,6 +1,10 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_string_dtype
+
+from pandas.compat import HAS_PYARROW
+
 import pandas as pd
 import pandas._testing as tm
 
@@ -132,6 +136,9 @@ def test_data_frame_value_counts_dropna_true(nulls_fixture):
     tm.assert_series_equal(result, expected)
 
 
+@pytest.mark.xfail(
+    using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
+)
 def test_data_frame_value_counts_dropna_false(nulls_fixture):
     # GH 41334
     df = pd.DataFrame(
diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py
@@ -8,6 +8,8 @@
 from pandas._config import using_string_dtype
 from pandas._config.config import option_context
 
+from pandas.compat import HAS_PYARROW
+
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -113,7 +115,9 @@ def test_not_hashable(self):
         with pytest.raises(TypeError, match=msg):
             hash(empty_frame)
 
-    @pytest.mark.xfail(using_string_dtype(), reason="surrogates not allowed")
+    @pytest.mark.xfail(
+        using_string_dtype() and HAS_PYARROW, reason="surrogates not allowed"
+    )
     def test_column_name_contains_unicode_surrogate(self):
         # GH 25509
         colname = "\ud83d"
diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
@@ -13,6 +13,8 @@
 
 from pandas._config import using_string_dtype
 
+from pandas.compat import HAS_PYARROW
+
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -1542,7 +1544,9 @@ def test_comparisons(self, simple_frame, float_frame, func):
         with pytest.raises(ValueError, match=msg):
             func(simple_frame, simple_frame[:2])
 
-    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
+    @pytest.mark.xfail(
+        using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)"
+    )
     def test_strings_to_numbers_comparisons_raises(self, compare_operators_no_eq_ne):
         # GH 11565
         df = DataFrame(
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -24,6 +24,7 @@
 from pandas._config import using_string_dtype
 
 from pandas._libs import lib
+from pandas.compat import HAS_PYARROW
 from pandas.compat.numpy import np_version_gt2
 from pandas.errors import IntCastingNaNError
 
@@ -299,7 +300,7 @@ def test_constructor_dtype_nocast_view_2d_array(self):
         df2 = DataFrame(df.values, dtype=df[0].dtype)
         assert df2._mgr.blocks[0].values.flags.c_contiguous
 
-    @pytest.mark.xfail(using_string_dtype(), reason="conversion copies")
+    @pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="conversion copies")
     def test_1d_object_array_does_not_copy(self):
         # https://github.com/pandas-dev/pandas/issues/39272
         arr = np.array(["a", "b"], dtype="object")
diff --git a/pandas/tests/frame/test_logical_ops.py b/pandas/tests/frame/test_logical_ops.py
@@ -4,6 +4,10 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_string_dtype
+
+from pandas.compat import HAS_PYARROW
+
 from pandas import (
     CategoricalIndex,
     DataFrame,
@@ -96,6 +100,9 @@ def test_logical_ops_int_frame(self):
         res_ser = df1a_int["A"] | df1a_bool["A"]
         tm.assert_series_equal(res_ser, df1a_bool["A"])
 
+    @pytest.mark.xfail(
+        using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
+    )
     def test_logical_ops_invalid(self, using_infer_string):
         # GH#5808
 
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -1408,7 +1408,7 @@ def g(group):
 
 
 # TODO harmonize error messages
-@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
+@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
 @pytest.mark.parametrize("grouper", ["A", ["A", "B"]])
 def test_set_group_name(df, grouper, using_infer_string):
     def f(group):
diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py
@@ -245,6 +245,7 @@ def test_repr_max_seq_item_setting(self, simple_index):
             repr(idx)
             assert "..." not in str(idx)
 
+    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
     @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
     def test_ensure_copied_data(self, index):
         # Check the "copy" argument of each Index.__new__ is honoured
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
@@ -16,6 +16,7 @@
 from pandas._config import using_string_dtype
 
 from pandas._libs import index as libindex
+from pandas.compat import HAS_PYARROW
 from pandas.errors import IndexingError
 
 import pandas as pd
@@ -1388,6 +1389,9 @@ def test_loc_setitem_categorical_values_partial_column_slice(self):
             df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"])
             df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"])
 
+    @pytest.mark.xfail(
+        using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
+    )
     def test_loc_setitem_single_row_categorical(self, using_infer_string):
         # GH#25495
         df = DataFrame({"Alpha": ["a"], "Numeric": [0]})
diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py
@@ -4,6 +4,7 @@
 
 import pytest
 
+from pandas.compat import HAS_PYARROW
 from pandas.compat._optional import VERSIONS
 
 from pandas import (
@@ -117,7 +118,15 @@ def csv1(datapath):
 
 _py_parsers_only = [_pythonParser]
 _c_parsers_only = [_cParserHighMemory, _cParserLowMemory]
-_pyarrow_parsers_only = [pytest.param(_pyarrowParser, marks=pytest.mark.single_cpu)]
+_pyarrow_parsers_only = [
+    pytest.param(
+        _pyarrowParser,
+        marks=[
+            pytest.mark.single_cpu,
+            pytest.mark.skipif(not HAS_PYARROW, reason="pyarrow is not installed"),
+        ],
+    )
+]
 
 _all_parsers = [*_c_parsers_only, *_py_parsers_only, *_pyarrow_parsers_only]
 
diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py
@@ -4,6 +4,10 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_string_dtype
+
+from pandas.compat import HAS_PYARROW
+
 import pandas as pd
 from pandas import (
     DataFrame,
@@ -160,6 +164,9 @@ def test_attrs(self):
         result = s + 1
         assert result.attrs == {"version": 1}
 
+    @pytest.mark.xfail(
+        using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
+    )
     def test_inspect_getmembers(self):
         # GH38782
         pytest.importorskip("jinja2")
diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py
@@ -13,6 +13,7 @@
 
 from pandas._libs import lib
 from pandas._libs.tslibs import IncompatibleFrequency
+from pandas.compat import HAS_PYARROW
 
 import pandas as pd
 from pandas import (
@@ -193,6 +194,9 @@ def test_string_addition(self, target_add, input_value, expected_value):
         expected = Series(expected_value)
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.xfail(
+        using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
+    )
     def test_divmod(self):
         # GH#25557
         a = Series([1, 1, 1, np.nan], index=["a", "b", "c", "d"])
diff --git a/pandas/tests/series/test_logical_ops.py b/pandas/tests/series/test_logical_ops.py
@@ -6,6 +6,8 @@
 
 from pandas._config import using_string_dtype
 
+from pandas.compat import HAS_PYARROW
+
 from pandas import (
     DataFrame,
     Index,
@@ -143,6 +145,9 @@ def test_logical_operators_int_dtype_with_bool(self):
         expected = Series([False, True, True, True])
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.xfail(
+        using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
+    )
     def test_logical_operators_int_dtype_with_object(self, using_infer_string):
         # GH#9016: support bitwise op for integer types
         s_0123 = Series(range(4), dtype="int64")
@@ -462,6 +467,9 @@ def test_logical_ops_label_based(self, using_infer_string):
             with pytest.raises(TypeError, match=msg):
                 t & v
 
+    @pytest.mark.xfail(
+        using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
+    )
     def test_logical_ops_df_compat(self):
         # GH#1134
         s1 = Series([True, False, True], index=list("ABC"), name="x")
diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py
@@ -1,6 +1,10 @@
 import numpy as np
 import pytest
 
+from pandas._config import using_string_dtype
+
+from pandas.compat import HAS_PYARROW
+
 import pandas as pd
 from pandas import Series
 import pandas._testing as tm
@@ -162,6 +166,9 @@ def test_validate_stat_keepdims():
         np.sum(ser, keepdims=True)
 
 
+@pytest.mark.xfail(
+    using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
+)
 def test_mean_with_convertible_string_raises(using_infer_string):
     # GH#44008
     ser = Series(["1", "2"])
@@ -181,6 +188,9 @@ def test_mean_with_convertible_string_raises(using_infer_string):
         df.mean()
 
 
+@pytest.mark.xfail(
+    using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
+)
 def test_mean_dont_convert_j_to_complex():
     # GH#36703
     df = pd.DataFrame([{"db": "J", "numeric": 123}])
@@ -199,6 +209,9 @@ def test_mean_dont_convert_j_to_complex():
         np.mean(df["db"].astype("string").array)
 
 
+@pytest.mark.xfail(
+    using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)"
+)
 def test_median_with_convertible_string_raises():
     # GH#34671 this _could_ return a string "2", but definitely not float 2.0
     msg = r"Cannot convert \['1' '2' '3'\] to numeric|does not support"
diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py
diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py