From f73e3d40e8425dd7d7d622ebb7d77c8132f06bb9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 14 Oct 2022 11:13:38 -0700 Subject: [PATCH 1/7] Bump CI and docs pyarrow to 5.0 --- .github/workflows/ubuntu.yml | 4 +--- ci/deps/actions-38-minimum_versions.yaml | 2 +- doc/source/getting_started/install.rst | 4 ++-- doc/source/whatsnew/v2.0.0.rst | 2 +- pandas/compat/_optional.py | 2 +- 5 files changed, 6 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 260c857e608b3..80bcd34ff4bf8 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -31,9 +31,7 @@ jobs: matrix: env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml] pattern: ["not single_cpu", "single_cpu"] - # Don't test pyarrow v2/3: Causes timeouts in read_csv engine - # even if tests are skipped/xfailed - pyarrow_version: ["5", "6", "7"] + pyarrow_version: ["6", "7", "8"] include: - name: "Downstream Compat" env_file: actions-38-downstream_compat.yaml diff --git a/ci/deps/actions-38-minimum_versions.yaml b/ci/deps/actions-38-minimum_versions.yaml index fd23080c2ab04..601581868f661 100644 --- a/ci/deps/actions-38-minimum_versions.yaml +++ b/ci/deps/actions-38-minimum_versions.yaml @@ -39,7 +39,7 @@ dependencies: - openpyxl=3.0.7 - pandas-gbq=0.15.0 - psycopg2=2.8.6 - - pyarrow=1.0.1 + - pyarrow=5.0.0 - pymysql=1.0.2 - pyreadstat=1.1.2 - pytables=3.6.1 diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 00251854e3ffa..e7038c8aa4bbb 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -388,7 +388,7 @@ PyTables 3.6.1 HDF5-based reading / writing blosc 1.21.0 Compression for HDF5 zlib Compression for HDF5 fastparquet 0.4.0 Parquet reading / writing -pyarrow 1.0.1 Parquet, ORC, and feather reading / writing +pyarrow 5.0.0 Parquet, ORC, and feather reading / writing pyreadstat 1.1.2 
SPSS files (.sav) reading ========================= ================== ============================================================= @@ -402,7 +402,7 @@ pyreadstat 1.1.2 SPSS files (.sav) reading ========================= ================== ============================================================= System Conda PyPI ========================= ================== ============================================================= - Linux Successful Failed(pyarrow==3.0 Successful) + Linux Successful Failed macOS Successful Failed Windows Failed Failed ========================= ================== ============================================================= diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 89cfa8c580523..3813b766e4f2b 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -107,7 +107,7 @@ Optional libraries below the lowest tested version may still work, but are not c +-----------------+-----------------+---------+ | Package | Minimum Version | Changed | +=================+=================+=========+ -| | | X | +| pyarrow | 5.0.0 | X | +-----------------+-----------------+---------+ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. 
diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index 3caa92758dd52..ef919d0622399 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -31,7 +31,7 @@ "pandas_gbq": "0.15.0", "psycopg2": "2.8.6", # (dt dec pq3 ext lo64) "pymysql": "1.0.2", - "pyarrow": "1.0.1", + "pyarrow": "5.0.0", "pyreadstat": "1.1.2", "pytest": "6.0", "pyxlsb": "1.0.8", From 7baec4efb10e8e9ed77805c9b59cd5fb7870b864 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 14 Oct 2022 11:22:46 -0700 Subject: [PATCH 2/7] Remove 1.0.1 check --- pandas/_testing/__init__.py | 4 ++-- pandas/compat/__init__.py | 2 -- pandas/compat/pyarrow.py | 2 -- pandas/core/arrays/arrow/array.py | 7 +++---- pandas/core/arrays/arrow/dtype.py | 8 ++++---- pandas/core/arrays/string_.py | 6 +++--- pandas/core/arrays/string_arrow.py | 8 ++++---- pandas/tests/arrays/string_/test_string_arrow.py | 8 ++++---- pandas/tests/groupby/test_groupby_dropna.py | 4 ++-- pandas/tests/indexes/multi/test_constructors.py | 4 ++-- 10 files changed, 24 insertions(+), 29 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index ce6b6ceea5604..bbf1163ae9658 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -29,7 +29,7 @@ Dtype, Frequency, ) -from pandas.compat import pa_version_under1p01 +from pandas.compat import pa_version_under5p0 from pandas.core.dtypes.common import ( is_float_dtype, @@ -195,7 +195,7 @@ ] ] -if not pa_version_under1p01: +if not pa_version_under5p0: import pyarrow as pa UNSIGNED_INT_PYARROW_DTYPES = [pa.uint8(), pa.uint16(), pa.uint32(), pa.uint64()] diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 80f66c945ba27..51bddb18754a6 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -20,7 +20,6 @@ np_version_under1p21, ) from pandas.compat.pyarrow import ( - pa_version_under1p01, pa_version_under2p0, pa_version_under3p0, 
pa_version_under4p0, @@ -154,7 +153,6 @@ def get_lzma_file() -> type[lzma.LZMAFile]: __all__ = [ "is_numpy_dev", "np_version_under1p21", - "pa_version_under1p01", "pa_version_under2p0", "pa_version_under3p0", "pa_version_under4p0", diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py index 6965865acb5da..c99d351a9551e 100644 --- a/pandas/compat/pyarrow.py +++ b/pandas/compat/pyarrow.py @@ -9,7 +9,6 @@ _pa_version = pa.__version__ _palv = Version(_pa_version) - pa_version_under1p01 = _palv < Version("1.0.1") pa_version_under2p0 = _palv < Version("2.0.0") pa_version_under3p0 = _palv < Version("3.0.0") pa_version_under4p0 = _palv < Version("4.0.0") @@ -19,7 +18,6 @@ pa_version_under8p0 = _palv < Version("8.0.0") pa_version_under9p0 = _palv < Version("9.0.0") except ImportError: - pa_version_under1p01 = True pa_version_under2p0 = True pa_version_under3p0 = True pa_version_under4p0 = True diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index f6f933b1b9917..df169f5a20782 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -17,7 +17,6 @@ npt, ) from pandas.compat import ( - pa_version_under1p01, pa_version_under2p0, pa_version_under3p0, pa_version_under4p0, @@ -48,7 +47,7 @@ validate_indices, ) -if not pa_version_under1p01: +if not pa_version_under5p0: import pyarrow as pa import pyarrow.compute as pc @@ -206,8 +205,8 @@ class ArrowExtensionArray(OpsMixin, ExtensionArray): _dtype: ArrowDtype def __init__(self, values: pa.Array | pa.ChunkedArray) -> None: - if pa_version_under1p01: - msg = "pyarrow>=1.0.0 is required for PyArrow backed ArrowExtensionArray." + if pa_version_under5p0: + msg = "pyarrow>=5.0.0 is required for PyArrow backed ArrowExtensionArray." 
raise ImportError(msg) if isinstance(values, pa.Array): self._data = pa.chunked_array([values]) diff --git a/pandas/core/arrays/arrow/dtype.py b/pandas/core/arrays/arrow/dtype.py index 48e2c5bdda2f8..4f864abe811df 100644 --- a/pandas/core/arrays/arrow/dtype.py +++ b/pandas/core/arrays/arrow/dtype.py @@ -5,7 +5,7 @@ import numpy as np from pandas._typing import DtypeObj -from pandas.compat import pa_version_under1p01 +from pandas.compat import pa_version_under5p0 from pandas.util._decorators import cache_readonly from pandas.core.dtypes.base import ( @@ -13,7 +13,7 @@ register_extension_dtype, ) -if not pa_version_under1p01: +if not pa_version_under5p0: import pyarrow as pa @@ -66,8 +66,8 @@ class ArrowDtype(StorageExtensionDtype): def __init__(self, pyarrow_dtype: pa.DataType) -> None: super().__init__("pyarrow") - if pa_version_under1p01: - raise ImportError("pyarrow>=1.0.1 is required for ArrowDtype") + if pa_version_under5p0: + raise ImportError("pyarrow>=5.0.0 is required for ArrowDtype") if not isinstance(pyarrow_dtype, pa.DataType): raise ValueError( f"pyarrow_dtype ({pyarrow_dtype}) must be an instance " diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 2f9857eb43860..d0f580f9a8325 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -18,7 +18,7 @@ npt, type_t, ) -from pandas.compat import pa_version_under1p01 +from pandas.compat import pa_version_under5p0 from pandas.compat.numpy import function as nv from pandas.core.dtypes.base import ( @@ -106,9 +106,9 @@ def __init__(self, storage=None) -> None: raise ValueError( f"Storage must be 'python' or 'pyarrow'. Got {storage} instead." ) - if storage == "pyarrow" and pa_version_under1p01: + if storage == "pyarrow" and pa_version_under5p0: raise ImportError( - "pyarrow>=1.0.0 is required for PyArrow backed StringArray." + "pyarrow>=5.0.0 is required for PyArrow backed StringArray." 
) self.storage = storage diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index faa662611c0e1..5e68cb93d6d40 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -17,10 +17,10 @@ npt, ) from pandas.compat import ( - pa_version_under1p01, pa_version_under2p0, pa_version_under3p0, pa_version_under4p0, + pa_version_under5p0, ) from pandas.core.dtypes.common import ( @@ -44,7 +44,7 @@ ) from pandas.core.strings.object_array import ObjectStringArrayMixin -if not pa_version_under1p01: +if not pa_version_under5p0: import pyarrow as pa import pyarrow.compute as pc @@ -54,8 +54,8 @@ def _chk_pyarrow_available() -> None: - if pa_version_under1p01: - msg = "pyarrow>=1.0.0 is required for PyArrow backed ArrowExtensionArray." + if pa_version_under5p0: + msg = "pyarrow>=5.0.0 is required for PyArrow backed ArrowExtensionArray." raise ImportError(msg) diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py index f43cf298857a0..3ed70c086c7a8 100644 --- a/pandas/tests/arrays/string_/test_string_arrow.py +++ b/pandas/tests/arrays/string_/test_string_arrow.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from pandas.compat import pa_version_under1p01 +from pandas.compat import pa_version_under5p0 import pandas as pd import pandas._testing as tm @@ -14,8 +14,8 @@ from pandas.core.arrays.string_arrow import ArrowStringArray skip_if_no_pyarrow = pytest.mark.skipif( - pa_version_under1p01, - reason="pyarrow>=1.0.0 is required for PyArrow backed StringArray", + pa_version_under5p0, + reason="pyarrow>=5.0.0 is required for PyArrow backed StringArray", ) @@ -118,7 +118,7 @@ def test_from_sequence_wrong_dtype_raises(): @pytest.mark.skipif( - not pa_version_under1p01, + not pa_version_under5p0, reason="pyarrow is installed", ) def test_pyarrow_not_installed_raises(): diff --git a/pandas/tests/groupby/test_groupby_dropna.py 
b/pandas/tests/groupby/test_groupby_dropna.py index ee660dd073ce9..f87f656ea896a 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas.compat.pyarrow import pa_version_under1p01 +from pandas.compat.pyarrow import pa_version_under5p0 from pandas.core.dtypes.missing import na_value_for_dtype @@ -415,7 +415,7 @@ def test_groupby_drop_nan_with_multi_index(): pytest.param( "string[pyarrow]", marks=pytest.mark.skipif( - pa_version_under1p01, reason="pyarrow is not installed" + pa_version_under5p0, reason="pyarrow is not installed" ), ), "datetime64[ns]", diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index 4dc3c5a4ae8b9..9738ea5b5d35e 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -7,7 +7,7 @@ import numpy as np import pytest -from pandas.compat import pa_version_under1p01 +from pandas.compat import pa_version_under5p0 from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike @@ -650,7 +650,7 @@ def test_from_frame(): tm.assert_index_equal(expected, result) -@pytest.mark.skipif(pa_version_under1p01, reason="Import Problem") +@pytest.mark.skipif(pa_version_under5p0, reason="minimum pyarrow not installed") def test_from_frame_missing_values_multiIndex(): # GH 39984 import pyarrow as pa From bc6e2830603d453fa529b5579e56849c94078bc9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 14 Oct 2022 11:45:20 -0700 Subject: [PATCH 3/7] Remove 2.0.0 check --- pandas/compat/__init__.py | 2 - pandas/compat/pyarrow.py | 2 - pandas/core/arrays/arrow/array.py | 83 +--- pandas/core/arrays/string_arrow.py | 9 - pandas/tests/arrays/string_/test_string.py | 26 +- pandas/tests/base/test_unique.py | 47 +- pandas/tests/extension/test_arrow.py | 527 ++------------------- 
pandas/tests/extension/test_string.py | 21 - pandas/tests/indexes/test_common.py | 16 +- pandas/tests/indexes/test_setops.py | 13 +- pandas/tests/io/test_parquet.py | 10 +- pandas/tests/strings/test_strings.py | 19 +- 12 files changed, 74 insertions(+), 701 deletions(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 51bddb18754a6..1868f6028c9f9 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -20,7 +20,6 @@ np_version_under1p21, ) from pandas.compat.pyarrow import ( - pa_version_under2p0, pa_version_under3p0, pa_version_under4p0, pa_version_under5p0, @@ -153,7 +152,6 @@ def get_lzma_file() -> type[lzma.LZMAFile]: __all__ = [ "is_numpy_dev", "np_version_under1p21", - "pa_version_under2p0", "pa_version_under3p0", "pa_version_under4p0", "pa_version_under5p0", diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py index c99d351a9551e..6277b7e254fb4 100644 --- a/pandas/compat/pyarrow.py +++ b/pandas/compat/pyarrow.py @@ -9,7 +9,6 @@ _pa_version = pa.__version__ _palv = Version(_pa_version) - pa_version_under2p0 = _palv < Version("2.0.0") pa_version_under3p0 = _palv < Version("3.0.0") pa_version_under4p0 = _palv < Version("4.0.0") pa_version_under5p0 = _palv < Version("5.0.0") @@ -18,7 +17,6 @@ pa_version_under8p0 = _palv < Version("8.0.0") pa_version_under9p0 = _palv < Version("9.0.0") except ImportError: - pa_version_under2p0 = True pa_version_under3p0 = True pa_version_under4p0 = True pa_version_under5p0 = True diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index df169f5a20782..8b77b6f19836b 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -17,7 +17,6 @@ npt, ) from pandas.compat import ( - pa_version_under2p0, pa_version_under3p0, pa_version_under4p0, pa_version_under5p0, @@ -64,16 +63,12 @@ } ARROW_LOGICAL_FUNCS = { - "and": NotImplemented if pa_version_under2p0 else pc.and_kleene, - "rand": NotImplemented - if pa_version_under2p0 
- else lambda x, y: pc.and_kleene(y, x), - "or": NotImplemented if pa_version_under2p0 else pc.or_kleene, - "ror": NotImplemented - if pa_version_under2p0 - else lambda x, y: pc.or_kleene(y, x), - "xor": NotImplemented if pa_version_under2p0 else pc.xor, - "rxor": NotImplemented if pa_version_under2p0 else lambda x, y: pc.xor(y, x), + "and": pc.and_kleene, + "rand": lambda x, y: pc.and_kleene(y, x), + "or": pc.or_kleene, + "ror": lambda x, y: pc.or_kleene(y, x), + "xor": pc.xor, + "rxor": lambda x, y: pc.xor(y, x), } def cast_for_truediv( @@ -99,30 +94,16 @@ def floordiv_compat( return result ARROW_ARITHMETIC_FUNCS = { - "add": NotImplemented if pa_version_under2p0 else pc.add_checked, - "radd": NotImplemented - if pa_version_under2p0 - else lambda x, y: pc.add_checked(y, x), - "sub": NotImplemented if pa_version_under2p0 else pc.subtract_checked, - "rsub": NotImplemented - if pa_version_under2p0 - else lambda x, y: pc.subtract_checked(y, x), - "mul": NotImplemented if pa_version_under2p0 else pc.multiply_checked, - "rmul": NotImplemented - if pa_version_under2p0 - else lambda x, y: pc.multiply_checked(y, x), - "truediv": NotImplemented - if pa_version_under2p0 - else lambda x, y: pc.divide_checked(cast_for_truediv(x, y), y), - "rtruediv": NotImplemented - if pa_version_under2p0 - else lambda x, y: pc.divide_checked(y, cast_for_truediv(x, y)), - "floordiv": NotImplemented - if pa_version_under2p0 - else lambda x, y: floordiv_compat(x, y), - "rfloordiv": NotImplemented - if pa_version_under2p0 - else lambda x, y: floordiv_compat(y, x), + "add": pc.add_checked, + "radd": lambda x, y: pc.add_checked(y, x), + "sub": pc.subtract_checked, + "rsub": lambda x, y: pc.subtract_checked(y, x), + "mul": pc.multiply_checked, + "rmul": lambda x, y: pc.multiply_checked(y, x), + "truediv": lambda x, y: pc.divide_checked(cast_for_truediv(x, y), y), + "rtruediv": lambda x, y: pc.divide_checked(y, cast_for_truediv(x, y)), + "floordiv": lambda x, y: floordiv_compat(x, y), + 
"rfloordiv": lambda x, y: floordiv_compat(y, x), "mod": NotImplemented, "rmod": NotImplemented, "divmod": NotImplemented, @@ -359,8 +340,6 @@ def __arrow_array__(self, type=None): return self._data def __invert__(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT: - if pa_version_under2p0: - raise NotImplementedError("__invert__ not implement for pyarrow < 2.0") return type(self)(pc.invert(self._data)) def __neg__(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT: @@ -394,10 +373,7 @@ def _cmp_method(self, other, op): f"{op.__name__} not implemented for {type(other)}" ) - if pa_version_under2p0: - result = result.to_pandas().values - else: - result = result.to_numpy() + result = result.to_numpy() return BooleanArray._from_sequence(result) def _evaluate_op_method(self, other, op, arrow_funcs): @@ -463,10 +439,7 @@ def isna(self) -> npt.NDArray[np.bool_]: This should return a 1-D array the same length as 'self'. """ - if pa_version_under2p0: - return self._data.is_null().to_pandas().values - else: - return self._data.is_null().to_numpy() + return self._data.is_null().to_numpy() @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) def argsort( @@ -491,10 +464,7 @@ def argsort( result = pc.array_sort_indices( self._data, order=order, null_placement=null_placement ) - if pa_version_under2p0: - np_result = result.to_pandas().values - else: - np_result = result.to_numpy() + np_result = result.to_numpy() return np_result.astype(np.intp, copy=False) def _argmin_max(self, skipna: bool, method: str) -> int: @@ -547,10 +517,6 @@ def dropna(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT: return type(self)(pc.drop_null(self._data)) def isin(self, values) -> npt.NDArray[np.bool_]: - if pa_version_under2p0: - fallback_performancewarning(version="2") - return super().isin(values) - # for an empty value_set pyarrow 3.0.0 segfaults and pyarrow 2.0.0 returns True # for null values, so we short-circuit to return all False array. 
if not len(values): @@ -583,10 +549,7 @@ def _values_for_factorize(self) -> tuple[np.ndarray, Any]: The values returned by this method are also used in :func:`pandas.util.hash_pandas_object`. """ - if pa_version_under2p0: - values = self._data.to_pandas().values - else: - values = self._data.to_numpy() + values = self._data.to_numpy() return values, self.dtype.na_value @doc(ExtensionArray.factorize) @@ -739,11 +702,7 @@ def unique(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT: ------- ArrowExtensionArray """ - if pa_version_under2p0: - fallback_performancewarning(version="2") - return super().unique() - else: - return type(self)(pc.unique(self._data)) + return type(self)(pc.unique(self._data)) def value_counts(self, dropna: bool = True) -> Series: """ diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 5e68cb93d6d40..7c30dba1e5e6c 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -17,7 +17,6 @@ npt, ) from pandas.compat import ( - pa_version_under2p0, pa_version_under3p0, pa_version_under4p0, pa_version_under5p0, @@ -202,10 +201,6 @@ def _maybe_convert_setitem_value(self, value): return value def isin(self, values) -> npt.NDArray[np.bool_]: - if pa_version_under2p0: - fallback_performancewarning(version="2") - return super().isin(values) - value_set = [ pa_scalar.as_py() for pa_scalar in [pa.scalar(value, from_pandas=True) for value in values] @@ -418,10 +413,6 @@ def _str_isnumeric(self): return BooleanDtype().__from_arrow__(result) def _str_isspace(self): - if pa_version_under2p0: - fallback_performancewarning(version="2") - return super()._str_isspace() - result = pc.utf8_is_space(self._data) return BooleanDtype().__from_arrow__(result) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index a7b8162eb981a..afa26147cf0fc 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ 
-5,11 +5,7 @@ import numpy as np import pytest -from pandas.compat import ( - pa_version_under2p0, - pa_version_under6p0, -) -from pandas.errors import PerformanceWarning +from pandas.compat import pa_version_under6p0 import pandas.util._test_decorators as td from pandas.core.dtypes.common import is_dtype_equal @@ -564,31 +560,19 @@ def test_to_numpy_na_value(dtype, nulls_fixture): def test_isin(dtype, fixed_now_ts): s = pd.Series(["a", "b", None], dtype=dtype) - with tm.maybe_produces_warning( - PerformanceWarning, dtype.storage == "pyarrow" and pa_version_under2p0 - ): - result = s.isin(["a", "c"]) + result = s.isin(["a", "c"]) expected = pd.Series([True, False, False]) tm.assert_series_equal(result, expected) - with tm.maybe_produces_warning( - PerformanceWarning, dtype.storage == "pyarrow" and pa_version_under2p0 - ): - result = s.isin(["a", pd.NA]) + result = s.isin(["a", pd.NA]) expected = pd.Series([True, False, True]) tm.assert_series_equal(result, expected) - with tm.maybe_produces_warning( - PerformanceWarning, dtype.storage == "pyarrow" and pa_version_under2p0 - ): - result = s.isin([]) + result = s.isin([]) expected = pd.Series([False, False, False]) tm.assert_series_equal(result, expected) - with tm.maybe_produces_warning( - PerformanceWarning, dtype.storage == "pyarrow" and pa_version_under2p0 - ): - result = s.isin(["a", fixed_now_ts]) + result = s.isin(["a", fixed_now_ts]) expected = pd.Series([True, False, False]) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/base/test_unique.py b/pandas/tests/base/test_unique.py index 46b11ac533c7b..66cc000b9f458 100644 --- a/pandas/tests/base/test_unique.py +++ b/pandas/tests/base/test_unique.py @@ -1,9 +1,6 @@ import numpy as np import pytest -from pandas.compat import pa_version_under2p0 -from pandas.errors import PerformanceWarning - from pandas.core.dtypes.common import is_datetime64tz_dtype import pandas as pd @@ -15,12 +12,7 @@ def test_unique(index_or_series_obj): obj = 
index_or_series_obj obj = np.repeat(obj, range(1, len(obj) + 1)) - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under2p0 - and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow", - ): - result = obj.unique() + result = obj.unique() # dict.fromkeys preserves the order unique_values = list(dict.fromkeys(obj.values)) @@ -58,12 +50,7 @@ def test_unique_null(null_obj, index_or_series_obj): klass = type(obj) repeated_values = np.repeat(values, range(1, len(values) + 1)) obj = klass(repeated_values, dtype=obj.dtype) - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under2p0 - and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow", - ): - result = obj.unique() + result = obj.unique() unique_values_raw = dict.fromkeys(obj.values) # because np.nan == np.nan is False, but None == None is True @@ -88,13 +75,8 @@ def test_unique_null(null_obj, index_or_series_obj): def test_nunique(index_or_series_obj): obj = index_or_series_obj obj = np.repeat(obj, range(1, len(obj) + 1)) - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under2p0 - and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow", - ): - expected = len(obj.unique()) - assert obj.nunique(dropna=False) == expected + expected = len(obj.unique()) + assert obj.nunique(dropna=False) == expected @pytest.mark.parametrize("null_obj", [np.nan, None]) @@ -117,24 +99,9 @@ def test_nunique_null(null_obj, index_or_series_obj): assert obj.nunique() == len(obj.categories) assert obj.nunique(dropna=False) == len(obj.categories) + 1 else: - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under2p0 - and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow", - ): - num_unique_values = len(obj.unique()) - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under2p0 - and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow", - ): - assert obj.nunique() == max(0, num_unique_values - 1) - with 
tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under2p0 - and getattr(index_or_series_obj.dtype, "storage", "") == "pyarrow", - ): - assert obj.nunique(dropna=False) == max(0, num_unique_values) + num_unique_values = len(obj.unique()) + assert obj.nunique() == max(0, num_unique_values - 1) + assert obj.nunique(dropna=False) == max(0, num_unique_values) @pytest.mark.single_cpu diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index c6915e491554a..db71b1d504c5e 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -23,9 +23,9 @@ from pandas.compat import ( is_ci_environment, is_platform_windows, - pa_version_under2p0, pa_version_under3p0, pa_version_under4p0, + pa_version_under5p0, pa_version_under6p0, pa_version_under7p0, pa_version_under8p0, @@ -213,20 +213,12 @@ class TestConstructors(base.BaseConstructorsTests): def test_from_dtype(self, data, request): pa_dtype = data.dtype.pyarrow_dtype if pa.types.is_timestamp(pa_dtype) and pa_dtype.tz: - if pa_version_under2p0: - request.node.add_marker( - pytest.mark.xfail( - reason=f"timestamp data with tz={pa_dtype.tz} " - "converted to integer when pyarrow < 2.0", - ) - ) - else: - request.node.add_marker( - pytest.mark.xfail( - raises=NotImplementedError, - reason=f"pyarrow.type_for_alias cannot infer {pa_dtype}", - ) + request.node.add_marker( + pytest.mark.xfail( + raises=NotImplementedError, + reason=f"pyarrow.type_for_alias cannot infer {pa_dtype}", ) + ) super().test_from_dtype(data) def test_from_sequence_pa_array(self, data, request): @@ -335,8 +327,6 @@ def test_take_series(self, request, data): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) unit = getattr(data.dtype.pyarrow_dtype, "unit", None) bad_units = ["ns"] - if pa_version_under2p0: - bad_units.extend(["s", "ms", "us"]) if pa_version_under3p0 and tz not in (None, "UTC") and unit in bad_units: request.node.add_marker( pytest.mark.xfail( @@ -352,8 +342,6 @@ 
def test_reindex(self, request, data, na_value): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) unit = getattr(data.dtype.pyarrow_dtype, "unit", None) bad_units = ["ns"] - if pa_version_under2p0: - bad_units.extend(["s", "ms", "us"]) if pa_version_under3p0 and tz not in (None, "UTC") and unit in bad_units: request.node.add_marker( pytest.mark.xfail( @@ -369,8 +357,6 @@ def test_loc_iloc_frame_single_dtype(self, request, using_array_manager, data): tz = getattr(data.dtype.pyarrow_dtype, "tz", None) unit = getattr(data.dtype.pyarrow_dtype, "unit", None) bad_units = ["ns"] - if pa_version_under2p0: - bad_units.extend(["s", "ms", "us"]) if ( pa_version_under3p0 and not using_array_manager @@ -419,8 +405,6 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna, request): and pa_version_under6p0 ): request.node.add_marker(xfail_mark) - elif all_numeric_reductions in {"sum", "mean"} and pa_version_under2p0: - request.node.add_marker(xfail_mark) elif ( all_numeric_reductions in {"sum", "mean"} and skipna is False @@ -479,13 +463,6 @@ def test_reduce_series( class TestBaseGroupby(base.BaseGroupbyTests): def test_groupby_agg_extension(self, data_for_grouping, request): - tz = getattr(data_for_grouping.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}." 
- ) - ) super().test_groupby_agg_extension(data_for_grouping) def test_groupby_extension_no_sort(self, data_for_grouping, request): @@ -640,20 +617,6 @@ class TestBaseIndex(base.BaseIndexTests): class TestBaseInterface(base.BaseInterfaceTests): - def test_contains(self, data, data_missing, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - unit = getattr(data.dtype.pyarrow_dtype, "unit", None) - if pa_version_under2p0 and tz not in (None, "UTC") and unit == "us": - request.node.add_marker( - pytest.mark.xfail( - reason=( - f"Not supported by pyarrow < 2.0 " - f"with timestamp type {tz} and {unit}" - ) - ) - ) - super().test_contains(data, data_missing) - @pytest.mark.xfail(reason="pyarrow.ChunkedArray does not support views.") def test_view(self, data): super().test_view(data) @@ -670,333 +633,12 @@ class TestBasePrinting(base.BasePrintingTests): class TestBaseReshaping(base.BaseReshapingTests): - def test_concat_columns(self, data, na_value, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" - ) - ) - super().test_concat_columns(data, na_value) - - def test_concat_extension_arrays_copy_false(self, data, na_value, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" - ) - ) - super().test_concat_extension_arrays_copy_false(data, na_value) - - def test_align(self, data, na_value, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" - ) - ) - super().test_align(data, na_value) - - def test_align_frame(self, data, 
na_value, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" - ) - ) - super().test_align_frame(data, na_value) - - def test_align_series_frame(self, data, na_value, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" - ) - ) - super().test_align_series_frame(data, na_value) - - def test_merge(self, data, na_value, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" - ) - ) - super().test_merge(data, na_value) - - def test_ravel(self, data, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" - ) - ) - super().test_ravel(data) - @pytest.mark.xfail(reason="GH 45419: pyarrow.ChunkedArray does not support views") def test_transpose(self, data): super().test_transpose(data) - def test_transpose_frame(self, data, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" - ) - ) - super().test_transpose_frame(data) - class TestBaseSetitem(base.BaseSetitemTests): - def test_setitem_scalar_series(self, data, box_in_series, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - 
reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" - ) - ) - super().test_setitem_scalar_series(data, box_in_series) - - def test_setitem_sequence(self, data, box_in_series, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") - ) - ) - super().test_setitem_sequence(data, box_in_series) - - def test_setitem_sequence_broadcasts(self, data, box_in_series, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") - ) - ) - super().test_setitem_sequence_broadcasts(data, box_in_series) - - @pytest.mark.parametrize("setter", ["loc", "iloc"]) - def test_setitem_scalar(self, data, setter, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") - ) - ) - super().test_setitem_scalar(data, setter) - - def test_setitem_loc_scalar_mixed(self, data, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") - ) - ) - super().test_setitem_loc_scalar_mixed(data) - - def test_setitem_loc_scalar_single(self, data, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" - ) - ) - super().test_setitem_loc_scalar_single(data) - - def test_setitem_loc_scalar_multiple_homogoneous(self, data, 
request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") - ) - ) - super().test_setitem_loc_scalar_multiple_homogoneous(data) - - def test_setitem_iloc_scalar_mixed(self, data, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") - ) - ) - super().test_setitem_iloc_scalar_mixed(data) - - def test_setitem_iloc_scalar_single(self, data, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") - ) - ) - super().test_setitem_iloc_scalar_single(data) - - def test_setitem_iloc_scalar_multiple_homogoneous(self, data, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") - ) - ) - super().test_setitem_iloc_scalar_multiple_homogoneous(data) - - @pytest.mark.parametrize( - "mask", - [ - np.array([True, True, True, False, False]), - pd.array([True, True, True, False, False], dtype="boolean"), - pd.array([True, True, True, pd.NA, pd.NA], dtype="boolean"), - ], - ids=["numpy-array", "boolean-array", "boolean-array-na"], - ) - def test_setitem_mask(self, data, mask, box_in_series, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") - ) - ) - super().test_setitem_mask(data, mask, box_in_series) 
- - def test_setitem_mask_boolean_array_with_na(self, data, box_in_series, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - unit = getattr(data.dtype.pyarrow_dtype, "unit", None) - if pa_version_under2p0 and tz not in (None, "UTC") and unit == "us": - request.node.add_marker( - pytest.mark.xfail( - reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") - ) - ) - super().test_setitem_mask_boolean_array_with_na(data, box_in_series) - - @pytest.mark.parametrize( - "idx", - [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])], - ids=["list", "integer-array", "numpy-array"], - ) - def test_setitem_integer_array(self, data, idx, box_in_series, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") - ) - ) - super().test_setitem_integer_array(data, idx, box_in_series) - - @pytest.mark.parametrize("as_callable", [True, False]) - @pytest.mark.parametrize("setter", ["loc", None]) - def test_setitem_mask_aligned(self, data, as_callable, setter, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") - ) - ) - super().test_setitem_mask_aligned(data, as_callable, setter) - - @pytest.mark.parametrize("setter", ["loc", None]) - def test_setitem_mask_broadcast(self, data, setter, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") - ) - ) - super().test_setitem_mask_broadcast(data, setter) - - def test_setitem_tuple_index(self, data, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if 
pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") - ) - ) - super().test_setitem_tuple_index(data) - - def test_setitem_slice(self, data, box_in_series, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") - ) - ) - super().test_setitem_slice(data, box_in_series) - - def test_setitem_loc_iloc_slice(self, data, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" - ) - ) - super().test_setitem_loc_iloc_slice(data) - - def test_setitem_slice_array(self, data, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" - ) - ) - super().test_setitem_slice_array(data) - - def test_setitem_with_expansion_dataframe_column(self, data, full_indexer, request): - # Is there a better way to get the full_indexer id "null_slice"? 
- is_null_slice = "null_slice" in request.node.nodeid - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC") and not is_null_slice: - request.node.add_marker( - pytest.mark.xfail( - reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" - ) - ) - super().test_setitem_with_expansion_dataframe_column(data, full_indexer) - - def test_setitem_with_expansion_row(self, data, na_value, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=(f"Not supported by pyarrow < 2.0 with timestamp type {tz}") - ) - ) - super().test_setitem_with_expansion_row(data, na_value) - - def test_setitem_frame_2d_values(self, data, request): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" - ) - ) - super().test_setitem_frame_2d_values(data) - @pytest.mark.xfail(reason="GH 45419: pyarrow.ChunkedArray does not support views") def test_setitem_preserves_views(self, data): super().test_setitem_preserves_views(data) @@ -1021,21 +663,7 @@ def test_EA_types(self, engine, data, request): class TestBaseUnaryOps(base.BaseUnaryOpsTests): - @pytest.mark.xfail( - pa_version_under2p0, - raises=NotImplementedError, - reason="pyarrow.compute.invert not supported in pyarrow<2.0", - ) - def test_invert(self, data, request): - pa_dtype = data.dtype.pyarrow_dtype - if not pa.types.is_boolean(pa_dtype): - request.node.add_marker( - pytest.mark.xfail( - raises=pa.ArrowNotImplementedError, - reason=f"pyarrow.compute.invert does support {pa_dtype}", - ) - ) - super().test_invert(data) + pass class TestBaseMethods(base.BaseMethodsTests): @@ -1160,17 +788,6 @@ def test_nargsort(self, data_missing_for_sorting, na_position, expected): 
@pytest.mark.parametrize("ascending", [True, False]) def test_sort_values(self, data_for_sorting, ascending, sort_by_key, request): - pa_dtype = data_for_sorting.dtype.pyarrow_dtype - if pa.types.is_duration(pa_dtype) and not ascending and not pa_version_under2p0: - request.node.add_marker( - pytest.mark.xfail( - raises=pa.ArrowNotImplementedError, - reason=( - f"unique has no pyarrow kernel " - f"for {pa_dtype} when ascending={ascending}" - ), - ) - ) with tm.maybe_produces_warning( PerformanceWarning, pa_version_under7p0, check_stacklevel=False ): @@ -1205,22 +822,6 @@ def test_sort_values_frame(self, data_for_sorting, ascending, request): ): super().test_sort_values_frame(data_for_sorting, ascending) - @pytest.mark.parametrize("box", [pd.Series, lambda x: x]) - @pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique]) - def test_unique(self, data, box, method, request): - pa_dtype = data.dtype.pyarrow_dtype - if pa.types.is_duration(pa_dtype) and not pa_version_under2p0: - request.node.add_marker( - pytest.mark.xfail( - raises=pa.ArrowNotImplementedError, - reason=f"unique has no pyarrow kernel for {pa_dtype}.", - ) - ) - with tm.maybe_produces_warning( - PerformanceWarning, pa_version_under2p0, check_stacklevel=False - ): - super().test_unique(data, box, method) - @pytest.mark.parametrize("na_sentinel", [-1, -2]) def test_factorize(self, data_for_grouping, na_sentinel, request): pa_dtype = data_for_grouping.dtype.pyarrow_dtype @@ -1249,10 +850,7 @@ def test_factorize_equivalence(self, data_for_grouping, na_sentinel, request): reason=f"dictionary_encode has no pyarrow kernel for {pa_dtype}", ) ) - with tm.maybe_produces_warning( - PerformanceWarning, pa_version_under2p0, check_stacklevel=False - ): - super().test_factorize_equivalence(data_for_grouping, na_sentinel) + super().test_factorize_equivalence(data_for_grouping, na_sentinel) def test_factorize_empty(self, data, request): pa_dtype = data.dtype.pyarrow_dtype @@ -1265,54 +863,6 @@ def 
test_factorize_empty(self, data, request): ) super().test_factorize_empty(data) - def test_shift_fill_value(self, data, request): - pa_dtype = data.dtype.pyarrow_dtype - tz = getattr(pa_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" - ) - ) - super().test_shift_fill_value(data) - - @pytest.mark.parametrize("repeats", [0, 1, 2, [1, 2, 3]]) - def test_repeat(self, data, repeats, as_series, use_numpy, request): - pa_dtype = data.dtype.pyarrow_dtype - tz = getattr(pa_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC") and repeats != 0: - request.node.add_marker( - pytest.mark.xfail( - reason=( - f"Not supported by pyarrow < 2.0 with " - f"timestamp type {tz} when repeats={repeats}" - ) - ) - ) - super().test_repeat(data, repeats, as_series, use_numpy) - - def test_insert(self, data, request): - pa_dtype = data.dtype.pyarrow_dtype - tz = getattr(pa_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" - ) - ) - super().test_insert(data) - - def test_combine_first(self, data, request): - pa_dtype = data.dtype.pyarrow_dtype - tz = getattr(pa_dtype, "tz", None) - if pa_version_under2p0 and tz not in (None, "UTC"): - request.node.add_marker( - pytest.mark.xfail( - reason=f"Not supported by pyarrow < 2.0 with timestamp type {tz}" - ) - ) - super().test_combine_first(data) - @pytest.mark.xfail( reason="result dtype pyarrow[bool] better than expected dtype object" ) @@ -1394,14 +944,10 @@ def test_arith_series_with_scalar( or all_arithmetic_operators in ("__sub__", "__rsub__") and pa.types.is_temporal(pa_dtype) ) - if ( - all_arithmetic_operators - in { - "__mod__", - "__rmod__", - } - or pa_version_under2p0 - ): + if all_arithmetic_operators in { + "__mod__", + "__rmod__", + }: 
self.series_scalar_exc = NotImplementedError elif arrow_temporal_supported: self.series_scalar_exc = None @@ -1416,7 +962,7 @@ def test_arith_series_with_scalar( if ( all_arithmetic_operators == "__rpow__" and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)) - and not pa_version_under2p0 + and not pa_version_under5p0 ): request.node.add_marker( pytest.mark.xfail( @@ -1439,7 +985,7 @@ def test_arith_series_with_scalar( elif ( all_arithmetic_operators in {"__rtruediv__", "__rfloordiv__"} and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)) - and not pa_version_under2p0 + and not pa_version_under5p0 ): request.node.add_marker( pytest.mark.xfail( @@ -1464,14 +1010,10 @@ def test_arith_frame_with_scalar( or all_arithmetic_operators in ("__sub__", "__rsub__") and pa.types.is_temporal(pa_dtype) ) - if ( - all_arithmetic_operators - in { - "__mod__", - "__rmod__", - } - or pa_version_under2p0 - ): + if all_arithmetic_operators in { + "__mod__", + "__rmod__", + }: self.frame_scalar_exc = NotImplementedError elif arrow_temporal_supported: self.frame_scalar_exc = None @@ -1482,7 +1024,7 @@ def test_arith_frame_with_scalar( if ( all_arithmetic_operators == "__rpow__" and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)) - and not pa_version_under2p0 + and not pa_version_under5p0 ): request.node.add_marker( pytest.mark.xfail( @@ -1505,7 +1047,7 @@ def test_arith_frame_with_scalar( elif ( all_arithmetic_operators in {"__rtruediv__", "__rfloordiv__"} and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)) - and not pa_version_under2p0 + and not pa_version_under5p0 ): request.node.add_marker( pytest.mark.xfail( @@ -1530,14 +1072,10 @@ def test_arith_series_with_array( or all_arithmetic_operators in ("__sub__", "__rsub__") and pa.types.is_temporal(pa_dtype) ) - if ( - all_arithmetic_operators - in { - "__mod__", - "__rmod__", - } - or pa_version_under2p0 - ): + if all_arithmetic_operators in { + "__mod__", + 
"__rmod__", + }: self.series_array_exc = NotImplementedError elif arrow_temporal_supported: self.series_array_exc = None @@ -1548,7 +1086,7 @@ def test_arith_series_with_array( if ( all_arithmetic_operators == "__rpow__" and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)) - and not pa_version_under2p0 + and not pa_version_under5p0 ): request.node.add_marker( pytest.mark.xfail( @@ -1565,7 +1103,7 @@ def test_arith_series_with_array( "__rsub__", ) and pa.types.is_unsigned_integer(pa_dtype) - and not pa_version_under2p0 + and not pa_version_under5p0 ): request.node.add_marker( pytest.mark.xfail( @@ -1589,7 +1127,7 @@ def test_arith_series_with_array( elif ( all_arithmetic_operators in {"__rtruediv__", "__rfloordiv__"} and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)) - and not pa_version_under2p0 + and not pa_version_under5p0 ): request.node.add_marker( pytest.mark.xfail( @@ -1610,13 +1148,10 @@ def test_arith_series_with_array( def test_add_series_with_extension_array(self, data, request): pa_dtype = data.dtype.pyarrow_dtype - if ( - not ( - pa.types.is_integer(pa_dtype) - or pa.types.is_floating(pa_dtype) - or (not pa_version_under8p0 and pa.types.is_duration(pa_dtype)) - ) - or pa_version_under2p0 + if not ( + pa.types.is_integer(pa_dtype) + or pa.types.is_floating(pa_dtype) + or (not pa_version_under8p0 and pa.types.is_duration(pa_dtype)) ): request.node.add_marker( pytest.mark.xfail( diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 1bb89f50e9de0..c5aebb282bafa 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -19,7 +19,6 @@ import pytest from pandas.compat import ( - pa_version_under2p0, pa_version_under6p0, pa_version_under7p0, ) @@ -327,26 +326,6 @@ def test_sort_values_frame(self, data_for_sorting, ascending): ): super().test_sort_values_frame(data_for_sorting, ascending) - @pytest.mark.parametrize("box", [pd.Series, lambda x: 
x]) - @pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique]) - def test_unique(self, data, box, method): - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under2p0 and getattr(data.dtype, "storage", "") == "pyarrow", - check_stacklevel=False, - ): - super().test_unique(data, box, method) - - @pytest.mark.parametrize("na_sentinel", [-1, -2]) - def test_factorize_equivalence(self, data_for_grouping, na_sentinel): - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under2p0 - and getattr(data_for_grouping.dtype, "storage", "") == "pyarrow", - check_stacklevel=False, - ): - super().test_factorize_equivalence(data_for_grouping, na_sentinel) - class TestCasting(base.BaseCastingTests): pass diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index f4d958999b981..8635aed3559d4 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -10,7 +10,6 @@ from pandas.compat import ( IS64, - pa_version_under2p0, pa_version_under7p0, ) from pandas.errors import PerformanceWarning @@ -230,12 +229,7 @@ def test_unique(self, index_flat): except NotImplementedError: pass - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under2p0 - and getattr(index_flat.dtype, "storage", "") == "pyarrow", - ): - result = idx.unique() + result = idx.unique() tm.assert_index_equal(result, idx_unique) # nans: @@ -255,13 +249,7 @@ def test_unique(self, index_flat): expected = idx_unique_nan for pos, i in enumerate([idx_nan, idx_unique_nan]): - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under2p0 - and getattr(index_flat.dtype, "storage", "") == "pyarrow" - and pos == 0, - ): - result = i.unique() + result = i.unique() tm.assert_index_equal(result, expected) def test_searchsorted_monotonic(self, index_flat, request): diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 941f92111f155..4cb8e95f32e6b 100644 
--- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -8,10 +8,7 @@ import numpy as np import pytest -from pandas.compat import ( - pa_version_under2p0, - pa_version_under7p0, -) +from pandas.compat import pa_version_under7p0 from pandas.errors import PerformanceWarning from pandas.core.dtypes.cast import find_common_type @@ -579,12 +576,8 @@ def test_intersection_duplicates_all_indexes(index): idx = index idx_non_unique = idx[[0, 0, 1, 2]] - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under2p0 and getattr(index.dtype, "storage", "") == "pyarrow", - ): - assert idx.intersection(idx_non_unique).equals(idx_non_unique.intersection(idx)) - assert idx.intersection(idx_non_unique).is_unique + assert idx.intersection(idx_non_unique).equals(idx_non_unique.intersection(idx)) + assert idx.intersection(idx_non_unique).is_unique @pytest.mark.parametrize( diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 9f47c220a111b..0fa6734ee3ca5 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -15,7 +15,6 @@ from pandas.compat import is_platform_windows from pandas.compat.pyarrow import ( - pa_version_under2p0, pa_version_under5p0, pa_version_under6p0, pa_version_under8p0, @@ -836,13 +835,8 @@ def test_s3_roundtrip_for_dir( expected_df = df_compat.copy() # GH #35791 - # read_table uses the new Arrow Datasets API since pyarrow 1.0.0 - # Previous behaviour was pyarrow partitioned columns become 'category' dtypes - # These are added to back of dataframe on read. 
In new API category dtype is - # only used if partition field is string, but this changed again to use - # category dtype for all types (not only strings) in pyarrow 2.0.0 if partition_col: - partition_col_type = "int32" if pa_version_under2p0 else "category" + partition_col_type = "category" expected_df[partition_col] = expected_df[partition_col].astype( partition_col_type @@ -975,7 +969,7 @@ def test_timestamp_nanoseconds(self, pa): def test_timezone_aware_index(self, request, pa, timezone_aware_date_list): if ( - not pa_version_under2p0 + not pa_version_under5p0 and timezone_aware_date_list.tzinfo != datetime.timezone.utc ): request.node.add_marker( diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index 4b25752940418..1abe5988479a1 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -6,10 +6,7 @@ import numpy as np import pytest -from pandas.compat import ( - pa_version_under2p0, - pa_version_under4p0, -) +from pandas.compat import pa_version_under4p0 from pandas.errors import PerformanceWarning from pandas import ( @@ -266,11 +263,7 @@ def test_empty_str_methods(any_string_dtype): tm.assert_series_equal(empty_bool, empty.str.isalnum()) tm.assert_series_equal(empty_bool, empty.str.isalpha()) tm.assert_series_equal(empty_bool, empty.str.isdigit()) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under2p0, - ): - tm.assert_series_equal(empty_bool, empty.str.isspace()) + tm.assert_series_equal(empty_bool, empty.str.isspace()) tm.assert_series_equal(empty_bool, empty.str.islower()) tm.assert_series_equal(empty_bool, empty.str.isupper()) tm.assert_series_equal(empty_bool, empty.str.istitle()) @@ -321,13 +314,7 @@ def test_ismethods(method, expected, any_string_dtype): ) expected_dtype = "bool" if any_string_dtype == "object" else "boolean" expected = Series(expected, dtype=expected_dtype) - with tm.maybe_produces_warning( - 
PerformanceWarning, - any_string_dtype == "string[pyarrow]" - and pa_version_under2p0 - and method == "isspace", - ): - result = getattr(ser.str, method)() + result = getattr(ser.str, method)() tm.assert_series_equal(result, expected) # compare with standard library From 87a9567b6bae650af8afded24e55256c2a512147 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 14 Oct 2022 11:52:21 -0700 Subject: [PATCH 4/7] Remove 3.0.0 check --- pandas/compat/__init__.py | 2 - pandas/compat/pyarrow.py | 2 - pandas/core/arrays/arrow/array.py | 14 +----- pandas/core/arrays/string_arrow.py | 12 +---- pandas/tests/extension/test_arrow.py | 67 +--------------------------- 5 files changed, 5 insertions(+), 92 deletions(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 1868f6028c9f9..b3397ec6b8be1 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -20,7 +20,6 @@ np_version_under1p21, ) from pandas.compat.pyarrow import ( - pa_version_under3p0, pa_version_under4p0, pa_version_under5p0, pa_version_under6p0, @@ -152,7 +151,6 @@ def get_lzma_file() -> type[lzma.LZMAFile]: __all__ = [ "is_numpy_dev", "np_version_under1p21", - "pa_version_under3p0", "pa_version_under4p0", "pa_version_under5p0", "pa_version_under6p0", diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py index 6277b7e254fb4..886031b9b1451 100644 --- a/pandas/compat/pyarrow.py +++ b/pandas/compat/pyarrow.py @@ -9,7 +9,6 @@ _pa_version = pa.__version__ _palv = Version(_pa_version) - pa_version_under3p0 = _palv < Version("3.0.0") pa_version_under4p0 = _palv < Version("4.0.0") pa_version_under5p0 = _palv < Version("5.0.0") pa_version_under6p0 = _palv < Version("6.0.0") @@ -17,7 +16,6 @@ pa_version_under8p0 = _palv < Version("8.0.0") pa_version_under9p0 = _palv < Version("9.0.0") except ImportError: - pa_version_under3p0 = True pa_version_under4p0 = True pa_version_under5p0 = True pa_version_under6p0 = True diff --git 
a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 8b77b6f19836b..dcb69d4a85435 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -17,7 +17,6 @@ npt, ) from pandas.compat import ( - pa_version_under3p0, pa_version_under4p0, pa_version_under5p0, pa_version_under6p0, @@ -517,20 +516,11 @@ def dropna(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT: return type(self)(pc.drop_null(self._data)) def isin(self, values) -> npt.NDArray[np.bool_]: - # for an empty value_set pyarrow 3.0.0 segfaults and pyarrow 2.0.0 returns True - # for null values, so we short-circuit to return all False array. + # short-circuit to return all False array. if not len(values): return np.zeros(len(self), dtype=bool) - kwargs = {} - if pa_version_under3p0: - # in pyarrow 2.0.0 skip_null is ignored but is a required keyword and raises - # with unexpected keyword argument in pyarrow 3.0.0+ - kwargs["skip_null"] = True - - result = pc.is_in( - self._data, value_set=pa.array(values, from_pandas=True), **kwargs - ) + result = pc.is_in(self._data, value_set=pa.array(values, from_pandas=True)) # pyarrow 2.0.0 returned nulls, so we explicitly specify dtype to convert nulls # to False return np.array(result, dtype=np.bool_) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 7c30dba1e5e6c..1d30a8f5d3e52 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -17,7 +17,6 @@ npt, ) from pandas.compat import ( - pa_version_under3p0, pa_version_under4p0, pa_version_under5p0, ) @@ -207,18 +206,11 @@ def isin(self, values) -> npt.NDArray[np.bool_]: if pa_scalar.type in (pa.string(), pa.null()) ] - # for an empty value_set pyarrow 3.0.0 segfaults and pyarrow 2.0.0 returns True - # for null values, so we short-circuit to return all False array. + # short-circuit to return all False array. 
if not len(value_set): return np.zeros(len(self), dtype=bool) - kwargs = {} - if pa_version_under3p0: - # in pyarrow 2.0.0 skip_null is ignored but is a required keyword and raises - # with unexpected keyword argument in pyarrow 3.0.0+ - kwargs["skip_null"] = True - - result = pc.is_in(self._data, value_set=pa.array(value_set), **kwargs) + result = pc.is_in(self._data, value_set=pa.array(value_set)) # pyarrow 2.0.0 returned nulls, so we explicily specify dtype to convert nulls # to False return np.array(result, dtype=np.bool_) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index db71b1d504c5e..3f19d88e46a35 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -23,7 +23,6 @@ from pandas.compat import ( is_ci_environment, is_platform_windows, - pa_version_under3p0, pa_version_under4p0, pa_version_under5p0, pa_version_under6p0, @@ -224,12 +223,6 @@ def test_from_dtype(self, data, request): def test_from_sequence_pa_array(self, data, request): # https://github.com/pandas-dev/pandas/pull/47034#discussion_r955500784 # data._data = pa.ChunkedArray - if pa_version_under3p0: - request.node.add_marker( - pytest.mark.xfail( - reason="ChunkedArray has no attribute combine_chunks", - ) - ) result = type(data)._from_sequence(data._data) tm.assert_extension_array_equal(result, data) assert isinstance(result._data, pa.ChunkedArray) @@ -253,13 +246,7 @@ def test_from_sequence_pa_array_notimplemented(self, request): def test_from_sequence_of_strings_pa_array(self, data, request): pa_dtype = data.dtype.pyarrow_dtype - if pa_version_under3p0: - request.node.add_marker( - pytest.mark.xfail( - reason="ChunkedArray has no attribute combine_chunks", - ) - ) - elif pa.types.is_time64(pa_dtype) and pa_dtype.equals("time64[ns]"): + if pa.types.is_time64(pa_dtype) and pa_dtype.equals("time64[ns]"): request.node.add_marker( pytest.mark.xfail( reason="Nanosecond time parsing not supported.", @@ 
-323,56 +310,6 @@ class TestGetitemTests(base.BaseGetitemTests): def test_getitem_scalar(self, data): super().test_getitem_scalar(data) - def test_take_series(self, request, data): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - unit = getattr(data.dtype.pyarrow_dtype, "unit", None) - bad_units = ["ns"] - if pa_version_under3p0 and tz not in (None, "UTC") and unit in bad_units: - request.node.add_marker( - pytest.mark.xfail( - reason=( - f"Not supported by pyarrow < 3.0 " - f"with timestamp type {tz} and {unit}" - ) - ) - ) - super().test_take_series(data) - - def test_reindex(self, request, data, na_value): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - unit = getattr(data.dtype.pyarrow_dtype, "unit", None) - bad_units = ["ns"] - if pa_version_under3p0 and tz not in (None, "UTC") and unit in bad_units: - request.node.add_marker( - pytest.mark.xfail( - reason=( - f"Not supported by pyarrow < 3.0 " - f"with timestamp type {tz} and {unit}" - ) - ) - ) - super().test_reindex(data, na_value) - - def test_loc_iloc_frame_single_dtype(self, request, using_array_manager, data): - tz = getattr(data.dtype.pyarrow_dtype, "tz", None) - unit = getattr(data.dtype.pyarrow_dtype, "unit", None) - bad_units = ["ns"] - if ( - pa_version_under3p0 - and not using_array_manager - and tz not in (None, "UTC") - and unit in bad_units - ): - request.node.add_marker( - pytest.mark.xfail( - reason=( - f"Not supported by pyarrow < 3.0 " - f"with timestamp type {tz} and {unit}" - ) - ) - ) - super().test_loc_iloc_frame_single_dtype(data) - class TestBaseNumericReduce(base.BaseNumericReduceTests): def check_reduce(self, ser, op_name, skipna): @@ -453,8 +390,6 @@ def test_reduce_series( ) if not pa.types.is_boolean(pa_dtype): request.node.add_marker(xfail_mark) - elif pa_version_under3p0: - request.node.add_marker(xfail_mark) op_name = all_boolean_reductions s = pd.Series(data) result = getattr(s, op_name)(skipna=skipna) From 531e8edb84c495a70f52533a01b09167afa127ab Mon Sep 17 
00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 14 Oct 2022 12:01:47 -0700 Subject: [PATCH 5/7] Remove 4.0.0 check --- pandas/compat/__init__.py | 2 - pandas/compat/pyarrow.py | 2 - pandas/core/arrays/arrow/array.py | 28 +-- pandas/core/arrays/string_arrow.py | 51 +----- pandas/tests/extension/test_arrow.py | 6 - pandas/tests/strings/test_find_replace.py | 203 ++++------------------ pandas/tests/strings/test_strings.py | 81 ++------- 7 files changed, 57 insertions(+), 316 deletions(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index b3397ec6b8be1..1ddc404544467 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -20,7 +20,6 @@ np_version_under1p21, ) from pandas.compat.pyarrow import ( - pa_version_under4p0, pa_version_under5p0, pa_version_under6p0, pa_version_under7p0, @@ -151,7 +150,6 @@ def get_lzma_file() -> type[lzma.LZMAFile]: __all__ = [ "is_numpy_dev", "np_version_under1p21", - "pa_version_under4p0", "pa_version_under5p0", "pa_version_under6p0", "pa_version_under7p0", diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py index 886031b9b1451..4c8fbe9ac02f0 100644 --- a/pandas/compat/pyarrow.py +++ b/pandas/compat/pyarrow.py @@ -9,14 +9,12 @@ _pa_version = pa.__version__ _palv = Version(_pa_version) - pa_version_under4p0 = _palv < Version("4.0.0") pa_version_under5p0 = _palv < Version("5.0.0") pa_version_under6p0 = _palv < Version("6.0.0") pa_version_under7p0 = _palv < Version("7.0.0") pa_version_under8p0 = _palv < Version("8.0.0") pa_version_under9p0 = _palv < Version("9.0.0") except ImportError: - pa_version_under4p0 = True pa_version_under5p0 = True pa_version_under6p0 = True pa_version_under7p0 = True diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index dcb69d4a85435..9ed0088487264 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -17,7 +17,6 @@ npt, ) from pandas.compat import ( 
- pa_version_under4p0, pa_version_under5p0, pa_version_under6p0, pa_version_under7p0, @@ -107,10 +106,8 @@ def floordiv_compat( "rmod": NotImplemented, "divmod": NotImplemented, "rdivmod": NotImplemented, - "pow": NotImplemented if pa_version_under4p0 else pc.power_checked, - "rpow": NotImplemented - if pa_version_under4p0 - else lambda x, y: pc.power_checked(y, x), + "pow": pc.power_checked, + "rpow": lambda x, y: pc.power_checked(y, x), } if TYPE_CHECKING: @@ -549,11 +546,8 @@ def factorize( use_na_sentinel: bool | lib.NoDefault = lib.no_default, ) -> tuple[np.ndarray, ExtensionArray]: resolved_na_sentinel = resolve_na_sentinel(na_sentinel, use_na_sentinel) - if pa_version_under4p0: - encoded = self._data.dictionary_encode() - else: - null_encoding = "mask" if resolved_na_sentinel is not None else "encode" - encoded = self._data.dictionary_encode(null_encoding=null_encoding) + null_encoding = "mask" if resolved_na_sentinel is not None else "encode" + encoded = self._data.dictionary_encode(null_encoding=null_encoding) indices = pa.chunked_array( [c.indices for c in encoded.chunks], type=encoded.type.index_type ).to_pandas() @@ -565,16 +559,6 @@ def factorize( if encoded.num_chunks: uniques = type(self)(encoded.chunk(0).dictionary) - if resolved_na_sentinel is None and pa_version_under4p0: - # TODO: share logic with BaseMaskedArray.factorize - # Insert na with the proper code - na_mask = indices.values == -1 - na_index = na_mask.argmax() - if na_mask[na_index]: - na_code = 0 if na_index == 0 else indices[:na_index].max() + 1 - uniques = uniques.insert(na_code, self.dtype.na_value) - indices[indices >= na_code] += 1 - indices[indices == -1] = na_code else: uniques = type(self)(pa.array([], type=encoded.type.value_type)) @@ -905,10 +889,6 @@ def _quantile( ------- same type as self """ - if pa_version_under4p0: - raise NotImplementedError( - "quantile only supported for pyarrow version >= 4.0" - ) result = pc.quantile(self._data, q=qs, interpolation=interpolation) 
return type(self)(result) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 1d30a8f5d3e52..7bb56e7b38058 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -16,10 +16,7 @@ Scalar, npt, ) -from pandas.compat import ( - pa_version_under4p0, - pa_version_under5p0, -) +from pandas.compat import pa_version_under5p0 from pandas.core.dtypes.common import ( is_bool_dtype, @@ -304,8 +301,8 @@ def _str_contains( return super()._str_contains(pat, case, flags, na, regex) if regex: - if pa_version_under4p0 or case is False: - fallback_performancewarning(version="4") + if case is False: + fallback_performancewarning() return super()._str_contains(pat, case, flags, na, regex) else: result = pc.match_substring_regex(self._data, pat) @@ -320,18 +317,10 @@ def _str_contains( return result def _str_startswith(self, pat: str, na=None): - if pa_version_under4p0: - fallback_performancewarning(version="4") - return super()._str_startswith(pat, na) - pat = "^" + re.escape(pat) return self._str_contains(pat, na=na, regex=True) def _str_endswith(self, pat: str, na=None): - if pa_version_under4p0: - fallback_performancewarning(version="4") - return super()._str_endswith(pat, na) - pat = re.escape(pat) + "$" return self._str_contains(pat, na=na, regex=True) @@ -344,14 +333,8 @@ def _str_replace( flags: int = 0, regex: bool = True, ): - if ( - pa_version_under4p0 - or isinstance(pat, re.Pattern) - or callable(repl) - or not case - or flags - ): - fallback_performancewarning(version="4") + if isinstance(pat, re.Pattern) or callable(repl) or not case or flags: + fallback_performancewarning() return super()._str_replace(pat, repl, n, case, flags, regex) func = pc.replace_substring_regex if regex else pc.replace_substring @@ -361,10 +344,6 @@ def _str_replace( def _str_match( self, pat: str, case: bool = True, flags: int = 0, na: Scalar | None = None ): - if pa_version_under4p0: - 
fallback_performancewarning(version="4") - return super()._str_match(pat, case, flags, na) - if not pat.startswith("^"): pat = "^" + pat return self._str_contains(pat, case, flags, na, regex=True) @@ -372,10 +351,6 @@ def _str_match( def _str_fullmatch( self, pat, case: bool = True, flags: int = 0, na: Scalar | None = None ): - if pa_version_under4p0: - fallback_performancewarning(version="4") - return super()._str_fullmatch(pat, case, flags, na) - if not pat.endswith("$") or pat.endswith("//$"): pat = pat + "$" return self._str_match(pat, case, flags, na) @@ -417,10 +392,6 @@ def _str_isupper(self): return BooleanDtype().__from_arrow__(result) def _str_len(self): - if pa_version_under4p0: - fallback_performancewarning(version="4") - return super()._str_len() - result = pc.utf8_length(self._data) return Int64Dtype().__from_arrow__(result) @@ -431,10 +402,6 @@ def _str_upper(self): return type(self)(pc.utf8_upper(self._data)) def _str_strip(self, to_strip=None): - if pa_version_under4p0: - fallback_performancewarning(version="4") - return super()._str_strip(to_strip) - if to_strip is None: result = pc.utf8_trim_whitespace(self._data) else: @@ -442,10 +409,6 @@ def _str_strip(self, to_strip=None): return type(self)(result) def _str_lstrip(self, to_strip=None): - if pa_version_under4p0: - fallback_performancewarning(version="4") - return super()._str_lstrip(to_strip) - if to_strip is None: result = pc.utf8_ltrim_whitespace(self._data) else: @@ -453,10 +416,6 @@ def _str_lstrip(self, to_strip=None): return type(self)(result) def _str_rstrip(self, to_strip=None): - if pa_version_under4p0: - fallback_performancewarning(version="4") - return super()._str_rstrip(to_strip) - if to_strip is None: result = pc.utf8_rtrim_whitespace(self._data) else: diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 3f19d88e46a35..c7c0c99ff5345 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -23,7 +23,6 
@@ from pandas.compat import ( is_ci_environment, is_platform_windows, - pa_version_under4p0, pa_version_under5p0, pa_version_under6p0, pa_version_under7p0, @@ -1164,11 +1163,6 @@ def test_arrowdtype_construct_from_string_type_with_unsupported_parameters(): ArrowDtype.construct_from_string("timestamp[s, tz=UTC][pyarrow]") -@pytest.mark.xfail( - pa_version_under4p0, - raises=NotImplementedError, - reason="quantile only supported for pyarrow version >= 4.0", -) @pytest.mark.parametrize( "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"] ) diff --git a/pandas/tests/strings/test_find_replace.py b/pandas/tests/strings/test_find_replace.py index 62f9478bf25ff..59b41e0ec944a 100644 --- a/pandas/tests/strings/test_find_replace.py +++ b/pandas/tests/strings/test_find_replace.py @@ -4,7 +4,6 @@ import numpy as np import pytest -from pandas.compat import pa_version_under4p0 from pandas.errors import PerformanceWarning import pandas as pd @@ -25,11 +24,7 @@ def test_contains(any_string_dtype): values = Series(values, dtype=any_string_dtype) pat = "mmm[_]+" - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = values.str.contains(pat) + result = values.str.contains(pat) expected_dtype = "object" if any_string_dtype == "object" else "boolean" expected = Series( np.array([False, np.nan, True, True, False], dtype=np.object_), @@ -48,11 +43,7 @@ def test_contains(any_string_dtype): np.array(["foo", "xyz", "fooommm__foo", "mmm_"], dtype=object), dtype=any_string_dtype, ) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = values.str.contains(pat) + result = values.str.contains(pat) expected_dtype = np.bool_ if any_string_dtype == "object" else "boolean" expected = Series(np.array([False, False, True, True]), dtype=expected_dtype) tm.assert_series_equal(result, expected) @@ -81,22 +72,14 @@ def 
test_contains(any_string_dtype): ) pat = "mmm[_]+" - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = values.str.contains(pat) + result = values.str.contains(pat) expected_dtype = "object" if any_string_dtype == "object" else "boolean" expected = Series( np.array([False, np.nan, True, True], dtype=np.object_), dtype=expected_dtype ) tm.assert_series_equal(result, expected) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = values.str.contains(pat, na=False) + result = values.str.contains(pat, na=False) expected_dtype = np.bool_ if any_string_dtype == "object" else "boolean" expected = Series(np.array([False, False, True, True]), dtype=expected_dtype) tm.assert_series_equal(result, expected) @@ -105,11 +88,7 @@ def test_contains(any_string_dtype): np.array(["foo", "xyz", "fooommm__foo", "mmm_"], dtype=np.object_), dtype=any_string_dtype, ) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = values.str.contains(pat) + result = values.str.contains(pat) expected = Series(np.array([False, False, True, True]), dtype=expected_dtype) tm.assert_series_equal(result, expected) @@ -173,11 +152,7 @@ def test_contains_na_kwarg_for_nullable_string_dtype( # https://github.com/pandas-dev/pandas/pull/41025#issuecomment-824062416 values = Series(["a", "b", "c", "a", np.nan], dtype=nullable_string_dtype) - with tm.maybe_produces_warning( - PerformanceWarning, - nullable_string_dtype == "string[pyarrow]" and pa_version_under4p0 and regex, - ): - result = values.str.contains("a", na=na, regex=regex) + result = values.str.contains("a", na=na, regex=regex) expected = Series([True, False, False, True, expected], dtype="boolean") tm.assert_series_equal(result, expected) @@ -189,11 +164,7 @@ def 
test_contains_moar(any_string_dtype): dtype=any_string_dtype, ) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = s.str.contains("a") + result = s.str.contains("a") expected_dtype = "object" if any_string_dtype == "object" else "boolean" expected = Series( [False, False, False, True, True, False, np.nan, False, False, True], @@ -211,22 +182,14 @@ def test_contains_moar(any_string_dtype): ) tm.assert_series_equal(result, expected) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = s.str.contains("Aa") + result = s.str.contains("Aa") expected = Series( [False, False, False, True, False, False, np.nan, False, False, False], dtype=expected_dtype, ) tm.assert_series_equal(result, expected) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = s.str.contains("ba") + result = s.str.contains("ba") expected = Series( [False, False, False, True, False, False, np.nan, False, False, False], dtype=expected_dtype, @@ -248,39 +211,23 @@ def test_contains_nan(any_string_dtype): # PR #14171 s = Series([np.nan, np.nan, np.nan], dtype=any_string_dtype) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = s.str.contains("foo", na=False) + result = s.str.contains("foo", na=False) expected_dtype = np.bool_ if any_string_dtype == "object" else "boolean" expected = Series([False, False, False], dtype=expected_dtype) tm.assert_series_equal(result, expected) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = s.str.contains("foo", na=True) + result = s.str.contains("foo", na=True) expected = Series([True, True, True], dtype=expected_dtype) tm.assert_series_equal(result, 
expected) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = s.str.contains("foo", na="foo") + result = s.str.contains("foo", na="foo") if any_string_dtype == "object": expected = Series(["foo", "foo", "foo"], dtype=np.object_) else: expected = Series([True, True, True], dtype="boolean") tm.assert_series_equal(result, expected) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = s.str.contains("foo") + result = s.str.contains("foo") expected_dtype = "object" if any_string_dtype == "object" else "boolean" expected = Series([np.nan, np.nan, np.nan], dtype=expected_dtype) tm.assert_series_equal(result, expected) @@ -326,21 +273,13 @@ def test_startswith_nullable_string_dtype(nullable_string_dtype, na): ["om", None, "foo_nom", "nom", "bar_foo", None, "foo", "regex", "rege."], dtype=nullable_string_dtype, ) - with tm.maybe_produces_warning( - PerformanceWarning, - nullable_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = values.str.startswith("foo", na=na) + result = values.str.startswith("foo", na=na) exp = Series( [False, na, True, False, False, na, True, False, False], dtype="boolean" ) tm.assert_series_equal(result, exp) - with tm.maybe_produces_warning( - PerformanceWarning, - nullable_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = values.str.startswith("rege.", na=na) + result = values.str.startswith("rege.", na=na) exp = Series( [False, na, False, False, False, na, False, False, True], dtype="boolean" ) @@ -387,21 +326,13 @@ def test_endswith_nullable_string_dtype(nullable_string_dtype, na): ["om", None, "foo_nom", "nom", "bar_foo", None, "foo", "regex", "rege."], dtype=nullable_string_dtype, ) - with tm.maybe_produces_warning( - PerformanceWarning, - nullable_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = 
values.str.endswith("foo", na=na) + result = values.str.endswith("foo", na=na) exp = Series( [False, na, False, False, True, na, True, False, False], dtype="boolean" ) tm.assert_series_equal(result, exp) - with tm.maybe_produces_warning( - PerformanceWarning, - nullable_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = values.str.endswith("rege.", na=na) + result = values.str.endswith("rege.", na=na) exp = Series( [False, na, False, False, False, na, False, False, True], dtype="boolean" ) @@ -416,11 +347,7 @@ def test_endswith_nullable_string_dtype(nullable_string_dtype, na): def test_replace(any_string_dtype): ser = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = ser.str.replace("BAD[_]*", "", regex=True) + result = ser.str.replace("BAD[_]*", "", regex=True) expected = Series(["foobar", np.nan], dtype=any_string_dtype) tm.assert_series_equal(result, expected) @@ -429,19 +356,11 @@ def test_replace_max_replacements(any_string_dtype): ser = Series(["fooBAD__barBAD", np.nan], dtype=any_string_dtype) expected = Series(["foobarBAD", np.nan], dtype=any_string_dtype) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = ser.str.replace("BAD[_]*", "", n=1, regex=True) + result = ser.str.replace("BAD[_]*", "", n=1, regex=True) tm.assert_series_equal(result, expected) expected = Series(["foo__barBAD", np.nan], dtype=any_string_dtype) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = ser.str.replace("BAD", "", n=1, regex=False) + result = ser.str.replace("BAD", "", n=1, regex=False) tm.assert_series_equal(result, expected) @@ -600,11 +519,7 @@ def test_replace_literal(regex, expected, any_string_dtype): # GH16808 literal replace 
(regex=False vs regex=True) ser = Series(["f.o", "foo", np.nan], dtype=any_string_dtype) expected = Series(expected, dtype=any_string_dtype) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = ser.str.replace("f.", "ba", regex=regex) + result = ser.str.replace("f.", "ba", regex=regex) tm.assert_series_equal(result, expected) @@ -633,11 +548,7 @@ def test_replace_moar(any_string_dtype): dtype=any_string_dtype, ) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = ser.str.replace("A", "YYY") + result = ser.str.replace("A", "YYY") expected = Series( ["YYY", "B", "C", "YYYaba", "Baca", "", np.nan, "CYYYBYYY", "dog", "cat"], dtype=any_string_dtype, @@ -739,17 +650,13 @@ def test_replace_regex_single_character(regex, any_string_dtype): "version. In addition, single character regular expressions will *not* " "be treated as literal strings when regex=True." 
) - pyarrow_warn = any_string_dtype == "string[pyarrow]" and pa_version_under4p0 with tm.assert_produces_warning( - FutureWarning, match=msg, raise_on_extra_warnings=not pyarrow_warn + FutureWarning, + match=msg, ): result = s.str.replace(".", "a", regex=regex) else: - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = s.str.replace(".", "a", regex=regex) + result = s.str.replace(".", "a", regex=regex) expected = Series(["aab", "a", "b", np.nan, ""], dtype=any_string_dtype) tm.assert_series_equal(result, expected) @@ -765,49 +672,29 @@ def test_match(any_string_dtype): expected_dtype = "object" if any_string_dtype == "object" else "boolean" values = Series(["fooBAD__barBAD", np.nan, "foo"], dtype=any_string_dtype) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = values.str.match(".*(BAD[_]+).*(BAD)") + result = values.str.match(".*(BAD[_]+).*(BAD)") expected = Series([True, np.nan, False], dtype=expected_dtype) tm.assert_series_equal(result, expected) values = Series( ["fooBAD__barBAD", "BAD_BADleroybrown", np.nan, "foo"], dtype=any_string_dtype ) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = values.str.match(".*BAD[_]+.*BAD") + result = values.str.match(".*BAD[_]+.*BAD") expected = Series([True, True, np.nan, False], dtype=expected_dtype) tm.assert_series_equal(result, expected) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = values.str.match("BAD[_]+.*BAD") + result = values.str.match("BAD[_]+.*BAD") expected = Series([False, True, np.nan, False], dtype=expected_dtype) tm.assert_series_equal(result, expected) values = Series( ["fooBAD__barBAD", "^BAD_BADleroybrown", np.nan, "foo"], dtype=any_string_dtype ) - with 
tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = values.str.match("^BAD[_]+.*BAD") + result = values.str.match("^BAD[_]+.*BAD") expected = Series([False, False, np.nan, False], dtype=expected_dtype) tm.assert_series_equal(result, expected) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = values.str.match("\\^BAD[_]+.*BAD") + result = values.str.match("\\^BAD[_]+.*BAD") expected = Series([False, True, np.nan, False], dtype=expected_dtype) tm.assert_series_equal(result, expected) @@ -838,20 +725,12 @@ def test_match_na_kwarg(any_string_dtype): # GH #6609 s = Series(["a", "b", np.nan], dtype=any_string_dtype) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = s.str.match("a", na=False) + result = s.str.match("a", na=False) expected_dtype = np.bool_ if any_string_dtype == "object" else "boolean" expected = Series([True, False, False], dtype=expected_dtype) tm.assert_series_equal(result, expected) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = s.str.match("a") + result = s.str.match("a") expected_dtype = "object" if any_string_dtype == "object" else "boolean" expected = Series([True, False, np.nan], dtype=expected_dtype) tm.assert_series_equal(result, expected) @@ -878,11 +757,7 @@ def test_fullmatch(any_string_dtype): ser = Series( ["fooBAD__barBAD", "BAD_BADleroybrown", np.nan, "foo"], dtype=any_string_dtype ) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = ser.str.fullmatch(".*BAD[_]+.*BAD") + result = ser.str.fullmatch(".*BAD[_]+.*BAD") expected_dtype = "object" if any_string_dtype == "object" else "boolean" expected = Series([True, False, 
np.nan, False], dtype=expected_dtype) tm.assert_series_equal(result, expected) @@ -892,11 +767,7 @@ def test_fullmatch_na_kwarg(any_string_dtype): ser = Series( ["fooBAD__barBAD", "BAD_BADleroybrown", np.nan, "foo"], dtype=any_string_dtype ) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = ser.str.fullmatch(".*BAD[_]+.*BAD", na=False) + result = ser.str.fullmatch(".*BAD[_]+.*BAD", na=False) expected_dtype = np.bool_ if any_string_dtype == "object" else "boolean" expected = Series([True, False, False, False], dtype=expected_dtype) tm.assert_series_equal(result, expected) @@ -908,11 +779,7 @@ def test_fullmatch_case_kwarg(any_string_dtype): expected = Series([True, False, False, False], dtype=expected_dtype) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = ser.str.fullmatch("ab", case=True) + result = ser.str.fullmatch("ab", case=True) tm.assert_series_equal(result, expected) expected = Series([True, True, False, False], dtype=expected_dtype) diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index 1abe5988479a1..d62858f139b0b 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -6,9 +6,6 @@ import numpy as np import pytest -from pandas.compat import pa_version_under4p0 -from pandas.errors import PerformanceWarning - from pandas import ( DataFrame, Index, @@ -179,34 +176,14 @@ def test_empty_str_methods(any_string_dtype): assert "" == empty.str.cat() tm.assert_series_equal(empty_str, empty.str.title()) tm.assert_series_equal(empty_int, empty.str.count("a")) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - tm.assert_series_equal(empty_bool, empty.str.contains("a")) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype 
== "string[pyarrow]" and pa_version_under4p0, - ): - tm.assert_series_equal(empty_bool, empty.str.startswith("a")) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - tm.assert_series_equal(empty_bool, empty.str.endswith("a")) + tm.assert_series_equal(empty_bool, empty.str.contains("a")) + tm.assert_series_equal(empty_bool, empty.str.startswith("a")) + tm.assert_series_equal(empty_bool, empty.str.endswith("a")) tm.assert_series_equal(empty_str, empty.str.lower()) tm.assert_series_equal(empty_str, empty.str.upper()) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - tm.assert_series_equal(empty_str, empty.str.replace("a", "b")) + tm.assert_series_equal(empty_str, empty.str.replace("a", "b")) tm.assert_series_equal(empty_str, empty.str.repeat(3)) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - tm.assert_series_equal(empty_bool, empty.str.match("^a")) + tm.assert_series_equal(empty_bool, empty.str.match("^a")) tm.assert_frame_equal( DataFrame(columns=[0], dtype=any_string_dtype), empty.str.extract("()", expand=True), @@ -222,11 +199,7 @@ def test_empty_str_methods(any_string_dtype): ) tm.assert_frame_equal(empty_df, empty.str.get_dummies()) tm.assert_series_equal(empty_str, empty_str.str.join("")) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - tm.assert_series_equal(empty_int, empty.str.len()) + tm.assert_series_equal(empty_int, empty.str.len()) tm.assert_series_equal(empty_object, empty_str.str.findall("a")) tm.assert_series_equal(empty_int, empty.str.find("a")) tm.assert_series_equal(empty_int, empty.str.rfind("a")) @@ -240,21 +213,9 @@ def test_empty_str_methods(any_string_dtype): tm.assert_frame_equal(empty_df, empty.str.rpartition("a")) 
tm.assert_series_equal(empty_str, empty.str.slice(stop=1)) tm.assert_series_equal(empty_str, empty.str.slice(step=1)) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - tm.assert_series_equal(empty_str, empty.str.strip()) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - tm.assert_series_equal(empty_str, empty.str.lstrip()) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - tm.assert_series_equal(empty_str, empty.str.rstrip()) + tm.assert_series_equal(empty_str, empty.str.strip()) + tm.assert_series_equal(empty_str, empty.str.lstrip()) + tm.assert_series_equal(empty_str, empty.str.rstrip()) tm.assert_series_equal(empty_str, empty.str.wrap(42)) tm.assert_series_equal(empty_str, empty.str.get(0)) tm.assert_series_equal(empty_object, empty_bytes.str.decode("ascii")) @@ -384,11 +345,7 @@ def test_len(any_string_dtype): ["foo", "fooo", "fooooo", np.nan, "fooooooo", "foo\n", "あ"], dtype=any_string_dtype, ) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = ser.str.len() + result = ser.str.len() expected_dtype = "float64" if any_string_dtype == "object" else "Int64" expected = Series([3, 4, 6, np.nan, 8, 4, 1], dtype=expected_dtype) tm.assert_series_equal(result, expected) @@ -475,11 +432,7 @@ def test_pipe_failures(any_string_dtype): expected = Series([["A", "B", "C"]], dtype=object) tm.assert_series_equal(result, expected) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = ser.str.replace("|", " ", regex=False) + result = ser.str.replace("|", " ", regex=False) expected = Series(["A B C"], dtype=any_string_dtype) tm.assert_series_equal(result, expected) @@ -549,11 +502,7 @@ 
def test_slice_replace(start, stop, repl, expected, any_string_dtype): def test_strip_lstrip_rstrip(any_string_dtype, method, exp): ser = Series([" aa ", " bb \n", np.nan, "cc "], dtype=any_string_dtype) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = getattr(ser.str, method)() + result = getattr(ser.str, method)() expected = Series(exp, dtype=any_string_dtype) tm.assert_series_equal(result, expected) @@ -585,11 +534,7 @@ def test_strip_lstrip_rstrip_mixed_object(method, exp): def test_strip_lstrip_rstrip_args(any_string_dtype, method, exp): ser = Series(["xxABCxx", "xx BNSD", "LDFJH xx"], dtype=any_string_dtype) - with tm.maybe_produces_warning( - PerformanceWarning, - any_string_dtype == "string[pyarrow]" and pa_version_under4p0, - ): - result = getattr(ser.str, method)("x") + result = getattr(ser.str, method)("x") expected = Series(exp, dtype=any_string_dtype) tm.assert_series_equal(result, expected) From 01ff76592854babe77a5ae72c2ee1f57f15898cd Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 14 Oct 2022 14:18:56 -0700 Subject: [PATCH 6/7] Fix bad undos --- .../tests/arrays/string_/test_string_arrow.py | 2 +- pandas/tests/extension/test_arrow.py | 35 ++++++++++++++++++- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py index 3ed70c086c7a8..7140466aaf6c3 100644 --- a/pandas/tests/arrays/string_/test_string_arrow.py +++ b/pandas/tests/arrays/string_/test_string_arrow.py @@ -122,7 +122,7 @@ def test_from_sequence_wrong_dtype_raises(): reason="pyarrow is installed", ) def test_pyarrow_not_installed_raises(): - msg = re.escape("pyarrow>=1.0.0 is required for PyArrow backed") + msg = re.escape("pyarrow>=5.0.0 is required for PyArrow backed") with pytest.raises(ImportError, match=msg): 
StringDtype(storage="pyarrow") diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index c7c0c99ff5345..0b64165f6deb8 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -597,7 +597,16 @@ def test_EA_types(self, engine, data, request): class TestBaseUnaryOps(base.BaseUnaryOpsTests): - pass + def test_invert(self, data, request): + pa_dtype = data.dtype.pyarrow_dtype + if not pa.types.is_boolean(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowNotImplementedError, + reason=f"pyarrow.compute.invert does not support {pa_dtype}", + ) + ) + super().test_invert(data) class TestBaseMethods(base.BaseMethodsTests): @@ -722,6 +731,17 @@ def test_nargsort(self, data_missing_for_sorting, na_position, expected): @pytest.mark.parametrize("ascending", [True, False]) def test_sort_values(self, data_for_sorting, ascending, sort_by_key, request): + pa_dtype = data_for_sorting.dtype.pyarrow_dtype + if pa.types.is_duration(pa_dtype) and not ascending: + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowNotImplementedError, + reason=( + f"unique has no pyarrow kernel " + f"for {pa_dtype} when ascending={ascending}" + ), + ) + ) with tm.maybe_produces_warning( PerformanceWarning, pa_version_under7p0, check_stacklevel=False ): @@ -756,6 +776,19 @@ def test_sort_values_frame(self, data_for_sorting, ascending, request): ): super().test_sort_values_frame(data_for_sorting, ascending) + @pytest.mark.parametrize("box", [pd.Series, lambda x: x]) + @pytest.mark.parametrize("method", [lambda x: x.unique(), pd.unique]) + def test_unique(self, data, box, method, request): + pa_dtype = data.dtype.pyarrow_dtype + if pa.types.is_duration(pa_dtype): + request.node.add_marker( + pytest.mark.xfail( + raises=pa.ArrowNotImplementedError, + reason=f"unique has no pyarrow kernel for {pa_dtype}.", + ) + ) + super().test_unique(data, box, method) + @pytest.mark.parametrize("na_sentinel", [-1,
-2]) def test_factorize(self, data_for_grouping, na_sentinel, request): pa_dtype = data_for_grouping.dtype.pyarrow_dtype From bf050a7051f2e28ef26865ef006a713dd312f8a1 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Sat, 15 Oct 2022 16:43:44 -0700 Subject: [PATCH 7/7] Remove 5.0.0 checks --- .github/workflows/ubuntu.yml | 2 +- ci/deps/actions-38-minimum_versions.yaml | 2 +- doc/source/getting_started/install.rst | 2 +- doc/source/whatsnew/v2.0.0.rst | 2 +- pandas/_testing/__init__.py | 4 ++-- pandas/compat/__init__.py | 2 -- pandas/compat/_optional.py | 2 +- pandas/compat/pyarrow.py | 2 -- pandas/core/arrays/arrow/array.py | 9 ++++----- pandas/core/arrays/arrow/dtype.py | 8 ++++---- pandas/core/arrays/string_.py | 6 +++--- pandas/core/arrays/string_arrow.py | 8 ++++---- pandas/tests/arrays/string_/test_string_arrow.py | 10 +++++----- pandas/tests/extension/test_arrow.py | 15 +++++++-------- pandas/tests/groupby/test_groupby_dropna.py | 4 ++-- pandas/tests/indexes/multi/test_constructors.py | 4 ++-- pandas/tests/io/test_parquet.py | 5 ++--- 17 files changed, 40 insertions(+), 47 deletions(-) diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 80bcd34ff4bf8..59971379353c1 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -31,7 +31,7 @@ jobs: matrix: env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml] pattern: ["not single_cpu", "single_cpu"] - pyarrow_version: ["6", "7", "8"] + pyarrow_version: ["7", "8", "9"] include: - name: "Downstream Compat" env_file: actions-38-downstream_compat.yaml diff --git a/ci/deps/actions-38-minimum_versions.yaml b/ci/deps/actions-38-minimum_versions.yaml index 601581868f661..aa229beed6a1c 100644 --- a/ci/deps/actions-38-minimum_versions.yaml +++ b/ci/deps/actions-38-minimum_versions.yaml @@ -39,7 +39,7 @@ dependencies: - openpyxl=3.0.7 - pandas-gbq=0.15.0 - psycopg2=2.8.6 - - pyarrow=5.0.0 + - pyarrow=6.0.0 - 
pymysql=1.0.2 - pyreadstat=1.1.2 - pytables=3.6.1 diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index e7038c8aa4bbb..4a576ea73916c 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -388,7 +388,7 @@ PyTables 3.6.1 HDF5-based reading / writing blosc 1.21.0 Compression for HDF5 zlib Compression for HDF5 fastparquet 0.4.0 Parquet reading / writing -pyarrow 5.0.0 Parquet, ORC, and feather reading / writing +pyarrow 6.0.0 Parquet, ORC, and feather reading / writing pyreadstat 1.1.2 SPSS files (.sav) reading ========================= ================== ============================================================= diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 8baf262e79e5d..b466b54072c04 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -107,7 +107,7 @@ Optional libraries below the lowest tested version may still work, but are not c +-----------------+-----------------+---------+ | Package | Minimum Version | Changed | +=================+=================+=========+ -| pyarrow | 5.0.0 | X | +| pyarrow | 6.0.0 | X | +-----------------+-----------------+---------+ See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. 
diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index bbf1163ae9658..deec8de0d3878 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -29,7 +29,7 @@ Dtype, Frequency, ) -from pandas.compat import pa_version_under5p0 +from pandas.compat import pa_version_under6p0 from pandas.core.dtypes.common import ( is_float_dtype, @@ -195,7 +195,7 @@ ] ] -if not pa_version_under5p0: +if not pa_version_under6p0: import pyarrow as pa UNSIGNED_INT_PYARROW_DTYPES = [pa.uint8(), pa.uint16(), pa.uint32(), pa.uint64()] diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 1ddc404544467..52eb0cf147296 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -20,7 +20,6 @@ np_version_under1p21, ) from pandas.compat.pyarrow import ( - pa_version_under5p0, pa_version_under6p0, pa_version_under7p0, pa_version_under8p0, @@ -150,7 +149,6 @@ def get_lzma_file() -> type[lzma.LZMAFile]: __all__ = [ "is_numpy_dev", "np_version_under1p21", - "pa_version_under5p0", "pa_version_under6p0", "pa_version_under7p0", "pa_version_under8p0", diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index ef919d0622399..b644339a79de9 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -31,7 +31,7 @@ "pandas_gbq": "0.15.0", "psycopg2": "2.8.6", # (dt dec pq3 ext lo64) "pymysql": "1.0.2", - "pyarrow": "5.0.0", + "pyarrow": "6.0.0", "pyreadstat": "1.1.2", "pytest": "6.0", "pyxlsb": "1.0.8", diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py index 4c8fbe9ac02f0..2c132b627b965 100644 --- a/pandas/compat/pyarrow.py +++ b/pandas/compat/pyarrow.py @@ -9,13 +9,11 @@ _pa_version = pa.__version__ _palv = Version(_pa_version) - pa_version_under5p0 = _palv < Version("5.0.0") pa_version_under6p0 = _palv < Version("6.0.0") pa_version_under7p0 = _palv < Version("7.0.0") pa_version_under8p0 = _palv < Version("8.0.0") pa_version_under9p0 = _palv < Version("9.0.0") except ImportError: - 
pa_version_under5p0 = True pa_version_under6p0 = True pa_version_under7p0 = True pa_version_under8p0 = True diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 9ed0088487264..63c5f0d8e7b1e 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -17,7 +17,6 @@ npt, ) from pandas.compat import ( - pa_version_under5p0, pa_version_under6p0, pa_version_under7p0, ) @@ -44,7 +43,7 @@ validate_indices, ) -if not pa_version_under5p0: +if not pa_version_under6p0: import pyarrow as pa import pyarrow.compute as pc @@ -182,8 +181,8 @@ class ArrowExtensionArray(OpsMixin, ExtensionArray): _dtype: ArrowDtype def __init__(self, values: pa.Array | pa.ChunkedArray) -> None: - if pa_version_under5p0: - msg = "pyarrow>=5.0.0 is required for PyArrow backed ArrowExtensionArray." + if pa_version_under6p0: + msg = "pyarrow>=6.0.0 is required for PyArrow backed ArrowExtensionArray." raise ImportError(msg) if isinstance(values, pa.Array): self._data = pa.chunked_array([values]) @@ -1004,7 +1003,7 @@ def _replace_with_indices( mask = np.zeros(len(chunk), dtype=np.bool_) mask[indices] = True - if pa_version_under5p0: + if pa_version_under6p0: arr = chunk.to_numpy(zero_copy_only=False) arr[mask] = value return pa.array(arr, type=chunk.type) diff --git a/pandas/core/arrays/arrow/dtype.py b/pandas/core/arrays/arrow/dtype.py index 4f864abe811df..f5f87bea83b8f 100644 --- a/pandas/core/arrays/arrow/dtype.py +++ b/pandas/core/arrays/arrow/dtype.py @@ -5,7 +5,7 @@ import numpy as np from pandas._typing import DtypeObj -from pandas.compat import pa_version_under5p0 +from pandas.compat import pa_version_under6p0 from pandas.util._decorators import cache_readonly from pandas.core.dtypes.base import ( @@ -13,7 +13,7 @@ register_extension_dtype, ) -if not pa_version_under5p0: +if not pa_version_under6p0: import pyarrow as pa @@ -66,8 +66,8 @@ class ArrowDtype(StorageExtensionDtype): def __init__(self, pyarrow_dtype: pa.DataType) -> 
None: super().__init__("pyarrow") - if pa_version_under5p0: - raise ImportError("pyarrow>=5.0.0 is required for ArrowDtype") + if pa_version_under6p0: + raise ImportError("pyarrow>=6.0.0 is required for ArrowDtype") if not isinstance(pyarrow_dtype, pa.DataType): raise ValueError( f"pyarrow_dtype ({pyarrow_dtype}) must be an instance " diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index d0f580f9a8325..368930c243a76 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -18,7 +18,7 @@ npt, type_t, ) -from pandas.compat import pa_version_under5p0 +from pandas.compat import pa_version_under6p0 from pandas.compat.numpy import function as nv from pandas.core.dtypes.base import ( @@ -106,9 +106,9 @@ def __init__(self, storage=None) -> None: raise ValueError( f"Storage must be 'python' or 'pyarrow'. Got {storage} instead." ) - if storage == "pyarrow" and pa_version_under5p0: + if storage == "pyarrow" and pa_version_under6p0: raise ImportError( - "pyarrow>=5.0.0 is required for PyArrow backed StringArray." + "pyarrow>=6.0.0 is required for PyArrow backed StringArray." ) self.storage = storage diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 7bb56e7b38058..538adcbc47911 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -16,7 +16,7 @@ Scalar, npt, ) -from pandas.compat import pa_version_under5p0 +from pandas.compat import pa_version_under6p0 from pandas.core.dtypes.common import ( is_bool_dtype, @@ -39,7 +39,7 @@ ) from pandas.core.strings.object_array import ObjectStringArrayMixin -if not pa_version_under5p0: +if not pa_version_under6p0: import pyarrow as pa import pyarrow.compute as pc @@ -49,8 +49,8 @@ def _chk_pyarrow_available() -> None: - if pa_version_under5p0: - msg = "pyarrow>=5.0.0 is required for PyArrow backed ArrowExtensionArray." 
+ if pa_version_under6p0: + msg = "pyarrow>=6.0.0 is required for PyArrow backed ArrowExtensionArray." raise ImportError(msg) diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py index 7140466aaf6c3..8a6c2b0586a0c 100644 --- a/pandas/tests/arrays/string_/test_string_arrow.py +++ b/pandas/tests/arrays/string_/test_string_arrow.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from pandas.compat import pa_version_under5p0 +from pandas.compat import pa_version_under6p0 import pandas as pd import pandas._testing as tm @@ -14,8 +14,8 @@ from pandas.core.arrays.string_arrow import ArrowStringArray skip_if_no_pyarrow = pytest.mark.skipif( - pa_version_under5p0, - reason="pyarrow>=5.0.0 is required for PyArrow backed StringArray", + pa_version_under6p0, + reason="pyarrow>=6.0.0 is required for PyArrow backed StringArray", ) @@ -118,11 +118,11 @@ def test_from_sequence_wrong_dtype_raises(): @pytest.mark.skipif( - not pa_version_under5p0, + not pa_version_under6p0, reason="pyarrow is installed", ) def test_pyarrow_not_installed_raises(): - msg = re.escape("pyarrow>=5.0.0 is required for PyArrow backed") + msg = re.escape("pyarrow>=6.0.0 is required for PyArrow backed") with pytest.raises(ImportError, match=msg): StringDtype(storage="pyarrow") diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 0b64165f6deb8..4c05fd31f53e3 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -23,7 +23,6 @@ from pandas.compat import ( is_ci_environment, is_platform_windows, - pa_version_under5p0, pa_version_under6p0, pa_version_under7p0, pa_version_under8p0, @@ -929,7 +928,7 @@ def test_arith_series_with_scalar( if ( all_arithmetic_operators == "__rpow__" and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)) - and not pa_version_under5p0 + and not pa_version_under6p0 ): request.node.add_marker( pytest.mark.xfail( @@ -952,7 +951,7 
@@ def test_arith_series_with_scalar( elif ( all_arithmetic_operators in {"__rtruediv__", "__rfloordiv__"} and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)) - and not pa_version_under5p0 + and not pa_version_under6p0 ): request.node.add_marker( pytest.mark.xfail( @@ -991,7 +990,7 @@ def test_arith_frame_with_scalar( if ( all_arithmetic_operators == "__rpow__" and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)) - and not pa_version_under5p0 + and not pa_version_under6p0 ): request.node.add_marker( pytest.mark.xfail( @@ -1014,7 +1013,7 @@ def test_arith_frame_with_scalar( elif ( all_arithmetic_operators in {"__rtruediv__", "__rfloordiv__"} and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)) - and not pa_version_under5p0 + and not pa_version_under6p0 ): request.node.add_marker( pytest.mark.xfail( @@ -1053,7 +1052,7 @@ def test_arith_series_with_array( if ( all_arithmetic_operators == "__rpow__" and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)) - and not pa_version_under5p0 + and not pa_version_under6p0 ): request.node.add_marker( pytest.mark.xfail( @@ -1070,7 +1069,7 @@ def test_arith_series_with_array( "__rsub__", ) and pa.types.is_unsigned_integer(pa_dtype) - and not pa_version_under5p0 + and not pa_version_under6p0 ): request.node.add_marker( pytest.mark.xfail( @@ -1094,7 +1093,7 @@ def test_arith_series_with_array( elif ( all_arithmetic_operators in {"__rtruediv__", "__rfloordiv__"} and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)) - and not pa_version_under5p0 + and not pa_version_under6p0 ): request.node.add_marker( pytest.mark.xfail( diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index f87f656ea896a..a6088e4999402 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas.compat.pyarrow import 
pa_version_under5p0 +from pandas.compat.pyarrow import pa_version_under6p0 from pandas.core.dtypes.missing import na_value_for_dtype @@ -415,7 +415,7 @@ def test_groupby_drop_nan_with_multi_index(): pytest.param( "string[pyarrow]", marks=pytest.mark.skipif( - pa_version_under5p0, reason="pyarrow is not installed" + pa_version_under6p0, reason="pyarrow is not installed" ), ), "datetime64[ns]", diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index 9738ea5b5d35e..fa03855facedf 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -7,7 +7,7 @@ import numpy as np import pytest -from pandas.compat import pa_version_under5p0 +from pandas.compat import pa_version_under6p0 from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike @@ -650,7 +650,7 @@ def test_from_frame(): tm.assert_index_equal(expected, result) -@pytest.mark.skipif(pa_version_under5p0, reason="minimum pyarrow not installed") +@pytest.mark.skipif(pa_version_under6p0, reason="minimum pyarrow not installed") def test_from_frame_missing_values_multiIndex(): # GH 39984 import pyarrow as pa diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 0fa6734ee3ca5..45b19badf48f3 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -15,7 +15,6 @@ from pandas.compat import is_platform_windows from pandas.compat.pyarrow import ( - pa_version_under5p0, pa_version_under6p0, pa_version_under8p0, ) @@ -236,7 +235,7 @@ def check_partition_names(path, expected): expected: iterable of str Expected partition names. 
""" - if pa_version_under5p0: + if pa_version_under6p0: import pyarrow.parquet as pq dataset = pq.ParquetDataset(path, validate_schema=False) @@ -969,7 +968,7 @@ def test_timestamp_nanoseconds(self, pa): def test_timezone_aware_index(self, request, pa, timezone_aware_date_list): if ( - not pa_version_under5p0 + not pa_version_under6p0 and timezone_aware_date_list.tzinfo != datetime.timezone.utc ): request.node.add_marker(