From 31c6db7c4862cf141de627e473a46e1afd674e8a Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 3 Feb 2023 20:48:35 +0100 Subject: [PATCH 1/7] Upgrade ci minimum version to 7.0 --- ci/deps/actions-38-minimum_versions.yaml | 2 +- doc/source/getting_started/install.rst | 2 +- doc/source/whatsnew/v2.0.0.rst | 2 +- pandas/compat/_optional.py | 2 +- pandas/compat/pyarrow.py | 2 - pandas/core/arrays/arrow/array.py | 17 ++--- pandas/core/arrays/arrow/dtype.py | 8 +-- pandas/core/arrays/string_.py | 6 +- pandas/core/arrays/string_arrow.py | 8 +-- pandas/tests/arrays/string_/test_string.py | 8 +-- .../tests/arrays/string_/test_string_arrow.py | 10 +-- pandas/tests/extension/test_arrow.py | 65 ++----------------- pandas/tests/extension/test_string.py | 29 +-------- pandas/tests/frame/methods/test_astype.py | 4 +- pandas/tests/groupby/test_groupby_dropna.py | 4 +- .../tests/indexes/multi/test_constructors.py | 4 +- pandas/tests/io/parser/test_parse_dates.py | 7 +- pandas/tests/io/test_parquet.py | 10 +-- pyproject.toml | 6 +- 19 files changed, 48 insertions(+), 148 deletions(-) diff --git a/ci/deps/actions-38-minimum_versions.yaml b/ci/deps/actions-38-minimum_versions.yaml index caeee07c324d1..7652b6347ad4f 100644 --- a/ci/deps/actions-38-minimum_versions.yaml +++ b/ci/deps/actions-38-minimum_versions.yaml @@ -43,7 +43,7 @@ dependencies: - openpyxl=3.0.7 - pandas-gbq=0.15.0 - psycopg2=2.8.6 - - pyarrow=6.0.0 + - pyarrow=7.0.0 - pymysql=1.0.2 - pyreadstat=1.1.2 - pytables=3.6.1 diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 687f00a3dffd9..c1295df53fc2b 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -441,7 +441,7 @@ PyTables 3.6.1 hdf5 HDF5-based reading blosc 1.21.0 hdf5 Compression for HDF5; only available on ``conda`` zlib hdf5 Compression for HDF5 fastparquet 0.6.3 - Parquet reading / writing (pyarrow is default) -pyarrow 6.0.0 parquet, feather Parquet, ORC, and feather reading / writing +pyarrow 7.0.0 parquet, feather Parquet, ORC, and feather reading / writing pyreadstat 1.1.2 spss SPSS files (.sav) reading odfpy 1.4.1 excel Open document format (.odf, .ods, .odt) reading / writing ========================= ================== ================ ============================================================= diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index bc1cf8d03ce98..153a52d44d31a 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -647,7 +647,7 @@ Optional libraries below the lowest tested version may still work, but are not c +-----------------+-----------------+---------+ | Package | Minimum Version | Changed | +=================+=================+=========+ -| pyarrow | 6.0.0 | X | +| pyarrow | 7.0.0 | X | +-----------------+-----------------+---------+ | matplotlib | 3.6.1 | X | +-----------------+-----------------+---------+ diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index d98b23b215565..01ac462eeb659 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -31,7 +31,7 @@ "pandas_gbq": "0.15.0", "psycopg2": "2.8.6", # (dt dec pq3 ext lo64) "pymysql": "1.0.2", - "pyarrow": "6.0.0", + "pyarrow": "7.0.0", "pyreadstat": "1.1.2", "pytest": "7.0.0", "pyxlsb": "1.0.8", diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py index 280fdabf2cc05..ea8e18437fcfb 100644 --- a/pandas/compat/pyarrow.py +++ b/pandas/compat/pyarrow.py @@ -9,13 +9,11 @@ _pa_version = pa.__version__ _palv = Version(_pa_version) - pa_version_under6p0 = _palv < Version("6.0.0") pa_version_under7p0 = _palv < Version("7.0.0") pa_version_under8p0 = _palv < Version("8.0.0") pa_version_under9p0 = _palv < Version("9.0.0") pa_version_under10p0 = _palv < Version("10.0.0") except ImportError: - pa_version_under6p0 = True pa_version_under7p0 = True pa_version_under8p0 = True pa_version_under9p0 = True diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 9247d26fc846d..ba6231cddf85c 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -26,7 +26,6 @@ npt, ) from pandas.compat import ( - pa_version_under6p0, pa_version_under7p0, pa_version_under8p0, pa_version_under9p0, @@ -54,7 +53,7 @@ validate_indices, ) -if not pa_version_under6p0: +if not pa_version_under7p0: import pyarrow as pa import pyarrow.compute as pc @@ -199,8 +198,8 @@ class ArrowExtensionArray(OpsMixin, ExtensionArray): _dtype: ArrowDtype def __init__(self, values: pa.Array | pa.ChunkedArray) -> None: - if pa_version_under6p0: - msg = "pyarrow>=6.0.0 is required for PyArrow backed ArrowExtensionArray." + if pa_version_under7p0: + msg = "pyarrow>=7.0.0 is required for PyArrow backed ArrowExtensionArray." raise ImportError(msg) if isinstance(values, pa.Array): self._data = pa.chunked_array([values]) @@ -529,11 +528,6 @@ def _argmin_max(self, skipna: bool, method: str) -> int: # let ExtensionArray.arg{max|min} raise return getattr(super(), f"arg{method}")(skipna=skipna) - if pa_version_under6p0: - raise NotImplementedError( - f"arg{method} only implemented for pyarrow version >= 6.0" - ) - data = self._data if pa.types.is_duration(data.type): data = data.cast(pa.int64()) @@ -567,7 +561,7 @@ def dropna(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT: ------- ArrowExtensionArray """ - if pa_version_under6p0: + if pa_version_under7p0: fallback_performancewarning(version="6") return super().dropna() else: @@ -1293,9 +1287,6 @@ def _mode(self: ArrowExtensionArrayT, dropna: bool = True) -> ArrowExtensionArra same type as self Sorted, if possible. """ - if pa_version_under6p0: - raise NotImplementedError("mode only supported for pyarrow version >= 6.0") - pa_type = self._data.type if pa.types.is_temporal(pa_type): nbits = pa_type.bit_width diff --git a/pandas/core/arrays/arrow/dtype.py b/pandas/core/arrays/arrow/dtype.py index 3e3213b48670f..bed2ed113606e 100644 --- a/pandas/core/arrays/arrow/dtype.py +++ b/pandas/core/arrays/arrow/dtype.py @@ -5,7 +5,7 @@ import numpy as np from pandas._typing import DtypeObj -from pandas.compat import pa_version_under6p0 +from pandas.compat import pa_version_under7p0 from pandas.util._decorators import cache_readonly from pandas.core.dtypes.base import ( @@ -13,7 +13,7 @@ register_extension_dtype, ) -if not pa_version_under6p0: +if not pa_version_under7p0: import pyarrow as pa @@ -66,8 +66,8 @@ class ArrowDtype(StorageExtensionDtype): def __init__(self, pyarrow_dtype: pa.DataType) -> None: super().__init__("pyarrow") - if pa_version_under6p0: - raise ImportError("pyarrow>=6.0.0 is required for ArrowDtype") + if pa_version_under7p0: + raise ImportError("pyarrow>=7.0.0 is required for ArrowDtype") if not isinstance(pyarrow_dtype, pa.DataType): raise ValueError( f"pyarrow_dtype ({pyarrow_dtype}) must be an instance " diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 9b26db07fc28f..00f7e84f49bf2 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -21,7 +21,7 @@ npt, type_t, ) -from pandas.compat import pa_version_under6p0 +from pandas.compat import pa_version_under7p0 from pandas.compat.numpy import function as nv from pandas.util._decorators import doc @@ -115,9 +115,9 @@ def __init__(self, storage=None) -> None: raise ValueError( f"Storage must be 'python' or 'pyarrow'. Got {storage} instead." ) - if storage == "pyarrow" and pa_version_under6p0: + if storage == "pyarrow" and pa_version_under7p0: raise ImportError( - "pyarrow>=6.0.0 is required for PyArrow backed StringArray." + "pyarrow>=7.0.0 is required for PyArrow backed StringArray." ) self.storage = storage diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 4aebe61412866..717d41785f002 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -17,7 +17,7 @@ Scalar, npt, ) -from pandas.compat import pa_version_under6p0 +from pandas.compat import pa_version_under7p0 from pandas.core.dtypes.common import ( is_bool_dtype, @@ -40,7 +40,7 @@ ) from pandas.core.strings.object_array import ObjectStringArrayMixin -if not pa_version_under6p0: +if not pa_version_under7p0: import pyarrow as pa import pyarrow.compute as pc @@ -50,8 +50,8 @@ def _chk_pyarrow_available() -> None: - if pa_version_under6p0: - msg = "pyarrow>=6.0.0 is required for PyArrow backed ArrowExtensionArray." + if pa_version_under7p0: + msg = "pyarrow>=7.0.0 is required for PyArrow backed ArrowExtensionArray." raise ImportError(msg) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index da5fc46c03d92..ecb9878fbb341 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -5,7 +5,6 @@ import numpy as np import pytest -from pandas.compat import pa_version_under6p0 import pandas.util._test_decorators as td from pandas.core.dtypes.common import is_dtype_equal @@ -358,11 +357,6 @@ def test_reduce_missing(skipna, dtype): @pytest.mark.parametrize("method", ["min", "max"]) @pytest.mark.parametrize("skipna", [True, False]) def test_min_max(method, skipna, dtype, request): - if dtype.storage == "pyarrow" and pa_version_under6p0: - reason = "'ArrowStringArray' object has no attribute 'max'" - mark = pytest.mark.xfail(raises=TypeError, reason=reason) - request.node.add_marker(mark) - arr = pd.Series(["a", "b", "c", None], dtype=dtype) result = getattr(arr, method)(skipna=skipna) if skipna: @@ -375,7 +369,7 @@ def test_min_max(method, skipna, dtype, request): @pytest.mark.parametrize("method", ["min", "max"]) @pytest.mark.parametrize("box", [pd.Series, pd.array]) def test_min_max_numpy(method, box, dtype, request): - if dtype.storage == "pyarrow" and (pa_version_under6p0 or box is pd.array): + if dtype.storage == "pyarrow" and box is pd.array: if box is pd.array: reason = "'<=' not supported between instances of 'str' and 'NoneType'" else: diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py index 071f5cad725cf..07c6bca67311b 100644 --- a/pandas/tests/arrays/string_/test_string_arrow.py +++ b/pandas/tests/arrays/string_/test_string_arrow.py @@ -4,7 +4,7 @@ import numpy as np import pytest -from pandas.compat import pa_version_under6p0 +from pandas.compat import pa_version_under7p0 import pandas as pd import pandas._testing as tm @@ -15,8 +15,8 @@ from pandas.core.arrays.string_arrow import ArrowStringArray skip_if_no_pyarrow = pytest.mark.skipif( - pa_version_under6p0, - reason="pyarrow>=6.0.0 is required for PyArrow backed StringArray", + pa_version_under7p0, + reason="pyarrow>=7.0.0 is required for PyArrow backed StringArray", ) @@ -119,11 +119,11 @@ def test_from_sequence_wrong_dtype_raises(): @pytest.mark.skipif( - not pa_version_under6p0, + not pa_version_under7p0, reason="pyarrow is installed", ) def test_pyarrow_not_installed_raises(): - msg = re.escape("pyarrow>=6.0.0 is required for PyArrow backed") + msg = re.escape("pyarrow>=7.0.0 is required for PyArrow backed") with pytest.raises(ImportError, match=msg): StringDtype(storage="pyarrow") diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index e31d8605eeb06..6ec2bca4a46d5 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -29,7 +29,6 @@ PY311, is_ci_environment, is_platform_windows, - pa_version_under6p0, pa_version_under7p0, pa_version_under8p0, pa_version_under9p0, @@ -51,7 +50,7 @@ ) from pandas.tests.extension import base -pa = pytest.importorskip("pyarrow", minversion="6.0.0") +pa = pytest.importorskip("pyarrow", minversion="7.0.0") from pandas.core.arrays.arrow.array import ArrowExtensionArray @@ -275,13 +274,6 @@ def test_from_sequence_pa_array(self, data, request): assert isinstance(result._data, pa.ChunkedArray) def test_from_sequence_pa_array_notimplemented(self, request): - if pa_version_under6p0: - request.node.add_marker( - pytest.mark.xfail( - raises=AttributeError, - reason="month_day_nano_interval not implemented by pyarrow.", - ) - ) with pytest.raises(NotImplementedError, match="Converting strings to"): ArrowExtensionArray._from_sequence_of_strings( ["12-1"], dtype=pa.month_day_nano_interval() @@ -320,13 +312,6 @@ def test_from_sequence_of_strings_pa_array(self, data, request): ), ) ) - elif pa_version_under6p0 and pa.types.is_temporal(pa_dtype): - request.node.add_marker( - pytest.mark.xfail( - raises=pa.ArrowNotImplementedError, - reason=f"pyarrow doesn't support string cast from {pa_dtype}", - ) - ) pa_array = data._data.cast(pa.string()) result = type(data)._from_sequence_of_strings(pa_array, dtype=data.dtype) tm.assert_extension_array_equal(result, data) @@ -525,28 +510,8 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna, request): ) if all_numeric_reductions in {"skew", "kurt"}: request.node.add_marker(xfail_mark) - elif ( - all_numeric_reductions in {"median", "var", "std", "prod", "max", "min"} - and pa_version_under6p0 - ): - request.node.add_marker(xfail_mark) elif all_numeric_reductions == "sem" and pa_version_under8p0: request.node.add_marker(xfail_mark) - elif ( - all_numeric_reductions in {"sum", "mean"} - and skipna is False - and pa_version_under6p0 - and (pa.types.is_integer(pa_dtype) or pa.types.is_floating(pa_dtype)) - ): - request.node.add_marker( - pytest.mark.xfail( - raises=AssertionError, - reason=( - f"{all_numeric_reductions} with skip_nulls={skipna} did not " - f"return NA for {pa_dtype} with pyarrow={pa.__version__}" - ), - ) - ) elif all_numeric_reductions in [ "mean", @@ -785,10 +750,7 @@ def test_view(self, data): class TestBaseMissing(base.BaseMissingTests): def test_dropna_array(self, data_missing): - with tm.maybe_produces_warning( - PerformanceWarning, pa_version_under6p0, check_stacklevel=False - ): - super().test_dropna_array(data_missing) + super().test_dropna_array(data_missing) def test_fillna_no_op_returns_copy(self, data): with tm.maybe_produces_warning( @@ -910,11 +872,6 @@ def test_value_counts_with_normalize(self, data, request): ): super().test_value_counts_with_normalize(data) - @pytest.mark.xfail( - pa_version_under6p0, - raises=NotImplementedError, - reason="argmin/max only implemented for pyarrow version >= 6.0", - ) def test_argmin_argmax( self, data_for_sorting, data_missing_for_sorting, na_value, request ): @@ -943,13 +900,6 @@ def test_argmin_argmax( def test_argreduce_series( self, data_missing_for_sorting, op_name, skipna, expected, request ): - if pa_version_under6p0 and skipna: - request.node.add_marker( - pytest.mark.xfail( - raises=NotImplementedError, - reason="min_max not supported in pyarrow", - ) - ) super().test_argreduce_series( data_missing_for_sorting, op_name, skipna, expected ) @@ -1118,7 +1068,7 @@ def _get_arith_xfail_marker(self, opname, pa_dtype): if ( opname == "__rpow__" and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)) - and not pa_version_under6p0 + and not pa_version_under7p0 ): mark = pytest.mark.xfail( reason=( @@ -1137,7 +1087,7 @@ def _get_arith_xfail_marker(self, opname, pa_dtype): elif ( opname in {"__rtruediv__", "__rfloordiv__"} and (pa.types.is_floating(pa_dtype) or pa.types.is_integer(pa_dtype)) - and not pa_version_under6p0 + and not pa_version_under7p0 ): mark = pytest.mark.xfail( raises=pa.ArrowInvalid, @@ -1224,7 +1174,7 @@ def test_arith_series_with_array( "__rsub__", ) and pa.types.is_unsigned_integer(pa_dtype) - and not pa_version_under6p0 + and not pa_version_under7p0 ): request.node.add_marker( pytest.mark.xfail( @@ -1430,11 +1380,6 @@ def test_quantile(data, interpolation, quantile, request): tm.assert_series_equal(result, expected) -@pytest.mark.xfail( - pa_version_under6p0, - raises=NotImplementedError, - reason="mode only supported for pyarrow version >= 6.0", -) @pytest.mark.parametrize("dropna", [True, False]) @pytest.mark.parametrize( "take_idx, exp_idx", diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 3e865947aa968..0743c1e26c62f 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -18,10 +18,7 @@ import numpy as np import pytest -from pandas.compat import ( - pa_version_under6p0, - pa_version_under7p0, -) +from pandas.compat import pa_version_under7p0 from pandas.errors import PerformanceWarning import pandas as pd @@ -160,11 +157,7 @@ class TestIndex(base.BaseIndexTests): class TestMissing(base.BaseMissingTests): def test_dropna_array(self, data_missing): - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under6p0 and data_missing.dtype.storage == "pyarrow", - ): - result = data_missing.dropna() + result = data_missing.dropna() expected = data_missing[[1]] self.assert_extension_array_equal(result, expected) @@ -220,13 +213,6 @@ def test_argsort_missing(self, data_missing_for_sorting): def test_argmin_argmax( self, data_for_sorting, data_missing_for_sorting, na_value, request ): - if pa_version_under6p0 and data_missing_for_sorting.dtype.storage == "pyarrow": - request.node.add_marker( - pytest.mark.xfail( - raises=NotImplementedError, - reason="min_max not supported in pyarrow", - ) - ) super().test_argmin_argmax(data_for_sorting, data_missing_for_sorting, na_value) @pytest.mark.parametrize( @@ -245,17 +231,6 @@ def test_argmin_argmax( def test_argreduce_series( self, data_missing_for_sorting, op_name, skipna, expected, request ): - if ( - pa_version_under6p0 - and data_missing_for_sorting.dtype.storage == "pyarrow" - and skipna - ): - request.node.add_marker( - pytest.mark.xfail( - raises=NotImplementedError, - reason="min_max not supported in pyarrow", - ) - ) super().test_argreduce_series( data_missing_for_sorting, op_name, skipna, expected ) diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 8a1a2783b5dc6..c13817dd1cdb7 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from pandas.compat import pa_version_under6p0 +from pandas.compat import pa_version_under7p0 import pandas.util._test_decorators as td import pandas as pd @@ -870,7 +870,7 @@ def test_frame_astype_no_copy(): assert np.shares_memory(df.b.values, result.b.values) -@pytest.mark.skipif(pa_version_under6p0, reason="pyarrow is required for this test") +@pytest.mark.skipif(pa_version_under7p0, reason="pyarrow is required for this test") @pytest.mark.parametrize("dtype", ["int64", "Int64"]) def test_astype_copies(dtype): # GH#50984 diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index 5418a2a60dc80..31a8e7a7d36ac 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas.compat.pyarrow import pa_version_under6p0 +from pandas.compat.pyarrow import pa_version_under7p0 from pandas.core.dtypes.missing import na_value_for_dtype @@ -416,7 +416,7 @@ def test_groupby_drop_nan_with_multi_index(): pytest.param( "string[pyarrow]", marks=pytest.mark.skipif( - pa_version_under6p0, reason="pyarrow is not installed" + pa_version_under7p0, reason="pyarrow is not installed" ), ), "datetime64[ns]", diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index fa03855facedf..78b46e5a32a48 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -7,7 +7,7 @@ import numpy as np import pytest -from pandas.compat import pa_version_under6p0 +from pandas.compat import pa_version_under7p0 from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike @@ -650,7 +650,7 @@ def test_from_frame(): tm.assert_index_equal(expected, result) -@pytest.mark.skipif(pa_version_under6p0, reason="minimum pyarrow not installed") +@pytest.mark.skipif(pa_version_under7p0, reason="minimum pyarrow not installed") def test_from_frame_missing_values_multiIndex(): # GH 39984 import pyarrow as pa diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index d985800d943bd..09a2967d62fee 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -19,10 +19,7 @@ from pandas._libs.tslibs import parsing from pandas._libs.tslibs.parsing import py_parse_datetime_string -from pandas.compat.pyarrow import ( - pa_version_under6p0, - pa_version_under7p0, -) +from pandas.compat.pyarrow import pa_version_under7p0 import pandas as pd from pandas import ( @@ -456,7 +453,7 @@ def test_date_col_as_index_col(all_parsers): columns=["X0", "X2", "X3", "X4", "X5", "X6", "X7"], index=index, ) - if parser.engine == "pyarrow" and not pa_version_under6p0: + if parser.engine == "pyarrow" and not pa_version_under7p0: # https://github.com/pandas-dev/pandas/issues/44231 # pyarrow 6.0 starts to infer time type expected["X2"] = pd.to_datetime("1970-01-01" + expected["X2"]).dt.time diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 4c884e20cf423..9e59bfca71e21 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -12,7 +12,7 @@ from pandas.compat import is_platform_windows from pandas.compat.pyarrow import ( - pa_version_under6p0, + pa_version_under7p0, pa_version_under8p0, ) import pandas.util._test_decorators as td @@ -221,7 +221,7 @@ def check_partition_names(path, expected): expected: iterable of str Expected partition names. """ - if pa_version_under6p0: + if pa_version_under7p0: import pyarrow.parquet as pq dataset = pq.ParquetDataset(path, validate_schema=False) @@ -966,8 +966,8 @@ def test_additional_extension_types(self, pa): def test_timestamp_nanoseconds(self, pa): # with version 2.6, pyarrow defaults to writing the nanoseconds, so # this should work without error - # Note in previous pyarrows(<6.0.0), only the pseudo-version 2.0 was available - if not pa_version_under6p0: + # Note in previous pyarrows(<7.0.0), only the pseudo-version 2.0 was available + if not pa_version_under7p0: ver = "2.6" else: ver = "2.0" @@ -976,7 +976,7 @@ def test_timestamp_nanoseconds(self, pa): def test_timezone_aware_index(self, request, pa, timezone_aware_date_list): if ( - not pa_version_under6p0 + not pa_version_under7p0 and timezone_aware_date_list.tzinfo != datetime.timezone.utc ): request.node.add_marker( diff --git a/pyproject.toml b/pyproject.toml index 3ddc247292ca9..e5d6f420915ef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,8 +63,8 @@ fss = ['fsspec>=2021.07.0'] aws = ['s3fs>=2021.08.0'] gcp = ['gcsfs>=2021.07.0', 'pandas-gbq>=0.15.0'] excel = ['odfpy>=1.4.1', 'openpyxl>=3.0.7', 'pyxlsb>=1.0.8', 'xlrd>=2.0.1', 'xlsxwriter>=1.4.3'] -parquet = ['pyarrow>=6.0.0'] -feather = ['pyarrow>=6.0.0'] +parquet = ['pyarrow>=7.0.0'] +feather = ['pyarrow>=7.0.0'] hdf5 = [# blosc only available on conda (https://github.com/Blosc/python-blosc/issues/297) #'blosc>=1.20.1', 'tables>=3.6.1'] @@ -97,7 +97,7 @@ all = ['beautifulsoup4>=4.9.3', 'openpyxl>=3.0.7', 'pandas-gbq>=0.15.0', 'psycopg2>=2.8.6', - 'pyarrow>=6.0.0', + 'pyarrow>=7.0.0', 'pymysql>=1.0.2', 'PyQt5>=5.15.1', 'pyreadstat>=1.1.2', From 01662c4e3a8637a088aed81f6c4f57055edb8489 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 3 Feb 2023 20:51:19 +0100 Subject: [PATCH 2/7] Fix --- pandas/tests/io/parser/test_parse_dates.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 09a2967d62fee..7f5a142eab826 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -453,10 +453,7 @@ def test_date_col_as_index_col(all_parsers): columns=["X0", "X2", "X3", "X4", "X5", "X6", "X7"], index=index, ) - if parser.engine == "pyarrow" and not pa_version_under7p0: - # https://github.com/pandas-dev/pandas/issues/44231 - # pyarrow 6.0 starts to infer time type - expected["X2"] = pd.to_datetime("1970-01-01" + expected["X2"]).dt.time + expected["X2"] = pd.to_datetime("1970-01-01" + expected["X2"]).dt.time tm.assert_frame_equal(result, expected) From c5be5963dfd8b3c79c8facb073cb279819f96b74 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 3 Feb 2023 21:07:05 +0100 Subject: [PATCH 3/7] Remove --- pandas/_testing/__init__.py | 4 ++-- pandas/compat/__init__.py | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index a738220db20da..dac949f69bfaf 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -29,7 +29,7 @@ Frequency, NpDtype, ) -from pandas.compat import pa_version_under6p0 +from pandas.compat import pa_version_under7p0 from pandas.core.dtypes.common import ( is_float_dtype, @@ -191,7 +191,7 @@ ] ] -if not pa_version_under6p0: +if not pa_version_under7p0: import pyarrow as pa UNSIGNED_INT_PYARROW_DTYPES = [pa.uint8(), pa.uint16(), pa.uint32(), pa.uint64()] diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index b59b9632913e4..052eb7792a19c 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -27,7 +27,6 @@ np_version_under1p21, ) from pandas.compat.pyarrow import ( - pa_version_under6p0, pa_version_under7p0, pa_version_under8p0, pa_version_under9p0, @@ -157,7 +156,6 @@ def get_lzma_file() -> type[pandas.compat.compressors.LZMAFile]: __all__ = [ "is_numpy_dev", "np_version_under1p21", - "pa_version_under6p0", "pa_version_under7p0", "pa_version_under8p0", "pa_version_under9p0", From fb08a2aab7b6bdede71ad446f8666ff35da4654f Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 3 Feb 2023 22:20:14 +0100 Subject: [PATCH 4/7] Add cond back in --- pandas/tests/io/parser/test_parse_dates.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 7f5a142eab826..09a2967d62fee 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -453,7 +453,10 @@ def test_date_col_as_index_col(all_parsers): columns=["X0", "X2", "X3", "X4", "X5", "X6", "X7"], index=index, ) - expected["X2"] = pd.to_datetime("1970-01-01" + expected["X2"]).dt.time + if parser.engine == "pyarrow" and not pa_version_under7p0: + # https://github.com/pandas-dev/pandas/issues/44231 + # pyarrow 6.0 starts to infer time type + expected["X2"] = pd.to_datetime("1970-01-01" + expected["X2"]).dt.time tm.assert_frame_equal(result, expected) From 2ea4da2ad8fc9aef3ef2f68845f7df04bd3092ef Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 4 Feb 2023 01:26:36 +0100 Subject: [PATCH 5/7] Remove super --- pandas/tests/extension/test_arrow.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 6ec2bca4a46d5..522a0d59e4161 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -749,9 +749,6 @@ def test_view(self, data): class TestBaseMissing(base.BaseMissingTests): - def test_dropna_array(self, data_missing): - super().test_dropna_array(data_missing) - def test_fillna_no_op_returns_copy(self, data): with tm.maybe_produces_warning( PerformanceWarning, pa_version_under7p0, check_stacklevel=False From 03bcaa5af1e29d0ac4914330011d2ce7978abdd7 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 6 Feb 2023 20:58:16 +0100 Subject: [PATCH 6/7] Remove if --- pandas/core/arrays/arrow/array.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index ba6231cddf85c..81fabf40d05b6 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -561,11 +561,7 @@ def dropna(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT: ------- ArrowExtensionArray """ - if pa_version_under7p0: - fallback_performancewarning(version="6") - return super().dropna() - else: - return type(self)(pc.drop_null(self._data)) + return type(self)(pc.drop_null(self._data)) @doc(ExtensionArray.fillna) def fillna( From 5d8afdb0b26e3a6da99b34b75cae39c9534fbb57 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 6 Feb 2023 21:46:41 +0100 Subject: [PATCH 7/7] Fix merge --- pandas/tests/io/test_parquet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 9a203cc7717fd..7aba335040098 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -591,7 +591,7 @@ def test_write_column_index_nonstring(self, pa): msg = r"parquet must have string column names" self.check_error_on_write(df, engine, ValueError, msg) - @pytest.mark.skipif(pa_version_under6p0, reason="minimum pyarrow not installed") + @pytest.mark.skipif(pa_version_under7p0, reason="minimum pyarrow not installed") def test_use_nullable_dtypes(self, engine, request): import pyarrow.parquet as pq @@ -641,7 +641,7 @@ def test_use_nullable_dtypes(self, engine, request): expected = expected.drop("c", axis=1) tm.assert_frame_equal(result2, expected) - @pytest.mark.skipif(pa_version_under6p0, reason="minimum pyarrow not installed") + @pytest.mark.skipif(pa_version_under7p0, reason="minimum pyarrow not installed") def test_use_nullable_dtypes_option(self, engine, request): # GH#50748 import pyarrow.parquet as pq