diff --git a/ci/deps/actions-38-locale.yaml b/ci/deps/actions-38-locale.yaml index 13b132109effb..b7043735d9457 100644 --- a/ci/deps/actions-38-locale.yaml +++ b/ci/deps/actions-38-locale.yaml @@ -35,7 +35,7 @@ dependencies: - xlsxwriter - xlwt - moto - - pyarrow=1.0.0 + - pyarrow=1.0.1 - pip - pip: - pyxlsb diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index fd5c46f7a6d5a..57b13fef9ad8a 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -20,7 +20,7 @@ np_version_under1p20, ) from pandas.compat.pyarrow import ( - pa_version_under1p0, + pa_version_under1p01, pa_version_under2p0, pa_version_under3p0, pa_version_under4p0, @@ -153,7 +153,7 @@ def get_lzma_file(lzma): "np_datetime64_compat", "np_version_under1p19", "np_version_under1p20", - "pa_version_under1p0", + "pa_version_under1p01", "pa_version_under2p0", "pa_version_under3p0", "pa_version_under4p0", diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index adf20f3322a79..1cf57404bbe01 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -21,7 +21,7 @@ "odfpy": "1.4.1", "openpyxl": "3.0.2", "pandas_gbq": "0.14.0", - "pyarrow": "0.17.0", + "pyarrow": "1.0.1", "pytest": "6.0", "pyxlsb": "1.0.6", "s3fs": "0.4.0", diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py index f9b9409317774..e6ac0c59e789a 100644 --- a/pandas/compat/pyarrow.py +++ b/pandas/compat/pyarrow.py @@ -7,14 +7,14 @@ _pa_version = pa.__version__ _palv = Version(_pa_version) - pa_version_under1p0 = _palv < Version("1.0.0") + pa_version_under1p01 = _palv < Version("1.0.1") pa_version_under2p0 = _palv < Version("2.0.0") pa_version_under3p0 = _palv < Version("3.0.0") pa_version_under4p0 = _palv < Version("4.0.0") pa_version_under5p0 = _palv < Version("5.0.0") pa_version_under6p0 = _palv < Version("6.0.0") except ImportError: - pa_version_under1p0 = True + pa_version_under1p01 = True pa_version_under2p0 = True pa_version_under3p0 = True pa_version_under4p0 = True diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index e9fb5bdf80045..5a8e5f488fbf2 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -19,7 +19,7 @@ Scalar, type_t, ) -from pandas.compat import pa_version_under1p0 +from pandas.compat import pa_version_under1p01 from pandas.compat.numpy import function as nv from pandas.core.dtypes.base import ( @@ -104,11 +104,10 @@ def __init__(self, storage=None): raise ValueError( f"Storage must be 'python' or 'pyarrow'. Got {storage} instead." ) - if storage == "pyarrow" and pa_version_under1p0: + if storage == "pyarrow" and pa_version_under1p01: raise ImportError( "pyarrow>=1.0.0 is required for PyArrow backed StringArray." ) - self.storage = storage @property diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index b3278a81e93b7..b1daf0e393ef0 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -27,7 +27,7 @@ npt, ) from pandas.compat import ( - pa_version_under1p0, + pa_version_under1p01, pa_version_under2p0, pa_version_under3p0, pa_version_under4p0, @@ -63,10 +63,7 @@ ) from pandas.core.strings.object_array import ObjectStringArrayMixin -# PyArrow backed StringArrays are available starting at 1.0.0, but this -# file is imported from even if pyarrow is < 1.0.0, before pyarrow.compute -# and its compute functions existed. GH38801 -if not pa_version_under1p0: +if not pa_version_under1p01: import pyarrow as pa import pyarrow.compute as pc @@ -87,7 +84,7 @@ def _chk_pyarrow_available() -> None: - if pa_version_under1p0: + if pa_version_under1p01: msg = "pyarrow>=1.0.0 is required for PyArrow backed StringArray." raise ImportError(msg) diff --git a/pandas/tests/arrays/masked/test_arrow_compat.py b/pandas/tests/arrays/masked/test_arrow_compat.py index d66a603ad568c..3f0a1b5d0eaf3 100644 --- a/pandas/tests/arrays/masked/test_arrow_compat.py +++ b/pandas/tests/arrays/masked/test_arrow_compat.py @@ -1,12 +1,10 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd import pandas._testing as tm -pa = pytest.importorskip("pyarrow", minversion="0.17.0") +pa = pytest.importorskip("pyarrow", minversion="1.0.1") from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask @@ -29,7 +27,6 @@ def test_arrow_array(data): assert arr.equals(expected) -@td.skip_if_no("pyarrow") def test_arrow_roundtrip(data): df = pd.DataFrame({"a": data}) table = pa.table(df) @@ -39,7 +36,6 @@ def test_arrow_roundtrip(data): tm.assert_frame_equal(result, df) -@td.skip_if_no("pyarrow") def test_arrow_load_from_zero_chunks(data): # GH-41040 @@ -54,7 +50,6 @@ def test_arrow_load_from_zero_chunks(data): tm.assert_frame_equal(result, df) -@td.skip_if_no("pyarrow") def test_arrow_from_arrow_uint(): # https://github.com/pandas-dev/pandas/issues/31896 # possible mismatch in types @@ -66,7 +61,6 @@ def test_arrow_from_arrow_uint(): tm.assert_extension_array_equal(result, expected) -@td.skip_if_no("pyarrow") def test_arrow_sliced(data): # https://github.com/pandas-dev/pandas/issues/38525 @@ -161,7 +155,6 @@ def test_pyarrow_array_to_numpy_and_mask(np_dtype_to_arrays): tm.assert_numpy_array_equal(mask, mask_expected_empty) -@td.skip_if_no("pyarrow") def test_from_arrow_type_error(request, data): # ensure that __from_arrow__ returns a TypeError when getting a wrong # array type diff --git a/pandas/tests/arrays/period/test_arrow_compat.py b/pandas/tests/arrays/period/test_arrow_compat.py index 5211397f20c36..560299a4a47f5 100644 --- a/pandas/tests/arrays/period/test_arrow_compat.py +++ b/pandas/tests/arrays/period/test_arrow_compat.py @@ -1,7 +1,5 @@ import pytest -import pandas.util._test_decorators as td - from pandas.core.dtypes.dtypes import PeriodDtype import pandas as pd @@ -11,10 +9,9 @@ period_array, ) -pyarrow_skip = td.skip_if_no("pyarrow", min_version="0.17.0") +pa = pytest.importorskip("pyarrow", minversion="1.0.1") -@pyarrow_skip def test_arrow_extension_type(): from pandas.core.arrays._arrow_utils import ArrowPeriodType @@ -29,7 +26,6 @@ def test_arrow_extension_type(): assert not hash(p1) == hash(p3) -@pyarrow_skip @pytest.mark.parametrize( "data, freq", [ @@ -38,8 +34,6 @@ def test_arrow_extension_type(): ], ) def test_arrow_array(data, freq): - import pyarrow as pa - from pandas.core.arrays._arrow_utils import ArrowPeriodType periods = period_array(data, freq=freq) @@ -62,10 +56,7 @@ def test_arrow_array(data, freq): pa.array(periods, type=ArrowPeriodType("T")) -@pyarrow_skip def test_arrow_array_missing(): - import pyarrow as pa - from pandas.core.arrays._arrow_utils import ArrowPeriodType arr = PeriodArray([1, 2, 3], freq="D") @@ -78,10 +69,7 @@ def test_arrow_array_missing(): assert result.storage.equals(expected) -@pyarrow_skip def test_arrow_table_roundtrip(): - import pyarrow as pa - from pandas.core.arrays._arrow_utils import ArrowPeriodType arr = PeriodArray([1, 2, 3], freq="D") @@ -100,10 +88,8 @@ def test_arrow_table_roundtrip(): tm.assert_frame_equal(result, expected) -@pyarrow_skip def test_arrow_load_from_zero_chunks(): # GH-41040 - import pyarrow as pa from pandas.core.arrays._arrow_utils import ArrowPeriodType @@ -120,10 +106,7 @@ def test_arrow_load_from_zero_chunks(): tm.assert_frame_equal(result, df) -@pyarrow_skip def test_arrow_table_roundtrip_without_metadata(): - import pyarrow as pa - arr = PeriodArray([1, 2, 3], freq="H") arr[1] = pd.NaT df = pd.DataFrame({"a": arr}) diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py index c3f951adf7f89..265afa89d6530 100644 --- a/pandas/tests/arrays/string_/test_string_arrow.py +++ b/pandas/tests/arrays/string_/test_string_arrow.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from pandas.compat import pa_version_under1p0 +from pandas.compat import pa_version_under1p01 import pandas as pd import pandas._testing as tm @@ -14,7 +14,7 @@ from pandas.core.arrays.string_arrow import ArrowStringArray skip_if_no_pyarrow = pytest.mark.skipif( - pa_version_under1p0, + pa_version_under1p01, reason="pyarrow>=1.0.0 is required for PyArrow backed StringArray", ) @@ -118,7 +118,7 @@ def test_from_sequence_wrong_dtype_raises(): @pytest.mark.skipif( - not pa_version_under1p0, + not pa_version_under1p01, reason="pyarrow is installed", ) def test_pyarrow_not_installed_raises(): diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py index d262f09182a9c..320bfc13f7032 100644 --- a/pandas/tests/extension/arrow/test_bool.py +++ b/pandas/tests/extension/arrow/test_bool.py @@ -6,7 +6,7 @@ from pandas.api.types import is_bool_dtype from pandas.tests.extension import base -pytest.importorskip("pyarrow", minversion="0.13.0") +pytest.importorskip("pyarrow", minversion="1.0.1") from pandas.tests.extension.arrow.arrays import ( # isort:skip ArrowBoolArray, diff --git a/pandas/tests/extension/arrow/test_timestamp.py b/pandas/tests/extension/arrow/test_timestamp.py index c61cc30950a23..fe2c484731019 100644 --- a/pandas/tests/extension/arrow/test_timestamp.py +++ b/pandas/tests/extension/arrow/test_timestamp.py @@ -12,7 +12,7 @@ register_extension_dtype, ) -pytest.importorskip("pyarrow", minversion="0.13.0") +pytest.importorskip("pyarrow", minversion="1.0.1") import pyarrow as pa # isort:skip diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index 97ebb3a0d39ba..59c7abc4a4cb8 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -2,14 +2,12 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd import pandas._testing as tm from pandas.io.feather_format import read_feather, to_feather # isort:skip -pyarrow = pytest.importorskip("pyarrow") +pyarrow = pytest.importorskip("pyarrow", minversion="1.0.1") filter_sparse = pytest.mark.filterwarnings("ignore:The Sparse") @@ -120,7 +118,6 @@ def test_read_columns(self): columns = ["col1", "col3"] self.check_round_trip(df, expected=df[columns], columns=columns) - @td.skip_if_no("pyarrow", min_version="0.17.1") def read_columns_different_order(self): # GH 33878 df = pd.DataFrame({"A": [1, 2], "B": ["x", "y"], "C": [True, False]}) @@ -180,12 +177,10 @@ def test_path_localpath(self): result = tm.round_trip_localpath(df.to_feather, read_feather) tm.assert_frame_equal(df, result) - @td.skip_if_no("pyarrow", min_version="0.17.0") def test_passthrough_keywords(self): df = tm.makeDataFrame().reset_index() self.check_round_trip(df, write_kwargs={"version": 1}) - @td.skip_if_no("pyarrow") @tm.network def test_http_path(self, feather_file): # GH 29055 diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index ec724602c5249..270cb402483bf 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -15,7 +15,6 @@ from pandas.compat import is_platform_windows from pandas.compat.pyarrow import ( - pa_version_under1p0, pa_version_under2p0, pa_version_under5p0, ) @@ -784,11 +783,7 @@ def test_s3_roundtrip_for_dir( # only used if partition field is string, but this changed again to use # category dtype for all types (not only strings) in pyarrow 2.0.0 if partition_col: - partition_col_type = ( - "int32" - if (not pa_version_under1p0) and pa_version_under2p0 - else "category" - ) + partition_col_type = "int32" if pa_version_under2p0 else "category" expected_df[partition_col] = expected_df[partition_col].astype( partition_col_type