From f4c3c803616ed080a4035235217d296927417a5a Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Thu, 21 Oct 2021 21:18:25 -0500 Subject: [PATCH 1/9] CLN: remove pyarrow<1 compat --- pandas/compat/__init__.py | 2 - pandas/compat/_optional.py | 2 +- pandas/compat/pyarrow.py | 2 - pandas/core/arrays/string_.py | 6 --- pandas/core/arrays/string_arrow.py | 36 ++++++---------- .../tests/arrays/masked/test_arrow_compat.py | 7 +-- .../tests/arrays/period/test_arrow_compat.py | 43 ++++++------------- .../tests/arrays/string_/test_string_arrow.py | 7 +-- pandas/tests/extension/arrow/test_bool.py | 2 +- .../tests/extension/arrow/test_timestamp.py | 2 +- pandas/tests/io/test_feather.py | 5 +-- pandas/tests/io/test_parquet.py | 7 +-- 12 files changed, 37 insertions(+), 84 deletions(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 3233de8e3b6d1..d38f3c8179310 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -20,7 +20,6 @@ np_version_under1p20, ) from pandas.compat.pyarrow import ( - pa_version_under1p0, pa_version_under2p0, pa_version_under3p0, pa_version_under4p0, @@ -151,7 +150,6 @@ def get_lzma_file(lzma): "np_datetime64_compat", "np_version_under1p19", "np_version_under1p20", - "pa_version_under1p0", "pa_version_under2p0", "pa_version_under3p0", "pa_version_under4p0", diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index adf20f3322a79..1cf57404bbe01 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -21,7 +21,7 @@ "odfpy": "1.4.1", "openpyxl": "3.0.2", "pandas_gbq": "0.14.0", - "pyarrow": "0.17.0", + "pyarrow": "1.0.1", "pytest": "6.0", "pyxlsb": "1.0.6", "s3fs": "0.4.0", diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py index 9bf7139769baa..112ebe654117d 100644 --- a/pandas/compat/pyarrow.py +++ b/pandas/compat/pyarrow.py @@ -7,13 +7,11 @@ _pa_version = pa.__version__ _palv = Version(_pa_version) - pa_version_under1p0 = _palv < Version("1.0.0") pa_version_under2p0 = _palv < Version("2.0.0") pa_version_under3p0 = _palv < Version("3.0.0") pa_version_under4p0 = _palv < Version("4.0.0") pa_version_under5p0 = _palv < Version("5.0.0") except ImportError: - pa_version_under1p0 = True pa_version_under2p0 = True pa_version_under3p0 = True pa_version_under4p0 = True diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index d93fa4bbdd7fc..ec3e06d99df93 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -19,7 +19,6 @@ Scalar, type_t, ) -from pandas.compat import pa_version_under1p0 from pandas.compat.numpy import function as nv from pandas.core.dtypes.base import ( @@ -104,11 +103,6 @@ def __init__(self, storage=None): raise ValueError( f"Storage must be 'python' or 'pyarrow'. Got {storage} instead." ) - if storage == "pyarrow" and pa_version_under1p0: - raise ImportError( - "pyarrow>=1.0.0 is required for PyArrow backed StringArray." - ) - self.storage = storage @property diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index c7d08f7873c09..6eb9f2c21a005 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -11,6 +11,8 @@ ) import numpy as np +import pyarrow as pa +import pyarrow.compute as pc from pandas._libs import ( lib, @@ -27,11 +29,11 @@ npt, ) from pandas.compat import ( - pa_version_under1p0, pa_version_under2p0, pa_version_under3p0, pa_version_under4p0, ) +from pandas.compat._optional import import_optional_dependency from pandas.util._decorators import doc from pandas.util._validators import validate_fillna_kwargs @@ -65,21 +67,14 @@ ) from pandas.core.strings.object_array import ObjectStringArrayMixin -# PyArrow backed StringArrays are available starting at 1.0.0, but this -# file is imported from even if pyarrow is < 1.0.0, before pyarrow.compute -# and its compute functions existed. GH38801 -if not pa_version_under1p0: - import pyarrow as pa - import pyarrow.compute as pc - - ARROW_CMP_FUNCS = { - "eq": pc.equal, - "ne": pc.not_equal, - "lt": pc.less, - "gt": pc.greater, - "le": pc.less_equal, - "ge": pc.greater_equal, - } +ARROW_CMP_FUNCS = { + "eq": pc.equal, + "ne": pc.not_equal, + "lt": pc.less, + "gt": pc.greater, + "le": pc.less_equal, + "ge": pc.greater_equal, +} if TYPE_CHECKING: @@ -88,12 +83,6 @@ ArrowStringScalarOrNAT = Union[str, libmissing.NAType] -def _chk_pyarrow_available() -> None: - if pa_version_under1p0: - msg = "pyarrow>=1.0.0 is required for PyArrow backed StringArray." - raise ImportError(msg) - - # TODO: Inherit directly from BaseStringArrayMethods. Currently we inherit from # ObjectStringArrayMixin because we want to have the object-dtype based methods as # fallback for the ones that pyarrow doesn't yet support @@ -161,7 +150,8 @@ def __init__(self, values): def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False): from pandas.core.arrays.masked import BaseMaskedArray - _chk_pyarrow_available() + msg = "pyarrow>=1.0.0 is required for PyArrow backed StringArray." + import_optional_dependency("pyarrow", msg) if dtype and not (isinstance(dtype, str) and dtype == "string"): dtype = pandas_dtype(dtype) diff --git a/pandas/tests/arrays/masked/test_arrow_compat.py b/pandas/tests/arrays/masked/test_arrow_compat.py index d66a603ad568c..788324087448b 100644 --- a/pandas/tests/arrays/masked/test_arrow_compat.py +++ b/pandas/tests/arrays/masked/test_arrow_compat.py @@ -6,7 +6,7 @@ import pandas as pd import pandas._testing as tm -pa = pytest.importorskip("pyarrow", minversion="0.17.0") +pa = pytest.importorskip("pyarrow", minversion="1.0.1") from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask @@ -29,7 +29,6 @@ def test_arrow_array(data): assert arr.equals(expected) -@td.skip_if_no("pyarrow") def test_arrow_roundtrip(data): df = pd.DataFrame({"a": data}) table = pa.table(df) @@ -39,7 +38,6 @@ def test_arrow_roundtrip(data): tm.assert_frame_equal(result, df) -@td.skip_if_no("pyarrow") def test_arrow_load_from_zero_chunks(data): # GH-41040 @@ -54,7 +52,6 @@ def test_arrow_load_from_zero_chunks(data): tm.assert_frame_equal(result, df) -@td.skip_if_no("pyarrow") def test_arrow_from_arrow_uint(): # https://github.com/pandas-dev/pandas/issues/31896 # possible mismatch in types @@ -66,7 +63,6 @@ def test_arrow_from_arrow_uint(): tm.assert_extension_array_equal(result, expected) -@td.skip_if_no("pyarrow") def test_arrow_sliced(data): # https://github.com/pandas-dev/pandas/issues/38525 @@ -161,7 +157,6 @@ def test_pyarrow_array_to_numpy_and_mask(np_dtype_to_arrays): tm.assert_numpy_array_equal(mask, mask_expected_empty) -@td.skip_if_no("pyarrow") def test_from_arrow_type_error(request, data): # ensure that __from_arrow__ returns a TypeError when getting a wrong # array type diff --git a/pandas/tests/arrays/period/test_arrow_compat.py b/pandas/tests/arrays/period/test_arrow_compat.py index 5211397f20c36..51762ed82c980 100644 --- a/pandas/tests/arrays/period/test_arrow_compat.py +++ b/pandas/tests/arrays/period/test_arrow_compat.py @@ -11,10 +11,9 @@ period_array, ) -pyarrow_skip = td.skip_if_no("pyarrow", min_version="0.17.0") +pyarrow = pytest.importorskip("pyarrow", minversion="1.0.1") -@pyarrow_skip def test_arrow_extension_type(): from pandas.core.arrays._arrow_utils import ArrowPeriodType @@ -29,7 +28,6 @@ def test_arrow_extension_type(): assert not hash(p1) == hash(p3) -@pyarrow_skip @pytest.mark.parametrize( "data, freq", [ @@ -38,97 +36,84 @@ def test_arrow_extension_type(): ], ) def test_arrow_array(data, freq): - import pyarrow as pa - from pandas.core.arrays._arrow_utils import ArrowPeriodType periods = period_array(data, freq=freq) - result = pa.array(periods) + result = pyarrow.array(periods) assert isinstance(result.type, ArrowPeriodType) assert result.type.freq == freq - expected = pa.array(periods.asi8, type="int64") + expected = pyarrow.array(periods.asi8, type="int64") assert result.storage.equals(expected) # convert to its storage type - result = pa.array(periods, type=pa.int64()) + result = pyarrow.array(periods, type=pyarrow.int64()) assert result.equals(expected) # unsupported conversions msg = "Not supported to convert PeriodArray to 'double' type" with pytest.raises(TypeError, match=msg): - pa.array(periods, type="float64") + pyarrow.array(periods, type="float64") with pytest.raises(TypeError, match="different 'freq'"): - pa.array(periods, type=ArrowPeriodType("T")) + pyarrow.array(periods, type=ArrowPeriodType("T")) -@pyarrow_skip def test_arrow_array_missing(): - import pyarrow as pa - from pandas.core.arrays._arrow_utils import ArrowPeriodType arr = PeriodArray([1, 2, 3], freq="D") arr[1] = pd.NaT - result = pa.array(arr) + result = pyarrow.array(arr) assert isinstance(result.type, ArrowPeriodType) assert result.type.freq == "D" - expected = pa.array([1, None, 3], type="int64") + expected = pyarrow.array([1, None, 3], type="int64") assert result.storage.equals(expected) -@pyarrow_skip def test_arrow_table_roundtrip(): - import pyarrow as pa - from pandas.core.arrays._arrow_utils import ArrowPeriodType arr = PeriodArray([1, 2, 3], freq="D") arr[1] = pd.NaT df = pd.DataFrame({"a": arr}) - table = pa.table(df) + table = pyarrow.table(df) assert isinstance(table.field("a").type, ArrowPeriodType) result = table.to_pandas() assert isinstance(result["a"].dtype, PeriodDtype) tm.assert_frame_equal(result, df) - table2 = pa.concat_tables([table, table]) + table2 = pyarrow.concat_tables([table, table]) result = table2.to_pandas() expected = pd.concat([df, df], ignore_index=True) tm.assert_frame_equal(result, expected) -@pyarrow_skip def test_arrow_load_from_zero_chunks(): # GH-41040 - import pyarrow as pa from pandas.core.arrays._arrow_utils import ArrowPeriodType arr = PeriodArray([], freq="D") df = pd.DataFrame({"a": arr}) - table = pa.table(df) + table = pyarrow.table(df) assert isinstance(table.field("a").type, ArrowPeriodType) - table = pa.table( - [pa.chunked_array([], type=table.column(0).type)], schema=table.schema + table = pyarrow.table( + [pyarrow.chunked_array([], type=table.column(0).type)], schema=table.schema ) result = table.to_pandas() assert isinstance(result["a"].dtype, PeriodDtype) tm.assert_frame_equal(result, df) -@pyarrow_skip def test_arrow_table_roundtrip_without_metadata(): - import pyarrow as pa - arr = PeriodArray([1, 2, 3], freq="H") arr[1] = pd.NaT df = pd.DataFrame({"a": arr}) - table = pa.table(df) + table = pyarrow.table(df) # remove the metadata table = table.replace_schema_metadata() assert table.schema.metadata is None diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py index c3f951adf7f89..35b2e73131e39 100644 --- a/pandas/tests/arrays/string_/test_string_arrow.py +++ b/pandas/tests/arrays/string_/test_string_arrow.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from pandas.compat import pa_version_under1p0 +from pandas.compat._optional import import_optional_dependency import pandas as pd import pandas._testing as tm @@ -13,8 +13,9 @@ ) from pandas.core.arrays.string_arrow import ArrowStringArray +pyarrow = import_optional_dependency("pyarrow") skip_if_no_pyarrow = pytest.mark.skipif( - pa_version_under1p0, + pyarrow is None, reason="pyarrow>=1.0.0 is required for PyArrow backed StringArray", ) @@ -118,7 +119,7 @@ def test_from_sequence_wrong_dtype_raises(): @pytest.mark.skipif( - not pa_version_under1p0, + pyarrow is not None, reason="pyarrow is installed", ) def test_pyarrow_not_installed_raises(): diff --git a/pandas/tests/extension/arrow/test_bool.py b/pandas/tests/extension/arrow/test_bool.py index d262f09182a9c..320bfc13f7032 100644 --- a/pandas/tests/extension/arrow/test_bool.py +++ b/pandas/tests/extension/arrow/test_bool.py @@ -6,7 +6,7 @@ from pandas.api.types import is_bool_dtype from pandas.tests.extension import base -pytest.importorskip("pyarrow", minversion="0.13.0") +pytest.importorskip("pyarrow", minversion="1.0.1") from pandas.tests.extension.arrow.arrays import ( # isort:skip ArrowBoolArray, diff --git a/pandas/tests/extension/arrow/test_timestamp.py b/pandas/tests/extension/arrow/test_timestamp.py index c61cc30950a23..fe2c484731019 100644 --- a/pandas/tests/extension/arrow/test_timestamp.py +++ b/pandas/tests/extension/arrow/test_timestamp.py @@ -12,7 +12,7 @@ register_extension_dtype, ) -pytest.importorskip("pyarrow", minversion="0.13.0") +pytest.importorskip("pyarrow", minversion="1.0.1") import pyarrow as pa # isort:skip diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index ba8a9ed070236..5e16dd752bb98 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -9,7 +9,7 @@ from pandas.io.feather_format import read_feather, to_feather # isort:skip -pyarrow = pytest.importorskip("pyarrow") +pyarrow = pytest.importorskip("pyarrow", minversion="1.0.1") filter_sparse = pytest.mark.filterwarnings("ignore:The Sparse") @@ -122,7 +122,6 @@ def test_read_columns(self): columns = ["col1", "col3"] self.check_round_trip(df, expected=df[columns], columns=columns) - @td.skip_if_no("pyarrow", min_version="0.17.1") def read_columns_different_order(self): # GH 33878 df = pd.DataFrame({"A": [1, 2], "B": ["x", "y"], "C": [True, False]}) @@ -182,12 +181,10 @@ def test_path_localpath(self): result = tm.round_trip_localpath(df.to_feather, read_feather) tm.assert_frame_equal(df, result) - @td.skip_if_no("pyarrow", min_version="0.17.0") def test_passthrough_keywords(self): df = tm.makeDataFrame().reset_index() self.check_round_trip(df, write_kwargs={"version": 1}) - @td.skip_if_no("pyarrow") @tm.network def test_http_path(self, feather_file): # GH 29055 diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index ec724602c5249..270cb402483bf 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -15,7 +15,6 @@ from pandas.compat import is_platform_windows from pandas.compat.pyarrow import ( - pa_version_under1p0, pa_version_under2p0, pa_version_under5p0, ) @@ -784,11 +783,7 @@ def test_s3_roundtrip_for_dir( # only used if partition field is string, but this changed again to use # category dtype for all types (not only strings) in pyarrow 2.0.0 if partition_col: - partition_col_type = ( - "int32" - if (not pa_version_under1p0) and pa_version_under2p0 - else "category" - ) + partition_col_type = "int32" if pa_version_under2p0 else "category" expected_df[partition_col] = expected_df[partition_col].astype( partition_col_type From adb921640db441bfb0603bc8bcc36c45a1ed0b8f Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Thu, 21 Oct 2021 21:33:13 -0500 Subject: [PATCH 2/9] remove unused import --- pandas/tests/arrays/masked/test_arrow_compat.py | 2 -- pandas/tests/arrays/period/test_arrow_compat.py | 2 -- pandas/tests/io/test_feather.py | 2 -- 3 files changed, 6 deletions(-) diff --git a/pandas/tests/arrays/masked/test_arrow_compat.py b/pandas/tests/arrays/masked/test_arrow_compat.py index 788324087448b..3f0a1b5d0eaf3 100644 --- a/pandas/tests/arrays/masked/test_arrow_compat.py +++ b/pandas/tests/arrays/masked/test_arrow_compat.py @@ -1,8 +1,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd import pandas._testing as tm diff --git a/pandas/tests/arrays/period/test_arrow_compat.py b/pandas/tests/arrays/period/test_arrow_compat.py index 51762ed82c980..52fc4071cdc3c 100644 --- a/pandas/tests/arrays/period/test_arrow_compat.py +++ b/pandas/tests/arrays/period/test_arrow_compat.py @@ -1,7 +1,5 @@ import pytest -import pandas.util._test_decorators as td - from pandas.core.dtypes.dtypes import PeriodDtype import pandas as pd diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index 5e16dd752bb98..17347b5cfd983 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -2,8 +2,6 @@ import numpy as np import pytest -import pandas.util._test_decorators as td - import pandas as pd import pandas._testing as tm From 9b90c36f1a691d48f71adeb769fe0423cd8b5685 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Thu, 21 Oct 2021 22:08:11 -0500 Subject: [PATCH 3/9] fix import --- pandas/core/arrays/string_arrow.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 6eb9f2c21a005..0d055d76ae9da 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -11,8 +11,6 @@ ) import numpy as np -import pyarrow as pa -import pyarrow.compute as pc from pandas._libs import ( lib, @@ -67,6 +65,14 @@ ) from pandas.core.strings.object_array import ObjectStringArrayMixin + +msg = "pyarrow>=1.0.0 is required for PyArrow backed StringArray." +pyarrow = import_optional_dependency("pyarrow", msg) +if pyarrow is not None: + import pyarrow.compute as pc + + pa = pyarrow + ARROW_CMP_FUNCS = { "eq": pc.equal, "ne": pc.not_equal, @@ -150,9 +156,6 @@ def __init__(self, values): def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False): from pandas.core.arrays.masked import BaseMaskedArray - msg = "pyarrow>=1.0.0 is required for PyArrow backed StringArray." - import_optional_dependency("pyarrow", msg) - if dtype and not (isinstance(dtype, str) and dtype == "string"): dtype = pandas_dtype(dtype) assert isinstance(dtype, StringDtype) and dtype.storage == "pyarrow" From 21c0a346c69daeb1673dea03d8c6ec9ab4fc18d0 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Thu, 21 Oct 2021 22:16:03 -0500 Subject: [PATCH 4/9] import --- pandas/core/arrays/string_arrow.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 0d055d76ae9da..5341128b5c510 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -65,7 +65,6 @@ ) from pandas.core.strings.object_array import ObjectStringArrayMixin - msg = "pyarrow>=1.0.0 is required for PyArrow backed StringArray." pyarrow = import_optional_dependency("pyarrow", msg) if pyarrow is not None: From a0607a725e513ae9baa80d270f8bb992e7ad6d49 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Thu, 21 Oct 2021 22:44:45 -0500 Subject: [PATCH 5/9] fix import --- ci/deps/actions-38-locale.yaml | 2 +- pandas/compat/pyarrow.py | 2 ++ pandas/core/arrays/string_arrow.py | 25 ++++++++----------- .../tests/arrays/string_/test_string_arrow.py | 7 +++--- 4 files changed, 17 insertions(+), 19 deletions(-) diff --git a/ci/deps/actions-38-locale.yaml b/ci/deps/actions-38-locale.yaml index 13b132109effb..b7043735d9457 100644 --- a/ci/deps/actions-38-locale.yaml +++ b/ci/deps/actions-38-locale.yaml @@ -35,7 +35,7 @@ dependencies: - xlsxwriter - xlwt - moto - - pyarrow=1.0.0 + - pyarrow=1.0.1 - pip - pip: - pyxlsb diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py index 112ebe654117d..48a047f5112f3 100644 --- a/pandas/compat/pyarrow.py +++ b/pandas/compat/pyarrow.py @@ -7,11 +7,13 @@ _pa_version = pa.__version__ _palv = Version(_pa_version) + pa_version_under1p01 = _palv < Version("1.0.1") pa_version_under2p0 = _palv < Version("2.0.0") pa_version_under3p0 = _palv < Version("3.0.0") pa_version_under4p0 = _palv < Version("4.0.0") pa_version_under5p0 = _palv < Version("5.0.0") except ImportError: + pa_version_under1p01 = True pa_version_under2p0 = True pa_version_under3p0 = True pa_version_under4p0 = True diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 5341128b5c510..f94dcae83fd41 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -27,11 +27,11 @@ npt, ) from pandas.compat import ( + pa_version_under1p01, pa_version_under2p0, pa_version_under3p0, pa_version_under4p0, ) -from pandas.compat._optional import import_optional_dependency from pandas.util._decorators import doc from pandas.util._validators import validate_fillna_kwargs @@ -65,21 +65,18 @@ ) from pandas.core.strings.object_array import ObjectStringArrayMixin -msg = "pyarrow>=1.0.0 is required for PyArrow backed StringArray." -pyarrow = import_optional_dependency("pyarrow", msg) -if pyarrow is not None: +if not pa_version_under1p01: + import pyarrow as pa import pyarrow.compute as pc - pa = pyarrow - -ARROW_CMP_FUNCS = { - "eq": pc.equal, - "ne": pc.not_equal, - "lt": pc.less, - "gt": pc.greater, - "le": pc.less_equal, - "ge": pc.greater_equal, -} + ARROW_CMP_FUNCS = { + "eq": pc.equal, + "ne": pc.not_equal, + "lt": pc.less, + "gt": pc.greater, + "le": pc.less_equal, + "ge": pc.greater_equal, + } if TYPE_CHECKING: diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py index 35b2e73131e39..203374be2473b 100644 --- a/pandas/tests/arrays/string_/test_string_arrow.py +++ b/pandas/tests/arrays/string_/test_string_arrow.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from pandas.compat._optional import import_optional_dependency +from pandas.compat.pyarrow import pa_version_under1p01 import pandas as pd import pandas._testing as tm @@ -13,9 +13,8 @@ ) from pandas.core.arrays.string_arrow import ArrowStringArray -pyarrow = import_optional_dependency("pyarrow") skip_if_no_pyarrow = pytest.mark.skipif( - pyarrow is None, + pa_version_under1p01, reason="pyarrow>=1.0.0 is required for PyArrow backed StringArray", ) @@ -119,7 +118,7 @@ def test_from_sequence_wrong_dtype_raises(): @pytest.mark.skipif( - pyarrow is not None, + not pa_version_under1p01, reason="pyarrow is installed", ) def test_pyarrow_not_installed_raises(): From 77abfa88719fa165bdc52c953643542a3e45b8ac Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Thu, 21 Oct 2021 22:58:56 -0500 Subject: [PATCH 6/9] revert changes --- pandas/compat/__init__.py | 2 ++ .../tests/arrays/period/test_arrow_compat.py | 28 +++++++++---------- .../tests/arrays/string_/test_string_arrow.py | 2 +- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index d38f3c8179310..500bdabedf666 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -20,6 +20,7 @@ np_version_under1p20, ) from pandas.compat.pyarrow import ( + pa_version_under1p01, pa_version_under2p0, pa_version_under3p0, pa_version_under4p0, @@ -150,6 +151,7 @@ def get_lzma_file(lzma): "np_datetime64_compat", "np_version_under1p19", "np_version_under1p20", + "pa_version_under1p01", "pa_version_under2p0", "pa_version_under3p0", "pa_version_under4p0", diff --git a/pandas/tests/arrays/period/test_arrow_compat.py b/pandas/tests/arrays/period/test_arrow_compat.py index 52fc4071cdc3c..6247ddb0599a8 100644 --- a/pandas/tests/arrays/period/test_arrow_compat.py +++ b/pandas/tests/arrays/period/test_arrow_compat.py @@ -9,7 +9,7 @@ period_array, ) -pyarrow = pytest.importorskip("pyarrow", minversion="1.0.1") +py = pytest.importorskip("pyarrow", minversion="1.0.1") def test_arrow_extension_type(): @@ -37,23 +37,23 @@ def test_arrow_array(data, freq): from pandas.core.arrays._arrow_utils import ArrowPeriodType periods = period_array(data, freq=freq) - result = pyarrow.array(periods) + result = py.array(periods) assert isinstance(result.type, ArrowPeriodType) assert result.type.freq == freq - expected = pyarrow.array(periods.asi8, type="int64") + expected = py.array(periods.asi8, type="int64") assert result.storage.equals(expected) # convert to its storage type - result = pyarrow.array(periods, type=pyarrow.int64()) + result = py.array(periods, type=py.int64()) assert result.equals(expected) # unsupported conversions msg = "Not supported to convert PeriodArray to 'double' type" with pytest.raises(TypeError, match=msg): - pyarrow.array(periods, type="float64") + py.array(periods, type="float64") with pytest.raises(TypeError, match="different 'freq'"): - pyarrow.array(periods, type=ArrowPeriodType("T")) + py.array(periods, type=ArrowPeriodType("T")) def test_arrow_array_missing(): @@ -62,10 +62,10 @@ def test_arrow_array_missing(): arr = PeriodArray([1, 2, 3], freq="D") arr[1] = pd.NaT - result = pyarrow.array(arr) + result = py.array(arr) assert isinstance(result.type, ArrowPeriodType) assert result.type.freq == "D" - expected = pyarrow.array([1, None, 3], type="int64") + expected = py.array([1, None, 3], type="int64") assert result.storage.equals(expected) @@ -76,13 +76,13 @@ def test_arrow_table_roundtrip(): arr[1] = pd.NaT df = pd.DataFrame({"a": arr}) - table = pyarrow.table(df) + table = py.table(df) assert isinstance(table.field("a").type, ArrowPeriodType) result = table.to_pandas() assert isinstance(result["a"].dtype, PeriodDtype) tm.assert_frame_equal(result, df) - table2 = pyarrow.concat_tables([table, table]) + table2 = py.concat_tables([table, table]) result = table2.to_pandas() expected = pd.concat([df, df], ignore_index=True) tm.assert_frame_equal(result, expected) @@ -96,10 +96,10 @@ def test_arrow_load_from_zero_chunks(): arr = PeriodArray([], freq="D") df = pd.DataFrame({"a": arr}) - table = pyarrow.table(df) + table = py.table(df) assert isinstance(table.field("a").type, ArrowPeriodType) - table = pyarrow.table( - [pyarrow.chunked_array([], type=table.column(0).type)], schema=table.schema + table = py.table( + [py.chunked_array([], type=table.column(0).type)], schema=table.schema ) result = table.to_pandas() assert isinstance(result["a"].dtype, PeriodDtype) @@ -111,7 +111,7 @@ def test_arrow_table_roundtrip_without_metadata(): arr[1] = pd.NaT df = pd.DataFrame({"a": arr}) - table = pyarrow.table(df) + table = py.table(df) # remove the metadata table = table.replace_schema_metadata() assert table.schema.metadata is None diff --git a/pandas/tests/arrays/string_/test_string_arrow.py b/pandas/tests/arrays/string_/test_string_arrow.py index 203374be2473b..265afa89d6530 100644 --- a/pandas/tests/arrays/string_/test_string_arrow.py +++ b/pandas/tests/arrays/string_/test_string_arrow.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from pandas.compat.pyarrow import pa_version_under1p01 +from pandas.compat import pa_version_under1p01 import pandas as pd import pandas._testing as tm From 1ba2a3e4c392daa7acd7606de13580def19e9cb9 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Thu, 21 Oct 2021 23:00:50 -0500 Subject: [PATCH 7/9] revert changes --- .../tests/arrays/period/test_arrow_compat.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/pandas/tests/arrays/period/test_arrow_compat.py b/pandas/tests/arrays/period/test_arrow_compat.py index 6247ddb0599a8..560299a4a47f5 100644 --- a/pandas/tests/arrays/period/test_arrow_compat.py +++ b/pandas/tests/arrays/period/test_arrow_compat.py @@ -9,7 +9,7 @@ period_array, ) -py = pytest.importorskip("pyarrow", minversion="1.0.1") +pa = pytest.importorskip("pyarrow", minversion="1.0.1") def test_arrow_extension_type(): @@ -37,23 +37,23 @@ def test_arrow_array(data, freq): from pandas.core.arrays._arrow_utils import ArrowPeriodType periods = period_array(data, freq=freq) - result = py.array(periods) + result = pa.array(periods) assert isinstance(result.type, ArrowPeriodType) assert result.type.freq == freq - expected = py.array(periods.asi8, type="int64") + expected = pa.array(periods.asi8, type="int64") assert result.storage.equals(expected) # convert to its storage type - result = py.array(periods, type=py.int64()) + result = pa.array(periods, type=pa.int64()) assert result.equals(expected) # unsupported conversions msg = "Not supported to convert PeriodArray to 'double' type" with pytest.raises(TypeError, match=msg): - py.array(periods, type="float64") + pa.array(periods, type="float64") with pytest.raises(TypeError, match="different 'freq'"): - py.array(periods, type=ArrowPeriodType("T")) + pa.array(periods, type=ArrowPeriodType("T")) def test_arrow_array_missing(): @@ -62,10 +62,10 @@ def test_arrow_array_missing(): arr = PeriodArray([1, 2, 3], freq="D") arr[1] = pd.NaT - result = py.array(arr) + result = pa.array(arr) assert isinstance(result.type, ArrowPeriodType) assert result.type.freq == "D" - expected = py.array([1, None, 3], type="int64") + expected = pa.array([1, None, 3], type="int64") assert result.storage.equals(expected) @@ -76,13 +76,13 @@ def test_arrow_table_roundtrip(): arr[1] = pd.NaT df = pd.DataFrame({"a": arr}) - table = py.table(df) + table = pa.table(df) assert isinstance(table.field("a").type, ArrowPeriodType) result = table.to_pandas() assert isinstance(result["a"].dtype, PeriodDtype) tm.assert_frame_equal(result, df) - table2 = py.concat_tables([table, table]) + table2 = pa.concat_tables([table, table]) result = table2.to_pandas() expected = pd.concat([df, df], ignore_index=True) tm.assert_frame_equal(result, expected) @@ -96,10 +96,10 @@ def test_arrow_load_from_zero_chunks(): arr = PeriodArray([], freq="D") df = pd.DataFrame({"a": arr}) - table = py.table(df) + table = pa.table(df) assert isinstance(table.field("a").type, ArrowPeriodType) - table = py.table( - [py.chunked_array([], type=table.column(0).type)], schema=table.schema + table = pa.table( + [pa.chunked_array([], type=table.column(0).type)], schema=table.schema ) result = table.to_pandas() assert isinstance(result["a"].dtype, PeriodDtype) @@ -111,7 +111,7 @@ def test_arrow_table_roundtrip_without_metadata(): arr[1] = pd.NaT df = pd.DataFrame({"a": arr}) - table = py.table(df) + table = pa.table(df) # remove the metadata table = table.replace_schema_metadata() assert table.schema.metadata is None From 2ac84151198ef4e44ad8cb63a96cce5298ca9ab6 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Thu, 28 Oct 2021 07:26:47 -0500 Subject: [PATCH 8/9] revert changes --- pandas/core/arrays/string_.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 08eb14cd3ff37..5a8e5f488fbf2 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -19,6 +19,7 @@ Scalar, type_t, ) +from pandas.compat import pa_version_under1p01 from pandas.compat.numpy import function as nv from pandas.core.dtypes.base import ( @@ -103,6 +104,10 @@ def __init__(self, storage=None): raise ValueError( f"Storage must be 'python' or 'pyarrow'. Got {storage} instead." ) + if storage == "pyarrow" and pa_version_under1p01: + raise ImportError( + "pyarrow>=1.0.0 is required for PyArrow backed StringArray." + ) self.storage = storage @property From 1756dbd840062680c2b2ed2c0c561a54f6468109 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Tue, 9 Nov 2021 13:48:21 -0600 Subject: [PATCH 9/9] revert changes --- pandas/core/arrays/string_arrow.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index a77a4753695a7..b1daf0e393ef0 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -83,6 +83,12 @@ ArrowStringScalarOrNAT = Union[str, libmissing.NAType] +def _chk_pyarrow_available() -> None: + if pa_version_under1p01: + msg = "pyarrow>=1.0.0 is required for PyArrow backed StringArray." + raise ImportError(msg) + + # TODO: Inherit directly from BaseStringArrayMethods. Currently we inherit from # ObjectStringArrayMixin because we want to have the object-dtype based methods as # fallback for the ones that pyarrow doesn't yet support @@ -150,6 +156,8 @@ def __init__(self, values): def _from_sequence(cls, scalars, dtype: Dtype | None = None, copy: bool = False): from pandas.core.arrays.masked import BaseMaskedArray + _chk_pyarrow_available() + if dtype and not (isinstance(dtype, str) and dtype == "string"): dtype = pandas_dtype(dtype) assert isinstance(dtype, StringDtype) and dtype.storage == "pyarrow"