Skip to content

Upgrade pyarrow minimum version to 7.0 #51151

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Feb 7, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ci/deps/actions-38-minimum_versions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ dependencies:
- openpyxl=3.0.7
- pandas-gbq=0.15.0
- psycopg2=2.8.6
- pyarrow=6.0.0
- pyarrow=7.0.0
- pymysql=1.0.2
- pyreadstat=1.1.2
- pytables=3.6.1
Expand Down
2 changes: 1 addition & 1 deletion doc/source/getting_started/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -441,7 +441,7 @@ PyTables 3.6.1 hdf5 HDF5-based reading
blosc 1.21.0 hdf5 Compression for HDF5; only available on ``conda``
zlib hdf5 Compression for HDF5
fastparquet 0.6.3 - Parquet reading / writing (pyarrow is default)
pyarrow 6.0.0 parquet, feather Parquet, ORC, and feather reading / writing
pyarrow 7.0.0 parquet, feather Parquet, ORC, and feather reading / writing
pyreadstat 1.1.2 spss SPSS files (.sav) reading
odfpy 1.4.1 excel Open document format (.odf, .ods, .odt) reading / writing
========================= ================== ================ =============================================================
Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,7 @@ Optional libraries below the lowest tested version may still work, but are not c
+-----------------+-----------------+---------+
| Package | Minimum Version | Changed |
+=================+=================+=========+
| pyarrow | 6.0.0 | X |
| pyarrow | 7.0.0 | X |
+-----------------+-----------------+---------+
| matplotlib | 3.6.1 | X |
+-----------------+-----------------+---------+
Expand Down
4 changes: 2 additions & 2 deletions pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
Frequency,
NpDtype,
)
from pandas.compat import pa_version_under6p0
from pandas.compat import pa_version_under7p0

from pandas.core.dtypes.common import (
is_float_dtype,
Expand Down Expand Up @@ -191,7 +191,7 @@
]
]

if not pa_version_under6p0:
if not pa_version_under7p0:
import pyarrow as pa

UNSIGNED_INT_PYARROW_DTYPES = [pa.uint8(), pa.uint16(), pa.uint32(), pa.uint64()]
Expand Down
2 changes: 0 additions & 2 deletions pandas/compat/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
np_version_under1p21,
)
from pandas.compat.pyarrow import (
pa_version_under6p0,
pa_version_under7p0,
pa_version_under8p0,
pa_version_under9p0,
Expand Down Expand Up @@ -157,7 +156,6 @@ def get_lzma_file() -> type[pandas.compat.compressors.LZMAFile]:
__all__ = [
"is_numpy_dev",
"np_version_under1p21",
"pa_version_under6p0",
"pa_version_under7p0",
"pa_version_under8p0",
"pa_version_under9p0",
Expand Down
2 changes: 1 addition & 1 deletion pandas/compat/_optional.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
"pandas_gbq": "0.15.0",
"psycopg2": "2.8.6", # (dt dec pq3 ext lo64)
"pymysql": "1.0.2",
"pyarrow": "6.0.0",
"pyarrow": "7.0.0",
"pyreadstat": "1.1.2",
"pytest": "7.0.0",
"pyxlsb": "1.0.8",
Expand Down
2 changes: 0 additions & 2 deletions pandas/compat/pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,11 @@

_pa_version = pa.__version__
_palv = Version(_pa_version)
pa_version_under6p0 = _palv < Version("6.0.0")
pa_version_under7p0 = _palv < Version("7.0.0")
pa_version_under8p0 = _palv < Version("8.0.0")
pa_version_under9p0 = _palv < Version("9.0.0")
pa_version_under10p0 = _palv < Version("10.0.0")
except ImportError:
pa_version_under6p0 = True
pa_version_under7p0 = True
pa_version_under8p0 = True
pa_version_under9p0 = True
Expand Down
17 changes: 4 additions & 13 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
npt,
)
from pandas.compat import (
pa_version_under6p0,
pa_version_under7p0,
pa_version_under8p0,
pa_version_under9p0,
Expand Down Expand Up @@ -54,7 +53,7 @@
validate_indices,
)

if not pa_version_under6p0:
if not pa_version_under7p0:
import pyarrow as pa
import pyarrow.compute as pc

Expand Down Expand Up @@ -199,8 +198,8 @@ class ArrowExtensionArray(OpsMixin, ExtensionArray):
_dtype: ArrowDtype

def __init__(self, values: pa.Array | pa.ChunkedArray) -> None:
if pa_version_under6p0:
msg = "pyarrow>=6.0.0 is required for PyArrow backed ArrowExtensionArray."
if pa_version_under7p0:
msg = "pyarrow>=7.0.0 is required for PyArrow backed ArrowExtensionArray."
raise ImportError(msg)
if isinstance(values, pa.Array):
self._data = pa.chunked_array([values])
Expand Down Expand Up @@ -529,11 +528,6 @@ def _argmin_max(self, skipna: bool, method: str) -> int:
# let ExtensionArray.arg{max|min} raise
return getattr(super(), f"arg{method}")(skipna=skipna)

if pa_version_under6p0:
raise NotImplementedError(
f"arg{method} only implemented for pyarrow version >= 6.0"
)

data = self._data
if pa.types.is_duration(data.type):
data = data.cast(pa.int64())
Expand Down Expand Up @@ -567,7 +561,7 @@ def dropna(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT:
-------
ArrowExtensionArray
"""
if pa_version_under6p0:
if pa_version_under7p0:
fallback_performancewarning(version="7")
return super().dropna()
else:
Expand Down Expand Up @@ -1293,9 +1287,6 @@ def _mode(self: ArrowExtensionArrayT, dropna: bool = True) -> ArrowExtensionArra
same type as self
Sorted, if possible.
"""
if pa_version_under6p0:
raise NotImplementedError("mode only supported for pyarrow version >= 6.0")

pa_type = self._data.type
if pa.types.is_temporal(pa_type):
nbits = pa_type.bit_width
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/arrays/arrow/dtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@
import numpy as np

from pandas._typing import DtypeObj
from pandas.compat import pa_version_under6p0
from pandas.compat import pa_version_under7p0
from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.base import (
StorageExtensionDtype,
register_extension_dtype,
)

if not pa_version_under6p0:
if not pa_version_under7p0:
import pyarrow as pa


Expand Down Expand Up @@ -66,8 +66,8 @@ class ArrowDtype(StorageExtensionDtype):

def __init__(self, pyarrow_dtype: pa.DataType) -> None:
super().__init__("pyarrow")
if pa_version_under6p0:
raise ImportError("pyarrow>=6.0.0 is required for ArrowDtype")
if pa_version_under7p0:
raise ImportError("pyarrow>=7.0.0 is required for ArrowDtype")
if not isinstance(pyarrow_dtype, pa.DataType):
raise ValueError(
f"pyarrow_dtype ({pyarrow_dtype}) must be an instance "
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
npt,
type_t,
)
from pandas.compat import pa_version_under6p0
from pandas.compat import pa_version_under7p0
from pandas.compat.numpy import function as nv
from pandas.util._decorators import doc

Expand Down Expand Up @@ -115,9 +115,9 @@ def __init__(self, storage=None) -> None:
raise ValueError(
f"Storage must be 'python' or 'pyarrow'. Got {storage} instead."
)
if storage == "pyarrow" and pa_version_under6p0:
if storage == "pyarrow" and pa_version_under7p0:
raise ImportError(
"pyarrow>=6.0.0 is required for PyArrow backed StringArray."
"pyarrow>=7.0.0 is required for PyArrow backed StringArray."
)
self.storage = storage

Expand Down
8 changes: 4 additions & 4 deletions pandas/core/arrays/string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
Scalar,
npt,
)
from pandas.compat import pa_version_under6p0
from pandas.compat import pa_version_under7p0

from pandas.core.dtypes.common import (
is_bool_dtype,
Expand All @@ -40,7 +40,7 @@
)
from pandas.core.strings.object_array import ObjectStringArrayMixin

if not pa_version_under6p0:
if not pa_version_under7p0:
import pyarrow as pa
import pyarrow.compute as pc

Expand All @@ -50,8 +50,8 @@


def _chk_pyarrow_available() -> None:
if pa_version_under6p0:
msg = "pyarrow>=6.0.0 is required for PyArrow backed ArrowExtensionArray."
if pa_version_under7p0:
msg = "pyarrow>=7.0.0 is required for PyArrow backed ArrowExtensionArray."
raise ImportError(msg)


Expand Down
8 changes: 1 addition & 7 deletions pandas/tests/arrays/string_/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import numpy as np
import pytest

from pandas.compat import pa_version_under6p0
import pandas.util._test_decorators as td

from pandas.core.dtypes.common import is_dtype_equal
Expand Down Expand Up @@ -358,11 +357,6 @@ def test_reduce_missing(skipna, dtype):
@pytest.mark.parametrize("method", ["min", "max"])
@pytest.mark.parametrize("skipna", [True, False])
def test_min_max(method, skipna, dtype, request):
if dtype.storage == "pyarrow" and pa_version_under6p0:
reason = "'ArrowStringArray' object has no attribute 'max'"
mark = pytest.mark.xfail(raises=TypeError, reason=reason)
request.node.add_marker(mark)

arr = pd.Series(["a", "b", "c", None], dtype=dtype)
result = getattr(arr, method)(skipna=skipna)
if skipna:
Expand All @@ -375,7 +369,7 @@ def test_min_max(method, skipna, dtype, request):
@pytest.mark.parametrize("method", ["min", "max"])
@pytest.mark.parametrize("box", [pd.Series, pd.array])
def test_min_max_numpy(method, box, dtype, request):
if dtype.storage == "pyarrow" and (pa_version_under6p0 or box is pd.array):
if dtype.storage == "pyarrow" and box is pd.array:
if box is pd.array:
reason = "'<=' not supported between instances of 'str' and 'NoneType'"
else:
Expand Down
10 changes: 5 additions & 5 deletions pandas/tests/arrays/string_/test_string_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy as np
import pytest

from pandas.compat import pa_version_under6p0
from pandas.compat import pa_version_under7p0

import pandas as pd
import pandas._testing as tm
Expand All @@ -15,8 +15,8 @@
from pandas.core.arrays.string_arrow import ArrowStringArray

skip_if_no_pyarrow = pytest.mark.skipif(
pa_version_under6p0,
reason="pyarrow>=6.0.0 is required for PyArrow backed StringArray",
pa_version_under7p0,
reason="pyarrow>=7.0.0 is required for PyArrow backed StringArray",
)


Expand Down Expand Up @@ -119,11 +119,11 @@ def test_from_sequence_wrong_dtype_raises():


@pytest.mark.skipif(
not pa_version_under6p0,
not pa_version_under7p0,
reason="pyarrow is installed",
)
def test_pyarrow_not_installed_raises():
msg = re.escape("pyarrow>=6.0.0 is required for PyArrow backed")
msg = re.escape("pyarrow>=7.0.0 is required for PyArrow backed")

with pytest.raises(ImportError, match=msg):
StringDtype(storage="pyarrow")
Expand Down
Loading