Skip to content

Commit d8a1a0d

Browse files
authored
Upgrade pyarrow minimum version to 7.0 (#51151)
* Upgrade ci minimum version to 7.0
* Fix
* Remove
* Add cond back in
* Remove super
* Remove if
* Fix merge
1 parent 8478cf6 commit d8a1a0d

File tree

21 files changed

+51
-160
lines changed

21 files changed

+51
-160
lines changed

ci/deps/actions-38-minimum_versions.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ dependencies:
4343
- openpyxl=3.0.7
4444
- pandas-gbq=0.15.0
4545
- psycopg2=2.8.6
46-
- pyarrow=6.0.0
46+
- pyarrow=7.0.0
4747
- pymysql=1.0.2
4848
- pyreadstat=1.1.2
4949
- pytables=3.6.1

doc/source/getting_started/install.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,7 @@ PyTables 3.6.1 hdf5 HDF5-based reading
441441
blosc 1.21.0 hdf5 Compression for HDF5; only available on ``conda``
442442
zlib hdf5 Compression for HDF5
443443
fastparquet 0.6.3 - Parquet reading / writing (pyarrow is default)
444-
pyarrow 6.0.0 parquet, feather Parquet, ORC, and feather reading / writing
444+
pyarrow 7.0.0 parquet, feather Parquet, ORC, and feather reading / writing
445445
pyreadstat 1.1.2 spss SPSS files (.sav) reading
446446
odfpy 1.4.1 excel Open document format (.odf, .ods, .odt) reading / writing
447447
========================= ================== ================ =============================================================

doc/source/whatsnew/v2.0.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -668,7 +668,7 @@ Optional libraries below the lowest tested version may still work, but are not c
668668
+-----------------+-----------------+---------+
669669
| Package | Minimum Version | Changed |
670670
+=================+=================+=========+
671-
| pyarrow | 6.0.0 | X |
671+
| pyarrow | 7.0.0 | X |
672672
+-----------------+-----------------+---------+
673673
| matplotlib | 3.6.1 | X |
674674
+-----------------+-----------------+---------+

pandas/_testing/__init__.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
Frequency,
3030
NpDtype,
3131
)
32-
from pandas.compat import pa_version_under6p0
32+
from pandas.compat import pa_version_under7p0
3333

3434
from pandas.core.dtypes.common import (
3535
is_float_dtype,
@@ -191,7 +191,7 @@
191191
]
192192
]
193193

194-
if not pa_version_under6p0:
194+
if not pa_version_under7p0:
195195
import pyarrow as pa
196196

197197
UNSIGNED_INT_PYARROW_DTYPES = [pa.uint8(), pa.uint16(), pa.uint32(), pa.uint64()]

pandas/compat/__init__.py

-2
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@
2727
np_version_under1p21,
2828
)
2929
from pandas.compat.pyarrow import (
30-
pa_version_under6p0,
3130
pa_version_under7p0,
3231
pa_version_under8p0,
3332
pa_version_under9p0,
@@ -157,7 +156,6 @@ def get_lzma_file() -> type[pandas.compat.compressors.LZMAFile]:
157156
__all__ = [
158157
"is_numpy_dev",
159158
"np_version_under1p21",
160-
"pa_version_under6p0",
161159
"pa_version_under7p0",
162160
"pa_version_under8p0",
163161
"pa_version_under9p0",

pandas/compat/_optional.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
"pandas_gbq": "0.15.0",
3232
"psycopg2": "2.8.6", # (dt dec pq3 ext lo64)
3333
"pymysql": "1.0.2",
34-
"pyarrow": "6.0.0",
34+
"pyarrow": "7.0.0",
3535
"pyreadstat": "1.1.2",
3636
"pytest": "7.0.0",
3737
"pyxlsb": "1.0.8",

pandas/compat/pyarrow.py

-2
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,11 @@
99

1010
_pa_version = pa.__version__
1111
_palv = Version(_pa_version)
12-
pa_version_under6p0 = _palv < Version("6.0.0")
1312
pa_version_under7p0 = _palv < Version("7.0.0")
1413
pa_version_under8p0 = _palv < Version("8.0.0")
1514
pa_version_under9p0 = _palv < Version("9.0.0")
1615
pa_version_under10p0 = _palv < Version("10.0.0")
1716
except ImportError:
18-
pa_version_under6p0 = True
1917
pa_version_under7p0 = True
2018
pa_version_under8p0 = True
2119
pa_version_under9p0 = True

pandas/core/arrays/arrow/array.py

+4-17
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
npt,
2727
)
2828
from pandas.compat import (
29-
pa_version_under6p0,
3029
pa_version_under7p0,
3130
pa_version_under8p0,
3231
pa_version_under9p0,
@@ -54,7 +53,7 @@
5453
validate_indices,
5554
)
5655

57-
if not pa_version_under6p0:
56+
if not pa_version_under7p0:
5857
import pyarrow as pa
5958
import pyarrow.compute as pc
6059

@@ -199,8 +198,8 @@ class ArrowExtensionArray(OpsMixin, ExtensionArray):
199198
_dtype: ArrowDtype
200199

201200
def __init__(self, values: pa.Array | pa.ChunkedArray) -> None:
202-
if pa_version_under6p0:
203-
msg = "pyarrow>=6.0.0 is required for PyArrow backed ArrowExtensionArray."
201+
if pa_version_under7p0:
202+
msg = "pyarrow>=7.0.0 is required for PyArrow backed ArrowExtensionArray."
204203
raise ImportError(msg)
205204
if isinstance(values, pa.Array):
206205
self._data = pa.chunked_array([values])
@@ -529,11 +528,6 @@ def _argmin_max(self, skipna: bool, method: str) -> int:
529528
# let ExtensionArray.arg{max|min} raise
530529
return getattr(super(), f"arg{method}")(skipna=skipna)
531530

532-
if pa_version_under6p0:
533-
raise NotImplementedError(
534-
f"arg{method} only implemented for pyarrow version >= 6.0"
535-
)
536-
537531
data = self._data
538532
if pa.types.is_duration(data.type):
539533
data = data.cast(pa.int64())
@@ -567,11 +561,7 @@ def dropna(self: ArrowExtensionArrayT) -> ArrowExtensionArrayT:
567561
-------
568562
ArrowExtensionArray
569563
"""
570-
if pa_version_under6p0:
571-
fallback_performancewarning(version="6")
572-
return super().dropna()
573-
else:
574-
return type(self)(pc.drop_null(self._data))
564+
return type(self)(pc.drop_null(self._data))
575565

576566
@doc(ExtensionArray.fillna)
577567
def fillna(
@@ -1293,9 +1283,6 @@ def _mode(self: ArrowExtensionArrayT, dropna: bool = True) -> ArrowExtensionArra
12931283
same type as self
12941284
Sorted, if possible.
12951285
"""
1296-
if pa_version_under6p0:
1297-
raise NotImplementedError("mode only supported for pyarrow version >= 6.0")
1298-
12991286
pa_type = self._data.type
13001287
if pa.types.is_temporal(pa_type):
13011288
nbits = pa_type.bit_width

pandas/core/arrays/arrow/dtype.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,15 @@
55
import numpy as np
66

77
from pandas._typing import DtypeObj
8-
from pandas.compat import pa_version_under6p0
8+
from pandas.compat import pa_version_under7p0
99
from pandas.util._decorators import cache_readonly
1010

1111
from pandas.core.dtypes.base import (
1212
StorageExtensionDtype,
1313
register_extension_dtype,
1414
)
1515

16-
if not pa_version_under6p0:
16+
if not pa_version_under7p0:
1717
import pyarrow as pa
1818

1919

@@ -66,8 +66,8 @@ class ArrowDtype(StorageExtensionDtype):
6666

6767
def __init__(self, pyarrow_dtype: pa.DataType) -> None:
6868
super().__init__("pyarrow")
69-
if pa_version_under6p0:
70-
raise ImportError("pyarrow>=6.0.0 is required for ArrowDtype")
69+
if pa_version_under7p0:
70+
raise ImportError("pyarrow>=7.0.0 is required for ArrowDtype")
7171
if not isinstance(pyarrow_dtype, pa.DataType):
7272
raise ValueError(
7373
f"pyarrow_dtype ({pyarrow_dtype}) must be an instance "

pandas/core/arrays/string_.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
npt,
2222
type_t,
2323
)
24-
from pandas.compat import pa_version_under6p0
24+
from pandas.compat import pa_version_under7p0
2525
from pandas.compat.numpy import function as nv
2626
from pandas.util._decorators import doc
2727

@@ -115,9 +115,9 @@ def __init__(self, storage=None) -> None:
115115
raise ValueError(
116116
f"Storage must be 'python' or 'pyarrow'. Got {storage} instead."
117117
)
118-
if storage == "pyarrow" and pa_version_under6p0:
118+
if storage == "pyarrow" and pa_version_under7p0:
119119
raise ImportError(
120-
"pyarrow>=6.0.0 is required for PyArrow backed StringArray."
120+
"pyarrow>=7.0.0 is required for PyArrow backed StringArray."
121121
)
122122
self.storage = storage
123123

pandas/core/arrays/string_arrow.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
Scalar,
1818
npt,
1919
)
20-
from pandas.compat import pa_version_under6p0
20+
from pandas.compat import pa_version_under7p0
2121

2222
from pandas.core.dtypes.common import (
2323
is_bool_dtype,
@@ -40,7 +40,7 @@
4040
)
4141
from pandas.core.strings.object_array import ObjectStringArrayMixin
4242

43-
if not pa_version_under6p0:
43+
if not pa_version_under7p0:
4444
import pyarrow as pa
4545
import pyarrow.compute as pc
4646

@@ -50,8 +50,8 @@
5050

5151

5252
def _chk_pyarrow_available() -> None:
53-
if pa_version_under6p0:
54-
msg = "pyarrow>=6.0.0 is required for PyArrow backed ArrowExtensionArray."
53+
if pa_version_under7p0:
54+
msg = "pyarrow>=7.0.0 is required for PyArrow backed ArrowExtensionArray."
5555
raise ImportError(msg)
5656

5757

pandas/tests/arrays/string_/test_string.py

+1-7
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
import numpy as np
66
import pytest
77

8-
from pandas.compat import pa_version_under6p0
98
import pandas.util._test_decorators as td
109

1110
from pandas.core.dtypes.common import is_dtype_equal
@@ -358,11 +357,6 @@ def test_reduce_missing(skipna, dtype):
358357
@pytest.mark.parametrize("method", ["min", "max"])
359358
@pytest.mark.parametrize("skipna", [True, False])
360359
def test_min_max(method, skipna, dtype, request):
361-
if dtype.storage == "pyarrow" and pa_version_under6p0:
362-
reason = "'ArrowStringArray' object has no attribute 'max'"
363-
mark = pytest.mark.xfail(raises=TypeError, reason=reason)
364-
request.node.add_marker(mark)
365-
366360
arr = pd.Series(["a", "b", "c", None], dtype=dtype)
367361
result = getattr(arr, method)(skipna=skipna)
368362
if skipna:
@@ -375,7 +369,7 @@ def test_min_max(method, skipna, dtype, request):
375369
@pytest.mark.parametrize("method", ["min", "max"])
376370
@pytest.mark.parametrize("box", [pd.Series, pd.array])
377371
def test_min_max_numpy(method, box, dtype, request):
378-
if dtype.storage == "pyarrow" and (pa_version_under6p0 or box is pd.array):
372+
if dtype.storage == "pyarrow" and box is pd.array:
379373
if box is pd.array:
380374
reason = "'<=' not supported between instances of 'str' and 'NoneType'"
381375
else:

pandas/tests/arrays/string_/test_string_arrow.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import numpy as np
55
import pytest
66

7-
from pandas.compat import pa_version_under6p0
7+
from pandas.compat import pa_version_under7p0
88

99
import pandas as pd
1010
import pandas._testing as tm
@@ -15,8 +15,8 @@
1515
from pandas.core.arrays.string_arrow import ArrowStringArray
1616

1717
skip_if_no_pyarrow = pytest.mark.skipif(
18-
pa_version_under6p0,
19-
reason="pyarrow>=6.0.0 is required for PyArrow backed StringArray",
18+
pa_version_under7p0,
19+
reason="pyarrow>=7.0.0 is required for PyArrow backed StringArray",
2020
)
2121

2222

@@ -119,11 +119,11 @@ def test_from_sequence_wrong_dtype_raises():
119119

120120

121121
@pytest.mark.skipif(
122-
not pa_version_under6p0,
122+
not pa_version_under7p0,
123123
reason="pyarrow is installed",
124124
)
125125
def test_pyarrow_not_installed_raises():
126-
msg = re.escape("pyarrow>=6.0.0 is required for PyArrow backed")
126+
msg = re.escape("pyarrow>=7.0.0 is required for PyArrow backed")
127127

128128
with pytest.raises(ImportError, match=msg):
129129
StringDtype(storage="pyarrow")

0 commit comments

Comments
 (0)