Skip to content

Commit 9ae8f1d

Browse files
authored
DEPS: bump pyarrow version to 0.17.0 #38870 (#41476)
1 parent b9195cd commit 9ae8f1d

19 files changed

+46
-65
lines changed

.github/workflows/database.yml

+1-1
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,7 @@ jobs:
7070
- uses: conda-incubator/setup-miniconda@v2
7171
with:
7272
activate-environment: pandas-dev
73-
channel-priority: strict
73+
channel-priority: flexible
7474
environment-file: ${{ matrix.ENV_FILE }}
7575
use-only-tar-bz2: true
7676

ci/deps/actions-37-db-min.yaml

+2-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ dependencies:
3131
- openpyxl
3232
- pandas-gbq
3333
- google-cloud-bigquery>=1.27.2 # GH 36436
34-
- pyarrow=0.17 # GH 38803
34+
- protobuf>=3.12.4
35+
- pyarrow=0.17.1 # GH 38803
3536
- pytables>=3.5.1
3637
- scipy
3738
- xarray=0.12.3

ci/deps/actions-37-db.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ dependencies:
3131
- pandas-gbq
3232
- google-cloud-bigquery>=1.27.2 # GH 36436
3333
- psycopg2
34-
- pyarrow>=0.15.0
34+
- pyarrow>=0.17.0
3535
- pymysql
3636
- pytables
3737
- python-snappy

ci/deps/actions-37-minimum_versions.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ dependencies:
2323
- pytables=3.5.1
2424
- python-dateutil=2.7.3
2525
- pytz=2017.3
26-
- pyarrow=0.15
26+
- pyarrow=0.17.0
2727
- scipy=1.2
2828
- xlrd=1.2.0
2929
- xlsxwriter=1.0.2

ci/deps/actions-37.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ dependencies:
1818
- numpy=1.19
1919
- python-dateutil
2020
- nomkl
21-
- pyarrow=0.15.1
21+
- pyarrow
2222
- pytz
2323
- s3fs>=0.4.0
2424
- moto>=1.3.14

ci/deps/azure-macos-37.yaml

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
name: pandas-dev
22
channels:
33
- defaults
4+
- conda-forge
45
dependencies:
56
- python=3.7.*
67

@@ -21,7 +22,7 @@ dependencies:
2122
- numexpr
2223
- numpy=1.17.3
2324
- openpyxl
24-
- pyarrow=0.15.1
25+
- pyarrow=0.17.0
2526
- pytables
2627
- python-dateutil==2.7.3
2728
- pytz

ci/deps/azure-windows-37.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ dependencies:
2626
- numexpr
2727
- numpy=1.17.*
2828
- openpyxl
29-
- pyarrow=0.15
29+
- pyarrow=0.17.0
3030
- pytables
3131
- python-dateutil
3232
- pytz

ci/deps/azure-windows-38.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ dependencies:
2525
- numpy=1.18.*
2626
- openpyxl
2727
- jinja2
28-
- pyarrow>=0.15.0
28+
- pyarrow>=0.17.0
2929
- pytables
3030
- python-dateutil
3131
- pytz

doc/source/getting_started/install.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,7 @@ PyTables 3.5.1 HDF5-based reading / writing
358358
blosc 1.17.0 Compression for HDF5
359359
zlib Compression for HDF5
360360
fastparquet 0.4.0 Parquet reading / writing
361-
pyarrow 0.15.0 Parquet, ORC, and feather reading / writing
361+
pyarrow 0.17.0 Parquet, ORC, and feather reading / writing
362362
pyreadstat SPSS files (.sav) reading
363363
========================= ================== =============================================================
364364

doc/source/whatsnew/v1.3.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -579,7 +579,7 @@ Optional libraries below the lowest tested version may still work, but are not c
579579
+-----------------+-----------------+---------+
580580
| openpyxl | 3.0.0 | X |
581581
+-----------------+-----------------+---------+
582-
| pyarrow | 0.15.0 | |
582+
| pyarrow | 0.17.0 | X |
583583
+-----------------+-----------------+---------+
584584
| pymysql | 0.8.1 | X |
585585
+-----------------+-----------------+---------+

environment.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ dependencies:
100100
- odfpy
101101

102102
- fastparquet>=0.3.2 # pandas.read_parquet, DataFrame.to_parquet
103-
- pyarrow>=0.15.0 # pandas.read_parquet, DataFrame.to_parquet, pandas.read_feather, DataFrame.to_feather
103+
- pyarrow>=0.17.0 # pandas.read_parquet, DataFrame.to_parquet, pandas.read_feather, DataFrame.to_feather
104104
- python-snappy # required by pyarrow
105105

106106
- pyqt>=5.9.2 # pandas.read_clipboard

pandas/compat/_optional.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
"odfpy": "1.3.0",
2222
"openpyxl": "3.0.0",
2323
"pandas_gbq": "0.12.0",
24-
"pyarrow": "0.15.0",
24+
"pyarrow": "0.17.0",
2525
"pytest": "5.0.1",
2626
"pyxlsb": "1.0.6",
2727
"s3fs": "0.4.0",

pandas/tests/arrays/interval/test_interval.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ def test_repr():
165165
# Arrow interaction
166166

167167

168-
pyarrow_skip = td.skip_if_no("pyarrow", min_version="0.16.0")
168+
pyarrow_skip = td.skip_if_no("pyarrow")
169169

170170

171171
@pyarrow_skip

pandas/tests/arrays/masked/test_arrow_compat.py

+6-10
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import pandas as pd
77
import pandas._testing as tm
88

9-
pa = pytest.importorskip("pyarrow", minversion="0.15.0")
9+
pa = pytest.importorskip("pyarrow", minversion="0.17.0")
1010

1111
from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask
1212

@@ -21,8 +21,6 @@ def data(request):
2121

2222

2323
def test_arrow_array(data):
24-
# protocol added in 0.15.0
25-
2624
arr = pa.array(data)
2725
expected = pa.array(
2826
data.to_numpy(object, na_value=None),
@@ -31,10 +29,8 @@ def test_arrow_array(data):
3129
assert arr.equals(expected)
3230

3331

34-
@td.skip_if_no("pyarrow", min_version="0.16.0")
32+
@td.skip_if_no("pyarrow")
3533
def test_arrow_roundtrip(data):
36-
# roundtrip possible from arrow 0.16.0
37-
3834
df = pd.DataFrame({"a": data})
3935
table = pa.table(df)
4036
assert table.field("a").type == str(data.dtype.numpy_dtype)
@@ -43,7 +39,7 @@ def test_arrow_roundtrip(data):
4339
tm.assert_frame_equal(result, df)
4440

4541

46-
@td.skip_if_no("pyarrow", min_version="0.16.0")
42+
@td.skip_if_no("pyarrow")
4743
def test_arrow_load_from_zero_chunks(data):
4844
# GH-41040
4945

@@ -58,7 +54,7 @@ def test_arrow_load_from_zero_chunks(data):
5854
tm.assert_frame_equal(result, df)
5955

6056

61-
@td.skip_if_no("pyarrow", min_version="0.16.0")
57+
@td.skip_if_no("pyarrow")
6258
def test_arrow_from_arrow_uint():
6359
# https://github.com/pandas-dev/pandas/issues/31896
6460
# possible mismatch in types
@@ -70,7 +66,7 @@ def test_arrow_from_arrow_uint():
7066
tm.assert_extension_array_equal(result, expected)
7167

7268

73-
@td.skip_if_no("pyarrow", min_version="0.16.0")
69+
@td.skip_if_no("pyarrow")
7470
def test_arrow_sliced(data):
7571
# https://github.com/pandas-dev/pandas/issues/38525
7672

@@ -165,7 +161,7 @@ def test_pyarrow_array_to_numpy_and_mask(np_dtype_to_arrays):
165161
tm.assert_numpy_array_equal(mask, mask_expected_empty)
166162

167163

168-
@td.skip_if_no("pyarrow", min_version="0.16.0")
164+
@td.skip_if_no("pyarrow")
169165
def test_from_arrow_type_error(request, data):
170166
# ensure that __from_arrow__ returns a TypeError when getting a wrong
171167
# array type

pandas/tests/arrays/period/test_arrow_compat.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
period_array,
1212
)
1313

14-
pyarrow_skip = pyarrow_skip = td.skip_if_no("pyarrow", min_version="0.16.0")
14+
pyarrow_skip = td.skip_if_no("pyarrow", min_version="0.17.0")
1515

1616

1717
@pyarrow_skip

pandas/tests/arrays/string_/test_string.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -437,7 +437,7 @@ def test_fillna_args(dtype, request):
437437
arr.fillna(value=1)
438438

439439

440-
@td.skip_if_no("pyarrow", min_version="0.15.0")
440+
@td.skip_if_no("pyarrow")
441441
def test_arrow_array(dtype):
442442
# protocol added in 0.15.0
443443
import pyarrow as pa
@@ -451,7 +451,7 @@ def test_arrow_array(dtype):
451451
assert arr.equals(expected)
452452

453453

454-
@td.skip_if_no("pyarrow", min_version="0.16.0")
454+
@td.skip_if_no("pyarrow")
455455
def test_arrow_roundtrip(dtype, dtype_object):
456456
# roundtrip possible from arrow 1.0.0
457457
import pyarrow as pa
@@ -467,7 +467,7 @@ def test_arrow_roundtrip(dtype, dtype_object):
467467
assert result.loc[2, "a"] is pd.NA
468468

469469

470-
@td.skip_if_no("pyarrow", min_version="0.16.0")
470+
@td.skip_if_no("pyarrow")
471471
def test_arrow_load_from_zero_chunks(dtype, dtype_object):
472472
# GH-41040
473473
import pyarrow as pa

pandas/tests/io/test_feather.py

+5-8
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,12 @@
66

77
import pandas as pd
88
import pandas._testing as tm
9-
from pandas.util.version import Version
109

1110
from pandas.io.feather_format import read_feather, to_feather # isort:skip
1211

1312
pyarrow = pytest.importorskip("pyarrow")
1413

1514

16-
pyarrow_version = Version(pyarrow.__version__)
1715
filter_sparse = pytest.mark.filterwarnings("ignore:The Sparse")
1816

1917

@@ -89,12 +87,11 @@ def test_basic(self):
8987
),
9088
}
9189
)
92-
if pyarrow_version >= Version("0.17.0"):
93-
df["periods"] = pd.period_range("2013", freq="M", periods=3)
94-
df["timedeltas"] = pd.timedelta_range("1 day", periods=3)
95-
# TODO: temporarily disabled due to a regression in pyarrow 0.17.1
96-
# https://github.com/pandas-dev/pandas/issues/34255
97-
# df["intervals"] = pd.interval_range(0, 3, 3)
90+
df["periods"] = pd.period_range("2013", freq="M", periods=3)
91+
df["timedeltas"] = pd.timedelta_range("1 day", periods=3)
92+
# TODO: temporarily disabled due to a regression in pyarrow 0.17.1
93+
# https://github.com/pandas-dev/pandas/issues/34255
94+
# df["intervals"] = pd.interval_range(0, 3, 3)
9895

9996
assert df.dttz.dtype.tz.zone == "US/Eastern"
10097
self.check_round_trip(df)

pandas/tests/io/test_parquet.py

+15-29
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@
1717
PY38,
1818
is_platform_windows,
1919
)
20+
from pandas.compat.pyarrow import (
21+
pa_version_under1p0,
22+
pa_version_under2p0,
23+
)
2024
import pandas.util._test_decorators as td
2125

2226
import pandas as pd
@@ -653,8 +657,6 @@ def test_categorical(self, pa):
653657
)
654658
def test_s3_roundtrip_explicit_fs(self, df_compat, s3_resource, pa, s3so):
655659
s3fs = pytest.importorskip("s3fs")
656-
if Version(pyarrow.__version__) <= Version("0.17.0"):
657-
pytest.skip()
658660
s3 = s3fs.S3FileSystem(**s3so)
659661
kw = {"filesystem": s3}
660662
check_round_trip(
@@ -666,8 +668,6 @@ def test_s3_roundtrip_explicit_fs(self, df_compat, s3_resource, pa, s3so):
666668
)
667669

668670
def test_s3_roundtrip(self, df_compat, s3_resource, pa, s3so):
669-
if Version(pyarrow.__version__) <= Version("0.17.0"):
670-
pytest.skip()
671671
# GH #19134
672672
s3so = {"storage_options": s3so}
673673
check_round_trip(
@@ -698,14 +698,12 @@ def test_s3_roundtrip_for_dir(
698698
# These are added to back of dataframe on read. In new API category dtype is
699699
# only used if partition field is string, but this changed again to use
700700
# category dtype for all types (not only strings) in pyarrow 2.0.0
701-
pa10 = (Version(pyarrow.__version__) >= Version("1.0.0")) and (
702-
Version(pyarrow.__version__) < Version("2.0.0")
703-
)
704701
if partition_col:
705-
if pa10:
706-
partition_col_type = "int32"
707-
else:
708-
partition_col_type = "category"
702+
partition_col_type = (
703+
"int32"
704+
if (not pa_version_under1p0) and pa_version_under2p0
705+
else "category"
706+
)
709707

710708
expected_df[partition_col] = expected_df[partition_col].astype(
711709
partition_col_type
@@ -795,7 +793,7 @@ def test_write_with_schema(self, pa):
795793
out_df = df.astype(bool)
796794
check_round_trip(df, pa, write_kwargs={"schema": schema}, expected=out_df)
797795

798-
@td.skip_if_no("pyarrow", min_version="0.15.0")
796+
@td.skip_if_no("pyarrow")
799797
def test_additional_extension_arrays(self, pa):
800798
# test additional ExtensionArrays that are supported through the
801799
# __arrow_array__ protocol
@@ -806,22 +804,10 @@ def test_additional_extension_arrays(self, pa):
806804
"c": pd.Series(["a", None, "c"], dtype="string"),
807805
}
808806
)
809-
if Version(pyarrow.__version__) >= Version("0.16.0"):
810-
expected = df
811-
else:
812-
# de-serialized as plain int / object
813-
expected = df.assign(
814-
a=df.a.astype("int64"), b=df.b.astype("int64"), c=df.c.astype("object")
815-
)
816-
check_round_trip(df, pa, expected=expected)
807+
check_round_trip(df, pa)
817808

818809
df = pd.DataFrame({"a": pd.Series([1, 2, 3, None], dtype="Int64")})
819-
if Version(pyarrow.__version__) >= Version("0.16.0"):
820-
expected = df
821-
else:
822-
# if missing values in integer, currently de-serialized as float
823-
expected = df.assign(a=df.a.astype("float64"))
824-
check_round_trip(df, pa, expected=expected)
810+
check_round_trip(df, pa)
825811

826812
@td.skip_if_no("pyarrow", min_version="1.0.0")
827813
def test_pyarrow_backed_string_array(self, pa):
@@ -831,7 +817,7 @@ def test_pyarrow_backed_string_array(self, pa):
831817
df = pd.DataFrame({"a": pd.Series(["a", None, "c"], dtype="arrow_string")})
832818
check_round_trip(df, pa, expected=df)
833819

834-
@td.skip_if_no("pyarrow", min_version="0.16.0")
820+
@td.skip_if_no("pyarrow")
835821
def test_additional_extension_types(self, pa):
836822
# test additional ExtensionArrays that are supported through the
837823
# __arrow_array__ protocol + by defining a custom ExtensionType
@@ -844,7 +830,7 @@ def test_additional_extension_types(self, pa):
844830
)
845831
check_round_trip(df, pa)
846832

847-
@td.skip_if_no("pyarrow", min_version="0.16.0")
833+
@td.skip_if_no("pyarrow")
848834
def test_use_nullable_dtypes(self, pa):
849835
import pyarrow.parquet as pq
850836

@@ -880,7 +866,7 @@ def test_timestamp_nanoseconds(self, pa):
880866
check_round_trip(df, pa, write_kwargs={"version": "2.0"})
881867

882868
def test_timezone_aware_index(self, pa, timezone_aware_date_list):
883-
if Version(pyarrow.__version__) >= Version("2.0.0"):
869+
if not pa_version_under2p0:
884870
# temporarily skip this test until it is properly resolved
885871
# https://github.com/pandas-dev/pandas/issues/37286
886872
pytest.skip()

requirements-dev.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ xlsxwriter
6565
xlwt
6666
odfpy
6767
fastparquet>=0.3.2
68-
pyarrow>=0.15.0
68+
pyarrow>=0.17.0
6969
python-snappy
7070
pyqt5>=5.9.2
7171
tables>=3.5.1

0 commit comments

Comments
 (0)