Skip to content

Commit 4a2c06c

Browse files
authored
CI: Test pyarrow nightly instead of intermediate versions (#52211)
* CI: Test pyarrow nightly instead of intermediate versions * Change format * Pin, remove hardcoded channel * Try pip * Fix some tests * Address more tests * Fix test condition * Fix another condition * Cleanup name * Remove boto3
1 parent 176d256 commit 4a2c06c

17 files changed

+58
-54
lines changed

.github/actions/setup-conda/action.yml

-11
Original file line numberDiff line numberDiff line change
@@ -9,20 +9,9 @@ inputs:
99
extra-specs:
1010
description: Extra packages to install
1111
required: false
12-
pyarrow-version:
13-
description: If set, overrides the PyArrow version in the Conda environment to the given string.
14-
required: false
1512
runs:
1613
using: composite
1714
steps:
18-
- name: Set Arrow version in ${{ inputs.environment-file }} to ${{ inputs.pyarrow-version }}
19-
run: |
20-
grep -q ' - pyarrow' ${{ inputs.environment-file }}
21-
sed -i"" -e "s/ - pyarrow/ - pyarrow=${{ inputs.pyarrow-version }}/" ${{ inputs.environment-file }}
22-
cat ${{ inputs.environment-file }}
23-
shell: bash
24-
if: ${{ inputs.pyarrow-version }}
25-
2615
- name: Install ${{ inputs.environment-file }}
2716
uses: mamba-org/provision-with-micromamba@v12
2817
with:

.github/workflows/macos-windows.yml

-1
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,6 @@ jobs:
5252
uses: ./.github/actions/setup-conda
5353
with:
5454
environment-file: ci/deps/${{ matrix.env_file }}
55-
pyarrow-version: ${{ matrix.os == 'macos-latest' && '9' || '' }}
5655

5756
- name: Build Pandas
5857
uses: ./.github/actions/build_pandas

.github/workflows/ubuntu.yml

+5-17
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ jobs:
2828
env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml, actions-311.yaml]
2929
# Prevent the include jobs from overriding other jobs
3030
pattern: [""]
31-
pyarrow_version: ["8", "9", "10"]
3231
include:
3332
- name: "Downstream Compat"
3433
env_file: actions-38-downstream_compat.yaml
@@ -76,21 +75,11 @@ jobs:
7675
# TODO(cython3): Re-enable once next-beta(after beta 1) comes out
7776
# There are some warnings failing the build with -werror
7877
pandas_ci: "0"
79-
exclude:
80-
- env_file: actions-38.yaml
81-
pyarrow_version: "8"
82-
- env_file: actions-38.yaml
83-
pyarrow_version: "9"
84-
- env_file: actions-39.yaml
85-
pyarrow_version: "8"
86-
- env_file: actions-39.yaml
87-
pyarrow_version: "9"
88-
- env_file: actions-310.yaml
89-
pyarrow_version: "8"
90-
- env_file: actions-310.yaml
91-
pyarrow_version: "9"
78+
- name: "Pyarrow Nightly"
79+
env_file: actions-311-pyarrownightly.yaml
80+
pattern: "not slow and not network and not single_cpu"
9281
fail-fast: false
93-
name: ${{ matrix.name || format('{0} pyarrow={1} {2}', matrix.env_file, matrix.pyarrow_version, matrix.pattern) }}
82+
name: ${{ matrix.name || matrix.env_file }}
9483
env:
9584
ENV_FILE: ci/deps/${{ matrix.env_file }}
9685
PATTERN: ${{ matrix.pattern }}
@@ -108,7 +97,7 @@ jobs:
10897
COVERAGE: ${{ !contains(matrix.env_file, 'pypy') }}
10998
concurrency:
11099
# https://github.community/t/concurrecy-not-work-for-push/183068/7
111-
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.pyarrow_version || '' }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_data_manager || '' }}
100+
group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_data_manager || '' }}
112101
cancel-in-progress: true
113102

114103
services:
@@ -167,7 +156,6 @@ jobs:
167156
uses: ./.github/actions/setup-conda
168157
with:
169158
environment-file: ${{ env.ENV_FILE }}
170-
pyarrow-version: ${{ matrix.pyarrow_version }}
171159

172160
- name: Build Pandas
173161
id: build

ci/deps/actions-310.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ dependencies:
4141
- psycopg2>=2.8.6
4242
- pymysql>=1.0.2
4343
- pytables>=3.6.1
44-
- pyarrow
44+
- pyarrow>=7.0.0
4545
- pyreadstat>=1.1.2
4646
- python-snappy>=0.6.0
4747
- pyxlsb>=1.0.8
+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
name: pandas-dev
2+
channels:
3+
- conda-forge
4+
dependencies:
5+
- python=3.11
6+
7+
# build dependencies
8+
- versioneer[toml]
9+
- cython>=0.29.33
10+
11+
# test dependencies
12+
- pytest>=7.0.0
13+
- pytest-cov
14+
- pytest-xdist>=2.2.0
15+
- hypothesis>=6.34.2
16+
- pytest-asyncio>=0.17.0
17+
18+
# required dependencies
19+
- python-dateutil
20+
- numpy
21+
- pytz
22+
- pip
23+
24+
- pip:
25+
- "tzdata>=2022.1"
26+
- "--extra-index-url https://pypi.fury.io/arrow-nightlies/"
27+
- "--prefer-binary"
28+
- "--pre"
29+
- "pyarrow"

ci/deps/actions-311.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ dependencies:
4141
- psycopg2>=2.8.6
4242
- pymysql>=1.0.2
4343
# - pytables>=3.8.0 # first version that supports 3.11
44-
- pyarrow
44+
- pyarrow>=7.0.0
4545
- pyreadstat>=1.1.2
4646
- python-snappy>=0.6.0
4747
- pyxlsb>=1.0.8

ci/deps/actions-38-downstream_compat.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ dependencies:
3939
- openpyxl<3.1.1, >=3.0.7
4040
- odfpy>=1.4.1
4141
- psycopg2>=2.8.6
42-
- pyarrow
42+
- pyarrow>=7.0.0
4343
- pymysql>=1.0.2
4444
- pyreadstat>=1.1.2
4545
- pytables>=3.6.1

ci/deps/actions-38.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ dependencies:
3939
- odfpy>=1.4.1
4040
- pandas-gbq>=0.15.0
4141
- psycopg2>=2.8.6
42-
- pyarrow
42+
- pyarrow>=7.0.0
4343
- pymysql>=1.0.2
4444
- pyreadstat>=1.1.2
4545
- pytables>=3.6.1

ci/deps/actions-39.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ dependencies:
4040
- pandas-gbq>=0.15.0
4141
- psycopg2>=2.8.6
4242
- pymysql>=1.0.2
43-
- pyarrow
43+
- pyarrow>=7.0.0
4444
- pyreadstat>=1.1.2
4545
- pytables>=3.6.1
4646
- python-snappy>=0.6.0

ci/deps/circle-38-arm64.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ dependencies:
3939
- odfpy>=1.4.1
4040
- pandas-gbq>=0.15.0
4141
- psycopg2>=2.8.6
42-
- pyarrow
42+
- pyarrow>=7.0.0
4343
- pymysql>=1.0.2
4444
# Not provided on ARM
4545
#- pyreadstat

environment.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ dependencies:
4242
- odfpy>=1.4.1
4343
- py
4444
- psycopg2>=2.8.6
45-
- pyarrow
45+
- pyarrow>=7.0.0
4646
- pymysql>=1.0.2
4747
- pyreadstat>=1.1.2
4848
- pytables>=3.6.1

pandas/io/parquet.py

+8-12
Original file line numberDiff line numberDiff line change
@@ -92,22 +92,18 @@ def _get_path_or_handle(
9292
if fs is not None:
9393
pa_fs = import_optional_dependency("pyarrow.fs", errors="ignore")
9494
fsspec = import_optional_dependency("fsspec", errors="ignore")
95-
if pa_fs is None and fsspec is None:
96-
raise ValueError(
97-
f"filesystem must be a pyarrow or fsspec FileSystem, "
98-
f"not a {type(fs).__name__}"
99-
)
100-
elif (pa_fs is not None and not isinstance(fs, pa_fs.FileSystem)) and (
101-
fsspec is not None and not isinstance(fs, fsspec.spec.AbstractFileSystem)
102-
):
95+
if pa_fs is not None and isinstance(fs, pa_fs.FileSystem):
96+
if storage_options:
97+
raise NotImplementedError(
98+
"storage_options not supported with a pyarrow FileSystem."
99+
)
100+
elif fsspec is not None and isinstance(fs, fsspec.spec.AbstractFileSystem):
101+
pass
102+
else:
103103
raise ValueError(
104104
f"filesystem must be a pyarrow or fsspec FileSystem, "
105105
f"not a {type(fs).__name__}"
106106
)
107-
elif pa_fs is not None and isinstance(fs, pa_fs.FileSystem) and storage_options:
108-
raise NotImplementedError(
109-
"storage_options not supported with a pyarrow FileSystem."
110-
)
111107
if is_fsspec_url(path_or_handle) and fs is None:
112108
if storage_options is None:
113109
pa = import_optional_dependency("pyarrow")

pandas/tests/arrays/string_/test_string.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import pandas as pd
1313
import pandas._testing as tm
1414
from pandas.core.arrays.string_arrow import ArrowStringArray
15+
from pandas.util.version import Version
1516

1617

1718
@pytest.fixture
@@ -406,15 +407,14 @@ def test_fillna_args(dtype, request):
406407
arr.fillna(value=1)
407408

408409

409-
@td.skip_if_no("pyarrow")
410410
def test_arrow_array(dtype):
411411
# protocol added in 0.15.0
412-
import pyarrow as pa
412+
pa = pytest.importorskip("pyarrow")
413413

414414
data = pd.array(["a", "b", "c"], dtype=dtype)
415415
arr = pa.array(data)
416416
expected = pa.array(list(data), type=pa.string(), from_pandas=True)
417-
if dtype.storage == "pyarrow":
417+
if dtype.storage == "pyarrow" and Version(pa.__version__) <= Version("11.0.0"):
418418
expected = pa.chunked_array(expected)
419419

420420
assert arr.equals(expected)

pandas/tests/io/test_parquet.py

+3
Original file line numberDiff line numberDiff line change
@@ -1019,7 +1019,10 @@ def test_read_dtype_backend_pyarrow_config_index(self, pa):
10191019
{"a": [1, 2]}, index=pd.Index([3, 4], name="test"), dtype="int64[pyarrow]"
10201020
)
10211021
expected = df.copy()
1022+
import pyarrow
10221023

1024+
if Version(pyarrow.__version__) > Version("11.0.0"):
1025+
expected.index = expected.index.astype("int64[pyarrow]")
10231026
check_round_trip(
10241027
df,
10251028
engine=pa,

pandas/tests/util/test_show_versions.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def test_show_versions_console(capsys):
6565
assert re.search(r"numpy\s*:\s[0-9]+\..*\n", result)
6666

6767
# check optional dependency
68-
assert re.search(r"pyarrow\s*:\s([0-9\.]+|None)\n", result)
68+
assert re.search(r"pyarrow\s*:\s([0-9]+.*|None)\n", result)
6969

7070

7171
def test_json_output_match(capsys, tmpdir):

requirements-dev.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ openpyxl<3.1.1, >=3.0.7
3131
odfpy>=1.4.1
3232
py
3333
psycopg2-binary>=2.8.6
34-
pyarrow
34+
pyarrow>=7.0.0
3535
pymysql>=1.0.2
3636
pyreadstat>=1.1.2
3737
tables>=3.6.1

scripts/validate_min_versions_in_sync.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
YAML_PATH = pathlib.Path("ci/deps")
3838
ENV_PATH = pathlib.Path("environment.yml")
3939
EXCLUDE_DEPS = {"tzdata", "blosc"}
40-
EXCLUSION_LIST = frozenset(["python=3.8[build=*_pypy]", "pyarrow"])
40+
EXCLUSION_LIST = frozenset(["python=3.8[build=*_pypy]"])
4141
# pandas package is not available
4242
# in pre-commit environment
4343
sys.path.append("pandas/compat")

0 commit comments

Comments
 (0)