diff --git a/.github/actions/setup-conda/action.yml b/.github/actions/setup-conda/action.yml index efc31bba88f28..329dc24d466b4 100644 --- a/.github/actions/setup-conda/action.yml +++ b/.github/actions/setup-conda/action.yml @@ -9,20 +9,9 @@ inputs: extra-specs: description: Extra packages to install required: false - pyarrow-version: - description: If set, overrides the PyArrow version in the Conda environment to the given string. - required: false runs: using: composite steps: - - name: Set Arrow version in ${{ inputs.environment-file }} to ${{ inputs.pyarrow-version }} - run: | - grep -q ' - pyarrow' ${{ inputs.environment-file }} - sed -i"" -e "s/ - pyarrow/ - pyarrow=${{ inputs.pyarrow-version }}/" ${{ inputs.environment-file }} - cat ${{ inputs.environment-file }} - shell: bash - if: ${{ inputs.pyarrow-version }} - - name: Install ${{ inputs.environment-file }} uses: mamba-org/provision-with-micromamba@v12 with: diff --git a/.github/workflows/macos-windows.yml b/.github/workflows/macos-windows.yml index 15308d0c086f6..7ed5f5b90b959 100644 --- a/.github/workflows/macos-windows.yml +++ b/.github/workflows/macos-windows.yml @@ -52,7 +52,6 @@ jobs: uses: ./.github/actions/setup-conda with: environment-file: ci/deps/${{ matrix.env_file }} - pyarrow-version: ${{ matrix.os == 'macos-latest' && '9' || '' }} - name: Build Pandas uses: ./.github/actions/build_pandas diff --git a/.github/workflows/ubuntu.yml b/.github/workflows/ubuntu.yml index 08dd09e57871b..97ca346142ec1 100644 --- a/.github/workflows/ubuntu.yml +++ b/.github/workflows/ubuntu.yml @@ -28,7 +28,6 @@ jobs: env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml, actions-311.yaml] # Prevent the include jobs from overriding other jobs pattern: [""] - pyarrow_version: ["8", "9", "10"] include: - name: "Downstream Compat" env_file: actions-38-downstream_compat.yaml @@ -76,21 +75,11 @@ jobs: # TODO(cython3): Re-enable once next-beta(after beta 1) comes out # There are some warnings failing the build with -werror pandas_ci: "0" - exclude: - - env_file: actions-38.yaml - pyarrow_version: "8" - - env_file: actions-38.yaml - pyarrow_version: "9" - - env_file: actions-39.yaml - pyarrow_version: "8" - - env_file: actions-39.yaml - pyarrow_version: "9" - - env_file: actions-310.yaml - pyarrow_version: "8" - - env_file: actions-310.yaml - pyarrow_version: "9" + - name: "Pyarrow Nightly" + env_file: actions-311-pyarrownightly.yaml + pattern: "not slow and not network and not single_cpu" fail-fast: false - name: ${{ matrix.name || format('{0} pyarrow={1} {2}', matrix.env_file, matrix.pyarrow_version, matrix.pattern) }} + name: ${{ matrix.name || matrix.env_file }} env: ENV_FILE: ci/deps/${{ matrix.env_file }} PATTERN: ${{ matrix.pattern }} @@ -108,7 +97,7 @@ jobs: COVERAGE: ${{ !contains(matrix.env_file, 'pypy') }} concurrency: # https://github.community/t/concurrecy-not-work-for-push/183068/7 - group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.pyarrow_version || '' }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_data_manager || '' }} + group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_data_manager || '' }} cancel-in-progress: true services: @@ -167,7 +156,6 @@ jobs: uses: ./.github/actions/setup-conda with: environment-file: ${{ env.ENV_FILE }} - pyarrow-version: ${{ matrix.pyarrow_version }} - name: Build Pandas id: build diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml index f40b555593f6b..47405b72476fd 100644 --- a/ci/deps/actions-310.yaml +++ b/ci/deps/actions-310.yaml @@ -41,7 +41,7 @@ dependencies: - psycopg2>=2.8.6 - pymysql>=1.0.2 - pytables>=3.6.1 - - pyarrow + - pyarrow>=7.0.0 - pyreadstat>=1.1.2 - python-snappy>=0.6.0 - pyxlsb>=1.0.8 diff --git a/ci/deps/actions-311-pyarrownightly.yaml b/ci/deps/actions-311-pyarrownightly.yaml new file mode 100644 index 0000000000000..77e4fc9d2c2d9 --- /dev/null +++ b/ci/deps/actions-311-pyarrownightly.yaml @@ -0,0 +1,29 @@ +name: pandas-dev +channels: + - conda-forge +dependencies: + - python=3.11 + + # build dependencies + - versioneer[toml] + - cython>=0.29.33 + + # test dependencies + - pytest>=7.0.0 + - pytest-cov + - pytest-xdist>=2.2.0 + - hypothesis>=6.34.2 + - pytest-asyncio>=0.17.0 + + # required dependencies + - python-dateutil + - numpy + - pytz + - pip + + - pip: + - "tzdata>=2022.1" + - "--extra-index-url https://pypi.fury.io/arrow-nightlies/" + - "--prefer-binary" + - "--pre" + - "pyarrow" diff --git a/ci/deps/actions-311.yaml b/ci/deps/actions-311.yaml index fa08bdf438dff..9ebfb710e0abb 100644 --- a/ci/deps/actions-311.yaml +++ b/ci/deps/actions-311.yaml @@ -41,7 +41,7 @@ dependencies: - psycopg2>=2.8.6 - pymysql>=1.0.2 # - pytables>=3.8.0 # first version that supports 3.11 - - pyarrow + - pyarrow>=7.0.0 - pyreadstat>=1.1.2 - python-snappy>=0.6.0 - pyxlsb>=1.0.8 diff --git a/ci/deps/actions-38-downstream_compat.yaml b/ci/deps/actions-38-downstream_compat.yaml index a9265bd84ee87..3ed2786b76896 100644 --- a/ci/deps/actions-38-downstream_compat.yaml +++ b/ci/deps/actions-38-downstream_compat.yaml @@ -39,7 +39,7 @@ dependencies: - openpyxl<3.1.1, >=3.0.7 - odfpy>=1.4.1 - psycopg2>=2.8.6 - - pyarrow + - pyarrow>=7.0.0 - pymysql>=1.0.2 - pyreadstat>=1.1.2 - pytables>=3.6.1 diff --git a/ci/deps/actions-38.yaml b/ci/deps/actions-38.yaml index 27872514447a5..4060a837d1757 100644 --- a/ci/deps/actions-38.yaml +++ b/ci/deps/actions-38.yaml @@ -39,7 +39,7 @@ dependencies: - odfpy>=1.4.1 - pandas-gbq>=0.15.0 - psycopg2>=2.8.6 - - pyarrow + - pyarrow>=7.0.0 - pymysql>=1.0.2 - pyreadstat>=1.1.2 - pytables>=3.6.1 diff --git a/ci/deps/actions-39.yaml b/ci/deps/actions-39.yaml index 4b0575d8a3afd..53cd9c5635493 100644 --- a/ci/deps/actions-39.yaml +++ b/ci/deps/actions-39.yaml @@ -40,7 +40,7 @@ dependencies: - pandas-gbq>=0.15.0 - psycopg2>=2.8.6 - pymysql>=1.0.2 - - pyarrow + - pyarrow>=7.0.0 - pyreadstat>=1.1.2 - pytables>=3.6.1 - python-snappy>=0.6.0 diff --git a/ci/deps/circle-38-arm64.yaml b/ci/deps/circle-38-arm64.yaml index c3d89e735ae37..2e4070fa82010 100644 --- a/ci/deps/circle-38-arm64.yaml +++ b/ci/deps/circle-38-arm64.yaml @@ -39,7 +39,7 @@ dependencies: - odfpy>=1.4.1 - pandas-gbq>=0.15.0 - psycopg2>=2.8.6 - - pyarrow + - pyarrow>=7.0.0 - pymysql>=1.0.2 # Not provided on ARM #- pyreadstat diff --git a/environment.yml b/environment.yml index f29ade1dc5173..5aa1fad2e51c7 100644 --- a/environment.yml +++ b/environment.yml @@ -42,7 +42,7 @@ dependencies: - odfpy>=1.4.1 - py - psycopg2>=2.8.6 - - pyarrow + - pyarrow>=7.0.0 - pymysql>=1.0.2 - pyreadstat>=1.1.2 - pytables>=3.6.1 diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 7791ca53a6447..30dfceb29155a 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -92,22 +92,18 @@ def _get_path_or_handle( if fs is not None: pa_fs = import_optional_dependency("pyarrow.fs", errors="ignore") fsspec = import_optional_dependency("fsspec", errors="ignore") - if pa_fs is None and fsspec is None: - raise ValueError( - f"filesystem must be a pyarrow or fsspec FileSystem, " - f"not a {type(fs).__name__}" - ) - elif (pa_fs is not None and not isinstance(fs, pa_fs.FileSystem)) and ( - fsspec is not None and not isinstance(fs, fsspec.spec.AbstractFileSystem) - ): + if pa_fs is not None and isinstance(fs, pa_fs.FileSystem): + if storage_options: + raise NotImplementedError( + "storage_options not supported with a pyarrow FileSystem." + ) + elif fsspec is not None and isinstance(fs, fsspec.spec.AbstractFileSystem): + pass + else: raise ValueError( f"filesystem must be a pyarrow or fsspec FileSystem, " f"not a {type(fs).__name__}" ) - elif pa_fs is not None and isinstance(fs, pa_fs.FileSystem) and storage_options: - raise NotImplementedError( - "storage_options not supported with a pyarrow FileSystem." - ) if is_fsspec_url(path_or_handle) and fs is None: if storage_options is None: pa = import_optional_dependency("pyarrow") diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index dd0b43c116266..7e4869589cee6 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -12,6 +12,7 @@ import pandas as pd import pandas._testing as tm from pandas.core.arrays.string_arrow import ArrowStringArray +from pandas.util.version import Version @pytest.fixture @@ -406,15 +407,14 @@ def test_fillna_args(dtype, request): arr.fillna(value=1) -@td.skip_if_no("pyarrow") def test_arrow_array(dtype): # protocol added in 0.15.0 - import pyarrow as pa + pa = pytest.importorskip("pyarrow") data = pd.array(["a", "b", "c"], dtype=dtype) arr = pa.array(data) expected = pa.array(list(data), type=pa.string(), from_pandas=True) - if dtype.storage == "pyarrow": + if dtype.storage == "pyarrow" and Version(pa.__version__) <= Version("11.0.0"): expected = pa.chunked_array(expected) assert arr.equals(expected) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index b55e97a4fe0ae..c74548bf63e06 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -1019,7 +1019,10 @@ def test_read_dtype_backend_pyarrow_config_index(self, pa): {"a": [1, 2]}, index=pd.Index([3, 4], name="test"), dtype="int64[pyarrow]" ) expected = df.copy() + import pyarrow + if Version(pyarrow.__version__) > Version("11.0.0"): + expected.index = expected.index.astype("int64[pyarrow]") check_round_trip( df, engine=pa, diff --git a/pandas/tests/util/test_show_versions.py b/pandas/tests/util/test_show_versions.py index 8ff78cc073acf..714588d179aef 100644 --- a/pandas/tests/util/test_show_versions.py +++ b/pandas/tests/util/test_show_versions.py @@ -65,7 +65,7 @@ def test_show_versions_console(capsys): assert re.search(r"numpy\s*:\s[0-9]+\..*\n", result) # check optional dependency - assert re.search(r"pyarrow\s*:\s([0-9\.]+|None)\n", result) + assert re.search(r"pyarrow\s*:\s([0-9]+.*|None)\n", result) def test_json_output_match(capsys, tmpdir): diff --git a/requirements-dev.txt b/requirements-dev.txt index 9c0bdc64d6e07..f3c9649a5a707 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -31,7 +31,7 @@ openpyxl<3.1.1, >=3.0.7 odfpy>=1.4.1 py psycopg2-binary>=2.8.6 -pyarrow +pyarrow>=7.0.0 pymysql>=1.0.2 pyreadstat>=1.1.2 tables>=3.6.1 diff --git a/scripts/validate_min_versions_in_sync.py b/scripts/validate_min_versions_in_sync.py index b6016a35e3dbb..e0182ebaaee60 100755 --- a/scripts/validate_min_versions_in_sync.py +++ b/scripts/validate_min_versions_in_sync.py @@ -37,7 +37,7 @@ YAML_PATH = pathlib.Path("ci/deps") ENV_PATH = pathlib.Path("environment.yml") EXCLUDE_DEPS = {"tzdata", "blosc"} -EXCLUSION_LIST = frozenset(["python=3.8[build=*_pypy]", "pyarrow"]) +EXCLUSION_LIST = frozenset(["python=3.8[build=*_pypy]"]) # pandas package is not available # in pre-commit environment sys.path.append("pandas/compat")