Skip to content

Backport PR #45219 on branch 1.4.x (MISC: Check that min versions are aligned in CI and import_optional_dependency) #45537

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,8 @@ repos:
entry: 'pg8000'
files: ^ci/deps
types: [yaml]
- id: validate-min-versions-in-sync
name: Check minimum version of dependencies are aligned
entry: python scripts/validate_min_versions_in_sync.py
language: python
files: ^(ci/deps/actions-.*-minimum_versions\.yaml|pandas/compat/_optional\.py)$
4 changes: 2 additions & 2 deletions ci/deps/actions-38-minimum_versions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,14 @@ dependencies:
- matplotlib=3.3.2
- numba=0.50.1
- numexpr=2.7.1
- openpyxl=3.0.3
- odfpy=1.4.1
- openpyxl=3.0.3
- pandas-gbq=0.14.0
- psycopg2=2.8.4
- pymysql=0.10.1
- pytables=3.6.1
- pyarrow=1.0.1
- pyreadstat
- pyreadstat=1.1.0
- pyxlsb=1.0.6
- s3fs=0.4.0
- scipy=1.4.1
Expand Down
2 changes: 1 addition & 1 deletion doc/source/getting_started/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ blosc 1.20.1 Compression for HDF5
zlib Compression for HDF5
fastparquet 0.4.0 Parquet reading / writing
pyarrow 1.0.1 Parquet, ORC, and feather reading / writing
pyreadstat SPSS files (.sav) reading
pyreadstat 1.1.0 SPSS files (.sav) reading
========================= ================== =============================================================

.. _install.warn_orc:
Expand Down
14 changes: 12 additions & 2 deletions pandas/compat/_optional.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,24 @@

VERSIONS = {
"bs4": "4.8.2",
"blosc": "1.20.1",
"bottleneck": "1.3.1",
"fsspec": "0.7.4",
"fastparquet": "0.4.0",
"fsspec": "0.7.4",
"html5lib": "1.1",
"gcsfs": "0.6.0",
"jinja2": "2.11",
"lxml.etree": "4.5.0",
"matplotlib": "3.3.2",
"numba": "0.50.1",
"numexpr": "2.7.1",
"odfpy": "1.4.1",
"openpyxl": "3.0.3",
"pandas_gbq": "0.14.0",
"psycopg2": "2.8.4", # (dt dec pq3 ext lo64)
"pymysql": "0.10.1",
"pyarrow": "1.0.1",
"pyreadstat": "1.1.0",
"pytest": "6.0",
"pyxlsb": "1.0.6",
"s3fs": "0.4.0",
Expand All @@ -33,7 +40,6 @@
"xlrd": "2.0.1",
"xlwt": "1.3.0",
"xlsxwriter": "1.2.2",
"numba": "0.50.1",
"zstandard": "0.15.2",
}

Expand All @@ -46,6 +52,7 @@
"lxml.etree": "lxml",
"odf": "odfpy",
"pandas_gbq": "pandas-gbq",
"tables": "pytables",
"sqlalchemy": "SQLAlchemy",
"jinja2": "Jinja2",
}
Expand All @@ -59,6 +66,9 @@ def get_version(module: types.ModuleType) -> str:

if version is None:
raise ImportError(f"Can't determine version for {module.__name__}")
if module.__name__ == "psycopg2":
# psycopg2 appends " (dt dec pq3 ext lo64)" to its version
version = version.split()[0]
return version


Expand Down
81 changes: 81 additions & 0 deletions scripts/validate_min_versions_in_sync.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#!/usr/bin/env python3
"""
Check pandas required and optional dependencies are synced across:

ci/deps/actions-.*-minimum_versions.yaml
pandas/compat/_optional.py

TODO: doc/source/getting_started/install.rst

This is meant to be run as a pre-commit hook - to run it manually, you can do:

pre-commit run validate-min-versions-in-sync --all-files
"""
from __future__ import annotations

import pathlib
import sys

# Locations of the files whose pinned minimum versions should agree.
# The relative paths are resolved against the current working directory, so
# the script presumably has to be launched from the repository root
# (as pre-commit does) - TODO confirm.
# NOTE(review): DOC_PATH is defined but not used in the visible part of this
# script; the module docstring lists the install docs as a TODO.
DOC_PATH = pathlib.Path("doc/source/getting_started/install.rst").resolve()
# Only the first file yielded by the glob is used; next() raises
# StopIteration if no file matches the pattern.
CI_PATH = next(
pathlib.Path("ci/deps").absolute().glob("actions-*-minimum_versions.yaml")
)
CODE_PATH = pathlib.Path("pandas/compat/_optional.py").resolve()
# pandas package is not available
# in pre-commit environment
# so the two directories are put on sys.path and the modules they contain
# are imported as top-level modules instead.
sys.path.append("pandas/compat")
sys.path.append("pandas/util")
import version

# Register the standalone module under its package-qualified name before
# importing _optional - presumably because _optional itself imports
# pandas.util.version; verify against pandas/compat/_optional.py.
sys.modules["pandas.util.version"] = version
import _optional


def get_versions_from_code() -> dict[str, str]:
    """
    Return the minimum versions declared in ``pandas/compat/_optional.py``.

    Keys are the case-folded *install* names: each import name in
    ``_optional.VERSIONS`` is translated through ``_optional.INSTALL_MAPPING``
    when it has an entry there and used unchanged otherwise. The ``"pytest"``
    entry is left out of the result.

    Returns
    -------
    dict[str, str]
        Mapping of case-folded install name to minimum version string.
    """
    import_to_install = _optional.INSTALL_MAPPING
    minimum_versions: dict[str, str] = {}
    for import_name, min_version in _optional.VERSIONS.items():
        if import_name == "pytest":
            continue
        install_name = import_to_install.get(import_name, import_name)
        minimum_versions[install_name.casefold()] = min_version
    return minimum_versions


def get_versions_from_ci(content: list[str]) -> tuple[dict[str, str], dict[str, str]]:
    """
    Extract pinned versions from the lines of a CI environment YAML file.

    The file is scanned line by line. Everything before a line containing
    ``# required dependencies`` is ignored. Entries after that marker are
    collected as required dependencies until a line containing
    ``# optional dependencies`` is seen; entries after that are collected as
    optional dependencies.

    Parameters
    ----------
    content : list[str]
        Lines of the environment file, e.g. from ``file.readlines()``.

    Returns
    -------
    tuple[dict[str, str], dict[str, str]]
        ``(required, optional)`` mappings of package name to pinned version.

    Raises
    ------
    ValueError
        If a dependency entry inside one of the sections carries no
        ``name=version`` (or ``name==version``) pin.
    """
    # Don't parse with pyyaml because it ignores comments we're looking for
    seen_required = False
    seen_optional = False
    required_deps: dict[str, str] = {}
    optional_deps: dict[str, str] = {}
    for line in content:
        if "# required dependencies" in line:
            seen_required = True
            continue
        if "# optional dependencies" in line:
            seen_optional = True
            continue
        if not seen_required:
            continue

        # Drop any trailing comment, then the YAML list marker ("- ").
        entry = line.split("#", maxsplit=1)[0].strip()
        entry = entry.lstrip("-").strip()
        # Skip blank/comment-only lines and nested keys such as "- pip:".
        if not entry or entry.endswith(":"):
            continue

        # Accept both conda-style "name=1.0" and pip-style "name==1.0" pins.
        package, separator, pinned = entry.partition("=")
        package = package.strip()
        pinned = pinned.lstrip("=").strip()
        if not separator or not package or not pinned:
            raise ValueError(
                f"Expected a 'name=version' pin but found {line.strip()!r}"
            )

        target = optional_deps if seen_optional else required_deps
        target[package] = pinned
    return required_deps, optional_deps


def main() -> None:
    """
    Check that CI and code agree on optional-dependency minimum versions.

    Reads the CI environment file at ``CI_PATH`` and keeps only the pins from
    its optional section (the required ones are discarded). These are compared
    with the versions returned by ``get_versions_from_code``.

    Exits with status 0 when both sources contain exactly the same
    ``(package, version)`` pairs. Otherwise the pairs present in only one of
    the two sources are written to stdout and the process exits with status 1.
    """
    with open(CI_PATH, encoding="utf-8") as f:
        _, ci_optional = get_versions_from_ci(f.readlines())
    code_optional = get_versions_from_code()
    diff = set(ci_optional.items()).symmetric_difference(code_optional.items())
    if diff:
        # Sort the pairs so the report is stable between runs; the iteration
        # order of a set of strings is not.
        sys.stdout.write(
            f"The following minimum version differences were found between "
            f"{CI_PATH} and {CODE_PATH}. Please ensure these are aligned: "
            f"{sorted(diff)}\n"
        )
        sys.exit(1)
    sys.exit(0)


if __name__ == "__main__":
    main()