Skip to content

Commit f939b2c

Browse files
Backport PR #45219: MISC: Check that min versions are aligned in CI and import_optional_dependency (#45537)
Co-authored-by: Matthew Roeschke <[email protected]>
1 parent feb5346 commit f939b2c

File tree

5 files changed

+101
-5
lines changed

5 files changed

+101
-5
lines changed

.pre-commit-config.yaml

+5
Original file line numberDiff line numberDiff line change
@@ -181,3 +181,8 @@ repos:
181181
entry: 'pg8000'
182182
files: ^ci/deps
183183
types: [yaml]
184+
- id: validate-min-versions-in-sync
185+
name: Check minimum version of dependencies are aligned
186+
entry: python scripts/validate_min_versions_in_sync.py
187+
language: python
188+
files: ^(ci/deps/actions-.*-minimum_versions\.yaml|pandas/compat/_optional\.py)$

ci/deps/actions-38-minimum_versions.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,14 @@ dependencies:
3232
- matplotlib=3.3.2
3333
- numba=0.50.1
3434
- numexpr=2.7.1
35-
- openpyxl=3.0.3
3635
- odfpy=1.4.1
36+
- openpyxl=3.0.3
3737
- pandas-gbq=0.14.0
3838
- psycopg2=2.8.4
3939
- pymysql=0.10.1
4040
- pytables=3.6.1
4141
- pyarrow=1.0.1
42-
- pyreadstat
42+
- pyreadstat=1.1.0
4343
- pyxlsb=1.0.6
4444
- s3fs=0.4.0
4545
- scipy=1.4.1

doc/source/getting_started/install.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -361,7 +361,7 @@ blosc 1.20.1 Compression for HDF5
361361
zlib Compression for HDF5
362362
fastparquet 0.4.0 Parquet reading / writing
363363
pyarrow 1.0.1 Parquet, ORC, and feather reading / writing
364-
pyreadstat SPSS files (.sav) reading
364+
pyreadstat 1.1.0 SPSS files (.sav) reading
365365
========================= ================== =============================================================
366366

367367
.. _install.warn_orc:

pandas/compat/_optional.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,24 @@
1111

1212
VERSIONS = {
1313
"bs4": "4.8.2",
14+
"blosc": "1.20.1",
1415
"bottleneck": "1.3.1",
15-
"fsspec": "0.7.4",
1616
"fastparquet": "0.4.0",
17+
"fsspec": "0.7.4",
18+
"html5lib": "1.1",
1719
"gcsfs": "0.6.0",
20+
"jinja2": "2.11",
1821
"lxml.etree": "4.5.0",
1922
"matplotlib": "3.3.2",
23+
"numba": "0.50.1",
2024
"numexpr": "2.7.1",
2125
"odfpy": "1.4.1",
2226
"openpyxl": "3.0.3",
2327
"pandas_gbq": "0.14.0",
28+
"psycopg2": "2.8.4", # (dt dec pq3 ext lo64)
29+
"pymysql": "0.10.1",
2430
"pyarrow": "1.0.1",
31+
"pyreadstat": "1.1.0",
2532
"pytest": "6.0",
2633
"pyxlsb": "1.0.6",
2734
"s3fs": "0.4.0",
@@ -33,7 +40,6 @@
3340
"xlrd": "2.0.1",
3441
"xlwt": "1.3.0",
3542
"xlsxwriter": "1.2.2",
36-
"numba": "0.50.1",
3743
"zstandard": "0.15.2",
3844
}
3945

@@ -46,6 +52,7 @@
4652
"lxml.etree": "lxml",
4753
"odf": "odfpy",
4854
"pandas_gbq": "pandas-gbq",
55+
"tables": "pytables",
4956
"sqlalchemy": "SQLAlchemy",
5057
"jinja2": "Jinja2",
5158
}
@@ -59,6 +66,9 @@ def get_version(module: types.ModuleType) -> str:
5966

6067
if version is None:
6168
raise ImportError(f"Can't determine version for {module.__name__}")
69+
if module.__name__ == "psycopg2":
70+
# psycopg2 appends " (dt dec pq3 ext lo64)" to its version
71+
version = version.split()[0]
6272
return version
6373

6474

+81
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Check pandas required and optional dependencies are synced across:
4+
5+
ci/deps/actions-.*-minimum_versions.yaml
6+
pandas/compat/_optional.py
7+
8+
TODO: doc/source/getting_started/install.rst
9+
10+
This is meant to be run as a pre-commit hook - to run it manually, you can do:
11+
12+
pre-commit run validate-min-versions-in-sync --all-files
13+
"""
14+
from __future__ import annotations
15+
16+
import pathlib
17+
import sys
18+
19+
DOC_PATH = pathlib.Path("doc/source/getting_started/install.rst").resolve()
20+
CI_PATH = next(
21+
pathlib.Path("ci/deps").absolute().glob("actions-*-minimum_versions.yaml")
22+
)
23+
CODE_PATH = pathlib.Path("pandas/compat/_optional.py").resolve()
24+
# pandas package is not available
25+
# in pre-commit environment
26+
sys.path.append("pandas/compat")
27+
sys.path.append("pandas/util")
28+
import version
29+
30+
sys.modules["pandas.util.version"] = version
31+
import _optional
32+
33+
34+
def get_versions_from_code() -> dict[str, str]:
    """
    Collect the minimum versions declared in ``_optional.VERSIONS``.

    Each key is translated through ``_optional.INSTALL_MAPPING`` (keys with
    no mapping are kept unchanged) and then case-folded. The ``"pytest"``
    entry is left out of the result.
    """
    install_names = _optional.INSTALL_MAPPING
    minimum_versions: dict[str, str] = {}
    for module_name, minimum in _optional.VERSIONS.items():
        if module_name == "pytest":
            continue
        package_name = install_names.get(module_name, module_name).casefold()
        minimum_versions[package_name] = minimum
    return minimum_versions
42+
43+
44+
def get_versions_from_ci(content: list[str]) -> tuple[dict[str, str], dict[str, str]]:
    """
    Extract pinned dependency versions from the lines of a CI environment file.

    Lines are ignored until one containing ``# required dependencies`` is
    seen. Dependencies listed after that marker are collected as required;
    once a line containing ``# optional dependencies`` is seen, subsequent
    dependencies are collected as optional.

    Parameters
    ----------
    content : list[str]
        Lines of the environment file.

    Returns
    -------
    tuple[dict[str, str], dict[str, str]]
        ``(required_deps, optional_deps)``, each mapping package name to the
        pinned version string.

    Raises
    ------
    ValueError
        If a dependency entry after the required marker has no version pin.
    """
    # Don't parse with pyyaml because it ignores comments we're looking for
    seen_required = False
    seen_optional = False
    required_deps: dict[str, str] = {}
    optional_deps: dict[str, str] = {}
    for line in content:
        if "# required dependencies" in line:
            seen_required = True
            continue
        if "# optional dependencies" in line:
            seen_optional = True
            continue
        if not seen_required:
            continue

        # Drop trailing comments so they cannot leak into the version string;
        # this also turns comment-only and blank lines into empty entries.
        entry = line.split("#", maxsplit=1)[0].strip()
        # Only list items are dependencies; an item ending in ":" (such as
        # "- pip:") opens a nested section rather than naming a package.
        if not entry.startswith("- ") or entry.endswith(":"):
            continue

        package, separator, min_version = entry[2:].strip().partition("=")
        # Accept both "pkg=1.0" and "pkg==1.0".
        min_version = min_version.lstrip("=").strip()
        package = package.strip()
        if not separator or not package or not min_version:
            raise ValueError(
                f"Dependency {entry[2:].strip()!r} is not pinned to a version "
                f"(expected 'package=version')"
            )

        if seen_optional:
            optional_deps[package] = min_version
        else:
            required_deps[package] = min_version
    return required_deps, optional_deps
63+
64+
65+
def main() -> None:
    """
    Compare optional-dependency minimum versions between CI and code.

    Reads the file at ``CI_PATH``, parses it with ``get_versions_from_ci``
    and compares the optional dependencies against the result of
    ``get_versions_from_code``. If any (package, version) pair appears on
    only one side, a message listing the differing pairs is written to
    stdout and the process exits with status 1; otherwise it exits with
    status 0.
    """
    with open(CI_PATH, encoding="utf-8") as f:
        _, ci_optional = get_versions_from_ci(f.readlines())
    code_optional = get_versions_from_code()
    diff = set(ci_optional.items()).symmetric_difference(code_optional.items())
    if diff:
        # Sort so the reported differences appear in a stable order.
        sys.stdout.write(
            f"The following minimum version differences were found between "
            f"{CI_PATH} and {CODE_PATH}. Please ensure these are aligned: "
            f"{sorted(diff)}\n"
        )
        sys.exit(1)
    sys.exit(0)
78+
79+
80+
if __name__ == "__main__":
81+
main()

0 commit comments

Comments
 (0)