diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f369fcabe3f01..b1028ea9f52c3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -313,7 +313,7 @@ repos: entry: python scripts/generate_pip_deps_from_conda.py files: ^(environment.yml|requirements-dev.txt)$ pass_filenames: false - additional_dependencies: [pyyaml, toml] + additional_dependencies: [tomli, pyyaml] - id: title-capitalization name: Validate correct capitalization among titles in documentation entry: python scripts/validate_rst_title_capitalization.py @@ -391,10 +391,11 @@ repos: types: [yaml] - id: validate-min-versions-in-sync name: Check minimum version of dependencies are aligned - entry: python scripts/validate_min_versions_in_sync.py + entry: python -m scripts.validate_min_versions_in_sync language: python files: ^(ci/deps/actions-.*-minimum_versions\.yaml|pandas/compat/_optional\.py)$ - additional_dependencies: [tomli] + additional_dependencies: [tomli, pyyaml] + pass_filenames: false - id: validate-errors-locations name: Validate errors locations description: Validate errors are in appropriate locations. diff --git a/ci/deps/actions-310-numpydev.yaml b/ci/deps/actions-310-numpydev.yaml index d1c4338f1806e..1a461319685d2 100644 --- a/ci/deps/actions-310-numpydev.yaml +++ b/ci/deps/actions-310-numpydev.yaml @@ -12,7 +12,7 @@ dependencies: - pytest-cov - pytest-xdist>=2.2.0 - hypothesis>=6.34.2 - - pytest-asyncio>=0.17 + - pytest-asyncio>=0.17.0 # pandas dependencies - python-dateutil diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml index e2bfe6e57d216..64f9a3fd1ffbc 100644 --- a/ci/deps/actions-310.yaml +++ b/ci/deps/actions-310.yaml @@ -12,7 +12,7 @@ dependencies: - pytest>=7.0.0 - pytest-cov - pytest-xdist>=2.2.0 - - pytest-asyncio>=0.17 + - pytest-asyncio>=0.17.0 - boto3 # required dependencies @@ -21,36 +21,36 @@ dependencies: - pytz # optional dependencies - - beautifulsoup4 + - beautifulsoup4>=4.9.3 - blosc - - bottleneck - - brotlipy - - fastparquet - - fsspec - - html5lib - - hypothesis - - gcsfs - - jinja2 - - lxml + - bottleneck>=1.3.2 + - brotlipy>=0.7.0 + - fastparquet>=0.6.3 + - fsspec>=2021.07.0 + - html5lib>=1.1 + - hypothesis>=6.34.2 + - gcsfs>=2021.07.0 + - jinja2>=3.0.0 + - lxml>=4.6.3 - matplotlib>=3.6.1, <3.7.0 - - numba - - numexpr - - openpyxl<3.1.1 - - odfpy - - pandas-gbq - - psycopg2 - - pymysql - - pytables + - numba>=0.53.1 + - numexpr>=2.7.3 + - openpyxl<3.1.1, >=3.0.7 + - odfpy>=1.4.1 + - pandas-gbq>=0.15.0 + - psycopg2>=2.8.6 + - pymysql>=1.0.2 + - pytables>=3.6.1 - pyarrow - - pyreadstat - - python-snappy - - pyxlsb + - pyreadstat>=1.1.2 + - python-snappy>=0.6.0 + - pyxlsb>=1.0.8 - s3fs>=2021.08.0 - - scipy - - sqlalchemy - - tabulate + - scipy>=1.7.1 + - sqlalchemy>=1.4.16 + - tabulate>=0.8.9 - tzdata>=2022a - - xarray - - xlrd - - xlsxwriter - - zstandard + - xarray>=0.21.0 + - xlrd>=2.0.1 + - xlsxwriter>=1.4.3 + - zstandard>=0.15.2 diff --git a/ci/deps/actions-311.yaml b/ci/deps/actions-311.yaml index 237924b5c6f0b..d474df1e75655 100644 --- a/ci/deps/actions-311.yaml +++ b/ci/deps/actions-311.yaml @@ -9,10 +9,10 @@ dependencies: - cython>=0.29.32 # test dependencies - - pytest>=7.0 + - pytest>=7.0.0 - pytest-cov - pytest-xdist>=2.2.0 - - pytest-asyncio>=0.17 + - pytest-asyncio>=0.17.0 - boto3 # required dependencies @@ -21,36 +21,36 @@ dependencies: - pytz # optional dependencies - - beautifulsoup4 + - beautifulsoup4>=4.9.3 - blosc - - bottleneck - - brotlipy - - fastparquet - - fsspec - - html5lib - - hypothesis - - gcsfs - - jinja2 - - lxml + - bottleneck>=1.3.2 + - brotlipy>=0.7.0 + - fastparquet>=0.6.3 + - fsspec>=2021.07.0 + - html5lib>=1.1 + - hypothesis>=6.34.2 + - gcsfs>=2021.07.0 + - jinja2>=3.0.0 + - lxml>=4.6.3 - matplotlib>=3.6.1, <3.7.0 # - numba not compatible with 3.11 - - numexpr - - openpyxl<3.1.1 - - odfpy - - pandas-gbq - - psycopg2 - - pymysql + - numexpr>=2.7.3 + - openpyxl<3.1.1, >=3.0.7 + - odfpy>=1.4.1 + - pandas-gbq>=0.15.0 + - psycopg2>=2.8.6 + - pymysql>=1.0.2 # - pytables>=3.8.0 # first version that supports 3.11 - pyarrow - - pyreadstat - - python-snappy - - pyxlsb + - pyreadstat>=1.1.2 + - python-snappy>=0.6.0 + - pyxlsb>=1.0.8 - s3fs>=2021.08.0 - - scipy - - sqlalchemy - - tabulate + - scipy>=1.7.1 + - sqlalchemy>=1.4.16 + - tabulate>=0.8.9 - tzdata>=2022a - - xarray - - xlrd - - xlsxwriter - - zstandard + - xarray>=0.21.0 + - xlrd>=2.0.1 + - xlsxwriter>=1.4.3 + - zstandard>=0.15.2 diff --git a/ci/deps/actions-38-downstream_compat.yaml b/ci/deps/actions-38-downstream_compat.yaml index 9b62b25a15740..a9cd4c93dd604 100644 --- a/ci/deps/actions-38-downstream_compat.yaml +++ b/ci/deps/actions-38-downstream_compat.yaml @@ -13,7 +13,7 @@ dependencies: - pytest>=7.0.0 - pytest-cov - pytest-xdist>=2.2.0 - - pytest-asyncio>=0.17 + - pytest-asyncio>=0.17.0 - boto3 # required dependencies @@ -22,37 +22,37 @@ dependencies: - pytz # optional dependencies - - beautifulsoup4 + - beautifulsoup4>=4.9.3 - blosc - - brotlipy - - bottleneck - - fastparquet - - fsspec - - html5lib - - hypothesis - - gcsfs - - jinja2 - - lxml + - brotlipy>=0.7.0 + - bottleneck>=1.3.2 + - fastparquet>=0.6.3 + - fsspec>=2021.07.0 + - html5lib>=1.1 + - hypothesis>=6.34.2 + - gcsfs>=2021.07.0 + - jinja2>=3.0.0 + - lxml>=4.6.3 - matplotlib>=3.6.1, <3.7.0 - - numba - - numexpr - - openpyxl<3.1.1 - - odfpy - - psycopg2 + - numba>=0.53.1 + - numexpr>=2.7.3 + - openpyxl<3.1.1, >=3.0.7 + - odfpy>=1.4.1 + - psycopg2>=2.8.6 - pyarrow - - pymysql - - pyreadstat - - pytables - - python-snappy - - pyxlsb + - pymysql>=1.0.2 + - pyreadstat>=1.1.2 + - pytables>=3.6.1 + - python-snappy>=0.6.0 + - pyxlsb>=1.0.8 - s3fs>=2021.08.0 - - scipy - - sqlalchemy - - tabulate - - xarray - - xlrd - - xlsxwriter - - zstandard + - scipy>=1.7.1 + - sqlalchemy>=1.4.16 + - tabulate>=0.8.9 + - xarray>=0.21.0 + - xlrd>=2.0.1 + - xlsxwriter>=1.4.3 + - zstandard>=0.15.2 # downstream packages - botocore @@ -65,6 +65,6 @@ dependencies: - statsmodels - coverage - pandas-datareader - - pandas-gbq + - pandas-gbq>=0.15.0 - pyyaml - py diff --git a/ci/deps/actions-38-minimum_versions.yaml b/ci/deps/actions-38-minimum_versions.yaml index f3ff36a1b2ada..6877d7f14f66a 100644 --- a/ci/deps/actions-38-minimum_versions.yaml +++ b/ci/deps/actions-38-minimum_versions.yaml @@ -14,7 +14,7 @@ dependencies: - pytest>=7.0.0 - pytest-cov - pytest-xdist>=2.2.0 - - pytest-asyncio>=0.17 + - pytest-asyncio>=0.17.0 - boto3 # required dependencies diff --git a/ci/deps/actions-38.yaml b/ci/deps/actions-38.yaml index 95bab9897ac63..ccde0f57f7bc4 100644 --- a/ci/deps/actions-38.yaml +++ b/ci/deps/actions-38.yaml @@ -12,7 +12,7 @@ dependencies: - pytest>=7.0.0 - pytest-cov - pytest-xdist>=2.2.0 - - pytest-asyncio>=0.17 + - pytest-asyncio>=0.17.0 - boto3 # required dependencies @@ -21,35 +21,35 @@ dependencies: - pytz # optional dependencies - - beautifulsoup4 + - beautifulsoup4>=4.9.3 - blosc - - bottleneck - - brotlipy - - fastparquet - - fsspec - - html5lib - - hypothesis - - gcsfs - - jinja2 - - lxml + - bottleneck>=1.3.2 + - brotlipy>=0.7.0 + - fastparquet>=0.6.3 + - fsspec>=2021.07.0 + - html5lib>=1.1 + - hypothesis>=6.34.2 + - gcsfs>=2021.07.0 + - jinja2>=3.0.0 + - lxml>=4.6.3 - matplotlib>=3.6.1, <3.7.0 - - numba - - numexpr - - openpyxl<3.1.1 - - odfpy - - pandas-gbq - - psycopg2 + - numba>=0.53.1 + - numexpr>=2.7.3 + - openpyxl<3.1.1, >=3.0.7 + - odfpy>=1.4.1 + - pandas-gbq>=0.15.0 + - psycopg2>=2.8.6 - pyarrow - - pymysql - - pyreadstat - - pytables - - python-snappy - - pyxlsb + - pymysql>=1.0.2 + - pyreadstat>=1.1.2 + - pytables>=3.6.1 + - python-snappy>=0.6.0 + - pyxlsb>=1.0.8 - s3fs>=2021.08.0 - - scipy - - sqlalchemy - - tabulate - - xarray - - xlrd - - xlsxwriter - - zstandard + - scipy>=1.7.1 + - sqlalchemy>=1.4.16 + - tabulate>=0.8.9 + - xarray>=0.21.0 + - xlrd>=2.0.1 + - xlsxwriter>=1.4.3 + - zstandard>=0.15.2 diff --git a/ci/deps/actions-39.yaml b/ci/deps/actions-39.yaml index 9d95e28ae9fb6..aeb887d7ec1ab 100644 --- a/ci/deps/actions-39.yaml +++ b/ci/deps/actions-39.yaml @@ -12,7 +12,7 @@ dependencies: - pytest>=7.0.0 - pytest-cov - pytest-xdist>=2.2.0 - - pytest-asyncio>=0.17 + - pytest-asyncio>=0.17.0 - boto3 # required dependencies @@ -21,36 +21,36 @@ dependencies: - pytz # optional dependencies - - beautifulsoup4 + - beautifulsoup4>=4.9.3 - blosc - - bottleneck - - brotlipy - - fastparquet - - fsspec - - html5lib - - hypothesis - - gcsfs - - jinja2 - - lxml + - bottleneck>=1.3.2 + - brotlipy>=0.7.0 + - fastparquet>=0.6.3 + - fsspec>=2021.07.0 + - html5lib>=1.1 + - hypothesis>=6.34.2 + - gcsfs>=2021.07.0 + - jinja2>=3.0.0 + - lxml>=4.6.3 - matplotlib>=3.6.1, <3.7.0 - - numba - - numexpr - - openpyxl<3.1.1 - - odfpy - - pandas-gbq - - psycopg2 - - pymysql + - numba>=0.53.1 + - numexpr>=2.7.3 + - openpyxl<3.1.1, >=3.0.7 + - odfpy>=1.4.1 + - pandas-gbq>=0.15.0 + - psycopg2>=2.8.6 + - pymysql>=1.0.2 - pyarrow - - pyreadstat - - pytables - - python-snappy - - pyxlsb + - pyreadstat>=1.1.2 + - pytables>=3.6.1 + - python-snappy>=0.6.0 + - pyxlsb>=1.0.8 - s3fs>=2021.08.0 - - scipy - - sqlalchemy - - tabulate + - scipy>=1.7.1 + - sqlalchemy>=1.4.16 + - tabulate>=0.8.9 - tzdata>=2022a - - xarray - - xlrd - - xlsxwriter - - zstandard + - xarray>=0.21.0 + - xlrd>=2.0.1 + - xlsxwriter>=1.4.3 + - zstandard>=0.15.2 diff --git a/ci/deps/actions-pypy-38.yaml b/ci/deps/actions-pypy-38.yaml index 3218ec13a9c40..1fde1e733be5a 100644 --- a/ci/deps/actions-pypy-38.yaml +++ b/ci/deps/actions-pypy-38.yaml @@ -14,7 +14,7 @@ dependencies: # test dependencies - pytest>=7.0.0 - pytest-cov - - pytest-asyncio + - pytest-asyncio>=0.17.0 - pytest-xdist>=2.2.0 - hypothesis>=6.34.2 diff --git a/ci/deps/circle-38-arm64.yaml b/ci/deps/circle-38-arm64.yaml index 1548eb3d4929d..0d1a5f765b5ce 100644 --- a/ci/deps/circle-38-arm64.yaml +++ b/ci/deps/circle-38-arm64.yaml @@ -12,7 +12,7 @@ dependencies: - pytest>=7.0.0 - pytest-cov - pytest-xdist>=2.2.0 - - pytest-asyncio>=0.17 + - pytest-asyncio>=0.17.0 - boto3 # required dependencies @@ -21,36 +21,36 @@ dependencies: - pytz # optional dependencies - - beautifulsoup4 + - beautifulsoup4>=4.9.3 - blosc - - bottleneck - - brotlipy - - fastparquet - - fsspec - - html5lib - - hypothesis - - gcsfs - - jinja2 - - lxml + - bottleneck>=1.3.2 + - brotlipy>=0.7.0 + - fastparquet>=0.6.3 + - fsspec>=2021.07.0 + - html5lib>=1.1 + - hypothesis>=6.34.2 + - gcsfs>=2021.07.0 + - jinja2>=3.0.0 + - lxml>=4.6.3 - matplotlib>=3.6.1, <3.7.0 - - numba - - numexpr - - openpyxl<3.1.1 - - odfpy - - pandas-gbq - - psycopg2 + - numba>=0.53.1 + - numexpr>=2.7.3 + - openpyxl<3.1.1, >=3.0.7 + - odfpy>=1.4.1 + - pandas-gbq>=0.15.0 + - psycopg2>=2.8.6 - pyarrow - - pymysql + - pymysql>=1.0.2 # Not provided on ARM #- pyreadstat - - pytables - - python-snappy - - pyxlsb + - pytables>=3.6.1 + - python-snappy>=0.6.0 + - pyxlsb>=1.0.8 - s3fs>=2021.08.0 - - scipy - - sqlalchemy - - tabulate - - xarray - - xlrd - - xlsxwriter - - zstandard + - scipy>=1.7.1 + - sqlalchemy>=1.4.16 + - tabulate>=0.8.9 + - xarray>=0.21.0 + - xlrd>=2.0.1 + - xlsxwriter>=1.4.3 + - zstandard>=0.15.2 diff --git a/environment.yml b/environment.yml index 41c93de50bff3..e748d20d6d6f0 100644 --- a/environment.yml +++ b/environment.yml @@ -14,7 +14,7 @@ dependencies: - pytest>=7.0.0 - pytest-cov - pytest-xdist>=2.2.0 - - pytest-asyncio>=0.17 + - pytest-asyncio>=0.17.0 - coverage # required dependencies @@ -23,40 +23,40 @@ dependencies: - pytz # optional dependencies - - beautifulsoup4 + - beautifulsoup4>=4.9.3 - blosc - - brotlipy - - bottleneck - - fastparquet - - fsspec - - html5lib - - hypothesis - - gcsfs + - brotlipy>=0.7.0 + - bottleneck>=1.3.2 + - fastparquet>=0.6.3 + - fsspec>=2021.07.0 + - html5lib>=1.1 + - hypothesis>=6.34.2 + - gcsfs>=2021.07.0 - ipython - - jinja2 - - lxml + - jinja2>=3.0.0 + - lxml>=4.6.3 - matplotlib>=3.6.1, <3.7.0 - numba>=0.53.1 - - numexpr>=2.8.0 # pin for "Run checks on imported code" job - - openpyxl<3.1.1 - - odfpy + - numexpr>=2.7.3 # pin for "Run checks on imported code" job + - openpyxl<3.1.1, >=3.0.7 + - odfpy>=1.4.1 - py - - psycopg2 + - psycopg2>=2.8.6 - pyarrow - - pymysql - - pyreadstat - - pytables - - python-snappy - - pyxlsb + - pymysql>=1.0.2 + - pyreadstat>=1.1.2 + - pytables>=3.6.1 + - python-snappy>=0.6.0 + - pyxlsb>=1.0.8 - s3fs>=2021.08.0 - - scipy - - sqlalchemy - - tabulate + - scipy>=1.7.1 + - sqlalchemy>=1.4.16 + - tabulate>=0.8.9 - tzdata>=2022a - - xarray - - xlrd - - xlsxwriter - - zstandard + - xarray>=0.21.0 + - xlrd>=2.0.1 + - xlsxwriter>=1.4.3 + - zstandard>=0.15.2 # downstream packages - dask-core @@ -96,6 +96,7 @@ dependencies: - types-python-dateutil - types-PyMySQL - types-pytz + - types-PyYAML - types-setuptools # documentation (jupyter notebooks) diff --git a/pyproject.toml b/pyproject.toml index 8c3d27b6bb5d3..c3a7cb013ca6c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -289,6 +289,7 @@ disable = [ "broad-except", "c-extension-no-member", "comparison-with-itself", + "consider-using-enumerate", "import-error", "import-outside-toplevel", "invalid-name", diff --git a/requirements-dev.txt b/requirements-dev.txt index 1ad81472b1779..0329588de17fd 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -7,45 +7,45 @@ cython==0.29.32 pytest>=7.0.0 pytest-cov pytest-xdist>=2.2.0 -pytest-asyncio>=0.17 +pytest-asyncio>=0.17.0 coverage python-dateutil numpy pytz -beautifulsoup4 +beautifulsoup4>=4.9.3 blosc -brotlipy -bottleneck -fastparquet -fsspec -html5lib -hypothesis -gcsfs +brotlipy>=0.7.0 +bottleneck>=1.3.2 +fastparquet>=0.6.3 +fsspec>=2021.07.0 +html5lib>=1.1 +hypothesis>=6.34.2 +gcsfs>=2021.07.0 ipython -jinja2 -lxml +jinja2>=3.0.0 +lxml>=4.6.3 matplotlib>=3.6.1, <3.7.0 numba>=0.53.1 -numexpr>=2.8.0 -openpyxl<3.1.1 -odfpy +numexpr>=2.7.3 +openpyxl<3.1.1, >=3.0.7 +odfpy>=1.4.1 py -psycopg2-binary +psycopg2-binary>=2.8.6 pyarrow -pymysql -pyreadstat -tables -python-snappy -pyxlsb +pymysql>=1.0.2 +pyreadstat>=1.1.2 +tables>=3.6.1 +python-snappy>=0.6.0 +pyxlsb>=1.0.8 s3fs>=2021.08.0 -scipy -sqlalchemy -tabulate +scipy>=1.7.1 +SQLAlchemy>=1.4.16 +tabulate>=0.8.9 tzdata>=2022.1 -xarray -xlrd -xlsxwriter -zstandard +xarray>=0.21.0 +xlrd>=2.0.1 +xlsxwriter>=1.4.3 +zstandard>=0.15.2 dask seaborn moto @@ -71,6 +71,7 @@ sphinx-copybutton types-python-dateutil types-PyMySQL types-pytz +types-PyYAML types-setuptools nbconvert>=6.4.5 nbsphinx diff --git a/scripts/generate_pip_deps_from_conda.py b/scripts/generate_pip_deps_from_conda.py index 8190104428724..2ca4455158db5 100755 --- a/scripts/generate_pip_deps_from_conda.py +++ b/scripts/generate_pip_deps_from_conda.py @@ -17,7 +17,10 @@ import re import sys -import toml +if sys.version_info >= (3, 11): + import tomllib +else: + import tomli as tomllib import yaml EXCLUDE = {"python", "c-compiler", "cxx-compiler"} @@ -27,6 +30,7 @@ "psycopg2": "psycopg2-binary", "dask-core": "dask", "seaborn-base": "seaborn", + "sqlalchemy": "SQLAlchemy", } @@ -105,7 +109,8 @@ def generate_pip_from_conda( pip_content = header + "\n".join(pip_deps) + "\n" # add setuptools to requirements-dev.txt - meta = toml.load(pathlib.Path(conda_path.parent, "pyproject.toml")) + with open(pathlib.Path(conda_path.parent, "pyproject.toml"), "rb") as fd: + meta = tomllib.load(fd) for requirement in meta["build-system"]["requires"]: if "setuptools" in requirement: pip_content += requirement diff --git a/scripts/tests/data/deps_expected_duplicate_package.yaml b/scripts/tests/data/deps_expected_duplicate_package.yaml new file mode 100644 index 0000000000000..72721c2842707 --- /dev/null +++ b/scripts/tests/data/deps_expected_duplicate_package.yaml @@ -0,0 +1,4 @@ +# Test: duplicate package +dependencies: + - jinja2>=3.0.0 + - jinja2>=3.0.0 diff --git a/scripts/tests/data/deps_expected_no_version.yaml b/scripts/tests/data/deps_expected_no_version.yaml new file mode 100644 index 0000000000000..843e48330a928 --- /dev/null +++ b/scripts/tests/data/deps_expected_no_version.yaml @@ -0,0 +1,5 @@ +# Test: empty version +dependencies: + - jinja2>=3.0.0 + - scipy>=1.7.1 + - SQLAlchemy>=1.4.16 diff --git a/scripts/tests/data/deps_expected_random.yaml b/scripts/tests/data/deps_expected_random.yaml new file mode 100644 index 0000000000000..be5e467b57e10 --- /dev/null +++ b/scripts/tests/data/deps_expected_random.yaml @@ -0,0 +1,57 @@ +# Test: random +name: pandas-dev +channels: + - conda-forge +dependencies: + - python=3.8 + + # build dependencies + - versioneer[toml] + - cython>=0.29.32 + + # test dependencies + - pytest>=7.0.0 + - pytest-cov + - pytest-xdist>=2.2.0 + - psutil + - pytest-asyncio>=0.17.0 + - boto3 + + # required dependencies + - python-dateutil + - numpy + - pytz + + # optional dependencies + - beautifulsoup4>=5.9.3 + - blosc + - bottleneck>=1.3.2 + - brotlipy>=0.7.0 + - fastparquet>=0.6.3 + - fsspec>=2021.07.0 + - html5lib>=1.1 + - hypothesis>=6.34.2 + - gcsfs>=2021.07.0 + - jinja2>=3.0.0 + - lxml>=4.6.3 + - matplotlib>=3.6.1 + - numba>=0.53.1 + - numexpr>=2.7.3 + - openpyxl>=3.0.7 + - odfpy>=1.4.1 + - pandas-gbq>=0.15.0 + - psycopg2>=2.8.6 + - pyarrow<11, >=7.0.0 + - pymysql>=1.0.2 + - pyreadstat>=1.1.2 + - pytables>=3.6.1 + - python-snappy>=0.6.0 + - pyxlsb>=1.0.8 + - s3fs>=2021.08.0 + - scipy>=1.7.1 + - sqlalchemy>=1.4.16 + - tabulate>=0.8.9 + - xarray>=0.21.0 + - xlrd>=2.0.1 + - xlsxwriter>=1.4.3 + - zstandard>=0.15.2 diff --git a/scripts/tests/data/deps_expected_range.yaml b/scripts/tests/data/deps_expected_range.yaml new file mode 100644 index 0000000000000..c8e25076ef3b0 --- /dev/null +++ b/scripts/tests/data/deps_expected_range.yaml @@ -0,0 +1,5 @@ +# Test: range +dependencies: + - jinja2<8, >=3.0.0 + - scipy<9, >=1.7.1 + - SQLAlchemy<2.0, >=1.4.16 diff --git a/scripts/tests/data/deps_expected_same_version.yaml b/scripts/tests/data/deps_expected_same_version.yaml new file mode 100644 index 0000000000000..e07b221ecd44f --- /dev/null +++ b/scripts/tests/data/deps_expected_same_version.yaml @@ -0,0 +1,3 @@ +# Test: same version +dependencies: + - jinja2>=3.0.0 diff --git a/scripts/tests/data/deps_minimum.toml b/scripts/tests/data/deps_minimum.toml new file mode 100644 index 0000000000000..97a5ce1180bfb --- /dev/null +++ b/scripts/tests/data/deps_minimum.toml @@ -0,0 +1,537 @@ +[build-system] +# Minimum requirements for the build system to execute. +# See https://github.com/scipy/scipy/pull/12940 for the AIX issue. +requires = [ + "setuptools>=61.0.0", + "wheel", + "Cython>=0.29.32,<3", # Note: sync with setup.py, environment.yml and asv.conf.json + "oldest-supported-numpy>=2022.8.16", + "versioneer[toml]" +] +# build-backend = "setuptools.build_meta" + +[project] +name = 'pandas' +dynamic = [ + 'version' +] +description = 'Powerful data structures for data analysis, time series, and statistics' +readme = 'README.md' +authors = [ + { name = 'The Pandas Development Team', email='pandas-dev@python.org' }, +] +license = {file = 'LICENSE'} +requires-python = '>=3.8' +dependencies = [ + "numpy>=1.20.3; python_version<'3.10'", + "numpy>=1.21.0; python_version>='3.10'", + "numpy>=1.23.2; python_version>='3.11'", + "python-dateutil>=2.8.2", + "pytz>=2020.1" +] +classifiers = [ + 'Development Status :: 5 - Production/Stable', + 'Environment :: Console', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: BSD License', + 'Operating System :: OS Independent', + 'Programming Language :: Cython', + 'Programming Language :: Python', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3 :: Only', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Topic :: Scientific/Engineering' +] + +[project.urls] +homepage = 'https://pandas.pydata.org' +documentation = 'https://pandas.pydata.org/docs/' +repository = 'https://github.com/pandas-dev/pandas' + +[project.entry-points."pandas_plotting_backends"] +matplotlib = "pandas:plotting._matplotlib" + +[project.optional-dependencies] +test = ['hypothesis>=6.34.2', 'pytest>=7.0.0', 'pytest-xdist>=2.2.0', 'pytest-asyncio>=0.17.0'] +performance = ['bottleneck>=1.3.2', 'numba>=0.53.1', 'numexpr>=2.7.1'] +timezone = ['tzdata>=2022.1'] +computation = ['scipy>=1.7.1', 'xarray>=0.21.0'] +fss = ['fsspec>=2021.07.0'] +aws = ['s3fs>=2021.08.0'] +gcp = ['gcsfs>=2021.07.0', 'pandas-gbq>=0.15.0'] +excel = ['odfpy>=1.4.1', 'openpyxl>=3.0.7', 'pyxlsb>=1.0.8', 'xlrd>=2.0.1', 'xlsxwriter>=1.4.3'] +parquet = ['pyarrow>=7.0.0'] +feather = ['pyarrow>=7.0.0'] +hdf5 = [# blosc only available on conda (https://github.com/Blosc/python-blosc/issues/297) + #'blosc>=1.20.1', + 'tables>=3.6.1'] +spss = ['pyreadstat>=1.1.2'] +postgresql = ['SQLAlchemy>=1.4.16', 'psycopg2>=2.8.6'] +mysql = ['SQLAlchemy>=1.4.16', 'pymysql>=1.0.2'] +sql-other = ['SQLAlchemy>=1.4.16'] +html = ['beautifulsoup4>=4.9.3', 'html5lib>=1.1', 'lxml>=4.6.3'] +xml = ['lxml>=4.6.3'] +plot = ['matplotlib>=3.6.1'] +output_formatting = ['jinja2>=3.0.0', 'tabulate>=0.8.9'] +clipboard = ['PyQt5>=5.15.1', 'qtpy>=2.2.0'] +compression = ['brotlipy>=0.7.0', 'python-snappy>=0.6.0', 'zstandard>=0.15.2'] +all = ['beautifulsoup4>=5.9.3', + # blosc only available on conda (https://github.com/Blosc/python-blosc/issues/297) + #'blosc>=1.21.0', + 'bottleneck>=1.3.2', + 'brotlipy>=0.7.0', + 'fastparquet>=0.6.3', + 'fsspec>=2021.07.0', + 'gcsfs>=2021.07.0', + 'html5lib>=1.1', + 'hypothesis>=6.34.2', + 'jinja2>=3.0.0', + 'lxml>=4.6.3', + 'matplotlib>=3.6.1', + 'numba>=0.53.1', + 'numexpr>=2.7.3', + 'odfpy>=1.4.1', + 'openpyxl>=3.0.7', + 'pandas-gbq>=0.15.0', + 'psycopg2>=2.8.6', + 'pyarrow>=7.0.0', + 'pymysql>=1.0.2', + 'PyQt5>=5.15.1', + 'pyreadstat>=1.1.2', + 'pytest>=7.0.0', + 'pytest-xdist>=2.2.0', + 'pytest-asyncio>=0.17.0', + 'python-snappy>=0.6.0', + 'pyxlsb>=1.0.8', + 'qtpy>=2.2.0', + 'scipy>=1.7.1', + 's3fs>=2021.08.0', + 'SQLAlchemy>=1.4.16', + 'tables>=3.6.1', + 'tabulate>=0.8.9', + 'tzdata>=2022.1', + 'xarray>=0.21.0', + 'xlrd>=2.0.1', + 'xlsxwriter>=1.4.3', + 'zstandard>=0.15.2'] + +# TODO: Remove after setuptools support is dropped. +[tool.setuptools] +include-package-data = true + +[tool.setuptools.packages.find] +include = ["pandas", "pandas.*"] +namespaces = false + +[tool.setuptools.exclude-package-data] +"*" = ["*.c", "*.h"] + +# See the docstring in versioneer.py for instructions. Note that you must +# re-run 'versioneer.py setup' after changing this section, and commit the +# resulting files. +[tool.versioneer] +VCS = "git" +style = "pep440" +versionfile_source = "pandas/_version.py" +versionfile_build = "pandas/_version.py" +tag_prefix = "v" +parentdir_prefix = "pandas-" + +[tool.cibuildwheel] +skip = "cp36-* cp37-* pp37-* *-manylinux_i686 *_ppc64le *_s390x *-musllinux*" +build-verbosity = "3" +test-requires = "hypothesis>=6.34.2 pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17" +test-command = "python {project}/ci/test_wheels.py" + +[tool.cibuildwheel.macos] +archs = "x86_64 arm64" +test-skip = "*_arm64" + +[tool.cibuildwheel.windows] +repair-wheel-command = "python ci/fix_wheels.py {wheel} {dest_dir}" + +[[tool.cibuildwheel.overrides]] +select = "*-win*" +# We test separately for Windows, since we use +# the base windows docker image to check if any dlls are +# missing from the wheel +test-command = "" + +[[tool.cibuildwheel.overrides]] +select = "*-win32" +environment = { IS_32_BIT="true" } + +[tool.black] +target-version = ['py38', 'py39'] +exclude = ''' +( + asv_bench/env + | \.egg + | \.git + | \.hg + | \.mypy_cache + | \.nox + | \.tox + | \.venv + | _build + | buck-out + | build + | dist + | setup.py +) +''' + +[tool.ruff] +line-length = 88 +update-check = false +target-version = "py38" + +select = [ + # pyflakes + "F", + # pycodestyle + "E", + "W", + # flake8-2020 + "YTT", + # flake8-bugbear + "B", + # flake8-quotes + "Q", + # pylint + "PLE", "PLR", "PLW", +] + +ignore = [ + # space before : (needed for how black formats slicing) + # "E203", # not yet implemented + # module level import not at top of file + "E402", + # do not assign a lambda expression, use a def + "E731", + # line break before binary operator + # "W503", # not yet implemented + # line break after binary operator + # "W504", # not yet implemented + # controversial + "B006", + # controversial + "B007", + # controversial + "B008", + # setattr is used to side-step mypy + "B009", + # getattr is used to side-step mypy + "B010", + # tests use assert False + "B011", + # tests use comparisons but not their returned value + "B015", + # false positives + "B019", + # Loop control variable overrides iterable it iterates + "B020", + # Function definition does not bind loop variable + "B023", + # Functions defined inside a loop must not use variables redefined in the loop + # "B301", # not yet implemented + + # Additional checks that don't pass yet + # Within an except clause, raise exceptions with ... + "B904", +] + +exclude = [ + "doc/sphinxext/*.py", + "doc/build/*.py", + "doc/temp/*.py", + ".eggs/*.py", + "versioneer.py", + # exclude asv benchmark environments from linting + "env", +] + +[tool.pylint.messages_control] +max-line-length = 88 +disable = [ + # intentionally turned off + "broad-except", + "c-extension-no-member", + "comparison-with-itself", + "import-error", + "import-outside-toplevel", + "invalid-name", + "invalid-unary-operand-type", + "line-too-long", + "no-else-continue", + "no-else-raise", + "no-else-return", + "no-member", + "no-name-in-module", + "not-an-iterable", + "overridden-final-method", + "pointless-statement", + "redundant-keyword-arg", + "singleton-comparison", + "too-many-ancestors", + "too-many-arguments", + "too-many-boolean-expressions", + "too-many-branches", + "too-many-function-args", + "too-many-instance-attributes", + "too-many-locals", + "too-many-nested-blocks", + "too-many-public-methods", + "too-many-return-statements", + "too-many-statements", + "unexpected-keyword-arg", + "ungrouped-imports", + "unsubscriptable-object", + "unsupported-assignment-operation", + "unsupported-membership-test", + "unused-import", + "use-implicit-booleaness-not-comparison", + "use-implicit-booleaness-not-len", + "wrong-import-order", + "wrong-import-position", + + # misc + "abstract-class-instantiated", + "no-value-for-parameter", + "undefined-variable", + "unpacking-non-sequence", + + # pylint type "C": convention, for programming standard violation + "missing-class-docstring", + "missing-function-docstring", + "missing-module-docstring", + "too-many-lines", + "unidiomatic-typecheck", + "unnecessary-dunder-call", + "unnecessary-lambda-assignment", + + # pylint type "R": refactor, for bad code smell + "consider-using-with", + "cyclic-import", + "duplicate-code", + "inconsistent-return-statements", + "redefined-argument-from-local", + "too-few-public-methods", + + # pylint type "W": warning, for python specific problems + "abstract-method", + "arguments-differ", + "arguments-out-of-order", + "arguments-renamed", + "attribute-defined-outside-init", + "comparison-with-callable", + "dangerous-default-value", + "deprecated-module", + "eval-used", + "expression-not-assigned", + "fixme", + "global-statement", + "invalid-overridden-method", + "keyword-arg-before-vararg", + "possibly-unused-variable", + "protected-access", + "raise-missing-from", + "redefined-builtin", + "redefined-outer-name", + "self-cls-assignment", + "signature-differs", + "super-init-not-called", + "try-except-raise", + "unnecessary-lambda", + "unspecified-encoding", + "unused-argument", + "unused-variable", + "using-constant-test" +] + +[tool.pytest.ini_options] +# sync minversion with pyproject.toml & install.rst +minversion = "7.0" +addopts = "--strict-data-files --strict-markers --strict-config --capture=no --durations=30 --junitxml=test-data.xml" +empty_parameter_set_mark = "fail_at_collect" +xfail_strict = true +testpaths = "pandas" +doctest_optionflags = [ + "NORMALIZE_WHITESPACE", + "IGNORE_EXCEPTION_DETAIL", + "ELLIPSIS", +] +filterwarnings = [ + # Will be fixed in numba 0.56: https://github.com/numba/numba/issues/7758 + "ignore:`np.MachAr` is deprecated:DeprecationWarning:numba", + "ignore:.*urllib3:DeprecationWarning:botocore", + "ignore:Setuptools is replacing distutils.:UserWarning:_distutils_hack", + # https://github.com/PyTables/PyTables/issues/822 + "ignore:a closed node found in the registry:UserWarning:tables", + "ignore:`np.object` is a deprecated:DeprecationWarning:tables", + "ignore:tostring:DeprecationWarning:tables", + "ignore:distutils Version classes are deprecated:DeprecationWarning:numexpr", + "ignore:distutils Version classes are deprecated:DeprecationWarning:fastparquet", + "ignore:distutils Version classes are deprecated:DeprecationWarning:fsspec", +] +junit_family = "xunit2" +markers = [ + "single_cpu: tests that should run on a single cpu only", + "slow: mark a test as slow", + "network: mark a test as network", + "db: tests requiring a database (mysql or postgres)", + "clipboard: mark a pd.read_clipboard test", + "arm_slow: mark a test as slow for arm64 architecture", + "arraymanager: mark a test to run with ArrayManager enabled", +] +asyncio_mode = "strict" + +[tool.mypy] +# Import discovery +mypy_path = "typings" +files = ["pandas", "typings"] +namespace_packages = false +explicit_package_bases = false +ignore_missing_imports = true +follow_imports = "normal" +follow_imports_for_stubs = false +no_site_packages = false +no_silence_site_packages = false +# Platform configuration +python_version = "3.8" +platform = "linux-64" +# Disallow dynamic typing +disallow_any_unimported = false # TODO +disallow_any_expr = false # TODO +disallow_any_decorated = false # TODO +disallow_any_explicit = false # TODO +disallow_any_generics = false # TODO +disallow_subclassing_any = false # TODO +# Untyped definitions and calls +disallow_untyped_calls = false # TODO +disallow_untyped_defs = false # TODO +disallow_incomplete_defs = false # TODO +check_untyped_defs = true +disallow_untyped_decorators = true +# None and Optional handling +no_implicit_optional = true +strict_optional = true +# Configuring warnings +warn_redundant_casts = true +warn_unused_ignores = true +warn_no_return = true +warn_return_any = false # TODO +warn_unreachable = false # GH#27396 +# Suppressing errors +ignore_errors = false +enable_error_code = "ignore-without-code" +# Miscellaneous strictness flags +allow_untyped_globals = false +allow_redefinition = false +local_partial_types = false +implicit_reexport = true +strict_equality = true +# Configuring error messages +show_error_context = false +show_column_numbers = false +show_error_codes = true + +[[tool.mypy.overrides]] +module = [ + "pandas.tests.*", + "pandas._version", + "pandas.io.clipboard", +] +check_untyped_defs = false + +[[tool.mypy.overrides]] +module = [ + "pandas.tests.apply.test_series_apply", + "pandas.tests.arithmetic.conftest", + "pandas.tests.arrays.sparse.test_combine_concat", + "pandas.tests.dtypes.test_common", + "pandas.tests.frame.methods.test_to_records", + "pandas.tests.groupby.test_rank", + "pandas.tests.groupby.transform.test_transform", + "pandas.tests.indexes.interval.test_interval", + "pandas.tests.indexing.test_categorical", + "pandas.tests.io.excel.test_writers", + "pandas.tests.reductions.test_reductions", + "pandas.tests.test_expressions", +] +ignore_errors = true + +# To be kept consistent with "Import Formatting" section in contributing.rst +[tool.isort] +known_pre_libs = "pandas._config" +known_pre_core = ["pandas._libs", "pandas._typing", "pandas.util._*", "pandas.compat", "pandas.errors"] +known_dtypes = "pandas.core.dtypes" +known_post_core = ["pandas.tseries", "pandas.io", "pandas.plotting"] +sections = ["FUTURE", "STDLIB", "THIRDPARTY" ,"PRE_LIBS" , "PRE_CORE", "DTYPES", "FIRSTPARTY", "POST_CORE", "LOCALFOLDER"] +profile = "black" +combine_as_imports = true +force_grid_wrap = 2 +force_sort_within_sections = true +skip_glob = "env" +skip = "pandas/__init__.py" + +[tool.pyright] +pythonVersion = "3.8" +typeCheckingMode = "basic" +include = ["pandas", "typings"] +exclude = ["pandas/tests", "pandas/io/clipboard", "pandas/util/version"] +# enable subset of "strict" +reportDuplicateImport = true +reportInvalidStubStatement = true +reportOverlappingOverload = true +reportPropertyTypeMismatch = true +reportUntypedClassDecorator = true +reportUntypedFunctionDecorator = true +reportUntypedNamedTuple = true +reportUnusedImport = true +# disable subset of "basic" +reportGeneralTypeIssues = false +reportMissingModuleSource = false +reportOptionalCall = false +reportOptionalIterable = false +reportOptionalMemberAccess = false +reportOptionalOperand = false +reportOptionalSubscript = false +reportPrivateImportUsage = false +reportUnboundVariable = false + +[tool.coverage.run] +branch = true +omit = ["pandas/_typing.py", "pandas/_version.py"] +plugins = ["Cython.Coverage"] +source = ["pandas"] + +[tool.coverage.report] +ignore_errors = false +show_missing = true +omit = ["pandas/_version.py"] +exclude_lines = [ + # Have to re-enable the standard pragma + "pragma: no cover", + # Don't complain about missing debug-only code:s + "def __repr__", + "if self.debug", + # Don't complain if tests don't hit defensive assertion code: + "raise AssertionError", + "raise NotImplementedError", + "AbstractMethodError", + # Don't complain if non-runnable code isn't run: + "if 0:", + "if __name__ == .__main__.:", + "if TYPE_CHECKING:", +] + +[tool.coverage.html] +directory = "coverage_html_report" + +[tool.codespell] +ignore-words-list = "blocs, coo, hist, nd, sav, ser, recuse" +ignore-regex = 'https://([\w/\.])+' diff --git a/scripts/tests/data/deps_unmodified_duplicate_package.yaml b/scripts/tests/data/deps_unmodified_duplicate_package.yaml new file mode 100644 index 0000000000000..72721c2842707 --- /dev/null +++ b/scripts/tests/data/deps_unmodified_duplicate_package.yaml @@ -0,0 +1,4 @@ +# Test: duplicate package +dependencies: + - jinja2>=3.0.0 + - jinja2>=3.0.0 diff --git a/scripts/tests/data/deps_unmodified_no_version.yaml b/scripts/tests/data/deps_unmodified_no_version.yaml new file mode 100644 index 0000000000000..c57b49a003efd --- /dev/null +++ b/scripts/tests/data/deps_unmodified_no_version.yaml @@ -0,0 +1,5 @@ +# Test: empty version +dependencies: + - jinja2 + - scipy + - SQLAlchemy diff --git a/scripts/tests/data/deps_unmodified_random.yaml b/scripts/tests/data/deps_unmodified_random.yaml new file mode 100644 index 0000000000000..4ca758af1c8ad --- /dev/null +++ b/scripts/tests/data/deps_unmodified_random.yaml @@ -0,0 +1,57 @@ +# Test: random +name: pandas-dev +channels: + - conda-forge +dependencies: + - python=3.8 + + # build dependencies + - versioneer[toml] + - cython>=0.29.32 + + # test dependencies + - pytest>=7.0.0 + - pytest-cov + - pytest-xdist>=2.2.0 + - psutil + - pytest-asyncio>=0.17 + - boto3 + + # required dependencies + - python-dateutil + - numpy + - pytz + + # optional dependencies + - beautifulsoup4 + - blosc + - bottleneck>=1.3.2 + - brotlipy + - fastparquet>=0.6.3 + - fsspec>=2021.07.0 + - html5lib>=1.1 + - hypothesis + - gcsfs>=2021.07.0 + - jinja2 + - lxml>=4.6.3 + - matplotlib>=3.6.1 + - numba + - numexpr>=2.7.3 + - openpyxl>=3.0.7 + - odfpy>=1.4.1 + - pandas-gbq>=0.15.0 + - psycopg2 + - pyarrow<11, >=7.0.0 + - pymysql>=1.0.2 + - pyreadstat>=1.1.2 + - pytables>=3.6.1 + - python-snappy>=0.6.0 + - pyxlsb>=1.0.8 + - s3fs>=2021.08.0 + - scipy>=1.7.1 + - sqlalchemy>=1.4.16 + - tabulate>=0.8.9 + - xarray>=0.21.0 + - xlrd>=2.0.1 + - xlsxwriter>=1.4.3 + - zstandard>=0.15.2 diff --git a/scripts/tests/data/deps_unmodified_range.yaml b/scripts/tests/data/deps_unmodified_range.yaml new file mode 100644 index 0000000000000..22882af2cbc4b --- /dev/null +++ b/scripts/tests/data/deps_unmodified_range.yaml @@ -0,0 +1,5 @@ +# Test: range +dependencies: + - jinja2<8 + - scipy<9 + - SQLAlchemy<2.0 diff --git a/scripts/tests/data/deps_unmodified_same_version.yaml b/scripts/tests/data/deps_unmodified_same_version.yaml new file mode 100644 index 0000000000000..e07b221ecd44f --- /dev/null +++ b/scripts/tests/data/deps_unmodified_same_version.yaml @@ -0,0 +1,3 @@ +# Test: same version +dependencies: + - jinja2>=3.0.0 diff --git a/scripts/tests/test_validate_min_versions_in_sync.py b/scripts/tests/test_validate_min_versions_in_sync.py new file mode 100644 index 0000000000000..13e8965bb7591 --- /dev/null +++ b/scripts/tests/test_validate_min_versions_in_sync.py @@ -0,0 +1,61 @@ +import pathlib +import sys + +import pytest +import yaml + +if sys.version_info >= (3, 11): + import tomllib +else: + import tomli as tomllib + +from scripts.validate_min_versions_in_sync import ( + get_toml_map_from, + get_yaml_map_from, + pin_min_versions_to_yaml_file, +) + + +@pytest.mark.parametrize( + "src_toml, src_yaml, expected_yaml", + [ + ( + pathlib.Path("scripts/tests/data/deps_minimum.toml"), + pathlib.Path("scripts/tests/data/deps_unmodified_random.yaml"), + pathlib.Path("scripts/tests/data/deps_expected_random.yaml"), + ), + ( + pathlib.Path("scripts/tests/data/deps_minimum.toml"), + pathlib.Path("scripts/tests/data/deps_unmodified_same_version.yaml"), + pathlib.Path("scripts/tests/data/deps_expected_same_version.yaml"), + ), + ( + pathlib.Path("scripts/tests/data/deps_minimum.toml"), + pathlib.Path("scripts/tests/data/deps_unmodified_duplicate_package.yaml"), + pathlib.Path("scripts/tests/data/deps_expected_duplicate_package.yaml"), + ), + ( + pathlib.Path("scripts/tests/data/deps_minimum.toml"), + pathlib.Path("scripts/tests/data/deps_unmodified_no_version.yaml"), + pathlib.Path("scripts/tests/data/deps_expected_no_version.yaml"), + ), + ( + pathlib.Path("scripts/tests/data/deps_minimum.toml"), + pathlib.Path("scripts/tests/data/deps_unmodified_range.yaml"), + pathlib.Path("scripts/tests/data/deps_expected_range.yaml"), + ), + ], +) +def test_pin_min_versions_to_yaml_file(src_toml, src_yaml, expected_yaml): + with open(src_toml, "rb") as toml_f: + toml_map = tomllib.load(toml_f) + with open(src_yaml) as yaml_f: + yaml_file_data = yaml_f.read() + yaml_file = yaml.safe_load(yaml_file_data) + yaml_dependencies = yaml_file["dependencies"] + yaml_map = get_yaml_map_from(yaml_dependencies) + toml_map = get_toml_map_from(toml_map) + result_yaml_file = pin_min_versions_to_yaml_file(yaml_map, toml_map, yaml_file_data) + with open(expected_yaml) as yaml_f: + dummy_yaml_expected_file_1 = yaml_f.read() + assert result_yaml_file == dummy_yaml_expected_file_1 diff --git a/scripts/validate_min_versions_in_sync.py b/scripts/validate_min_versions_in_sync.py index 7c102096c1690..3c12f17fe72cf 100755 --- a/scripts/validate_min_versions_in_sync.py +++ b/scripts/validate_min_versions_in_sync.py @@ -17,18 +17,31 @@ import pathlib import sys +import yaml + if sys.version_info >= (3, 11): import tomllib else: import tomli as tomllib +from typing import Any + +from scripts.generate_pip_deps_from_conda import RENAME + DOC_PATH = pathlib.Path("doc/source/getting_started/install.rst").resolve() CI_PATH = next( pathlib.Path("ci/deps").absolute().glob("actions-*-minimum_versions.yaml") ) CODE_PATH = pathlib.Path("pandas/compat/_optional.py").resolve() SETUP_PATH = pathlib.Path("pyproject.toml").resolve() +YAML_PATH = pathlib.Path("ci/deps") +ENV_PATH = pathlib.Path("environment.yml") EXCLUDE_DEPS = {"tzdata", "blosc"} +EXCLUSION_LIST = { + "python=3.8[build=*_pypy]": None, + "tzdata": None, + "pyarrow": None, +} # pandas package is not available # in pre-commit environment sys.path.append("pandas/compat") @@ -41,6 +54,147 @@ import _optional +def pin_min_versions_to_ci_deps() -> int: + """ + Pin minimum versions to CI dependencies. + + Pip dependencies are not pinned. + """ + all_yaml_files = list(YAML_PATH.iterdir()) + all_yaml_files.append(ENV_PATH) + toml_dependencies = {} + with open(SETUP_PATH, "rb") as toml_f: + toml_dependencies = tomllib.load(toml_f) + ret = 0 + for curr_file in all_yaml_files: + with open(curr_file) as yaml_f: + yaml_start_data = yaml_f.read() + yaml_file = yaml.safe_load(yaml_start_data) + yaml_dependencies = yaml_file["dependencies"] + yaml_map = get_yaml_map_from(yaml_dependencies) + toml_map = get_toml_map_from(toml_dependencies) + yaml_result_data = pin_min_versions_to_yaml_file( + yaml_map, toml_map, yaml_start_data + ) + if yaml_result_data != yaml_start_data: + with open(curr_file, "w") as f: + f.write(yaml_result_data) + ret |= 1 + return ret + + +def get_toml_map_from(toml_dic: dict[str, Any]) -> dict[str, str]: + toml_deps = {} + toml_dependencies = set(toml_dic["project"]["optional-dependencies"]["all"]) + for dependency in toml_dependencies: + toml_package, toml_version = dependency.strip().split(">=") + toml_deps[toml_package] = toml_version + return toml_deps + + +def get_operator_from(dependency: str) -> str | None: + if "<=" in dependency: + operator = "<=" + elif ">=" in dependency: + operator = ">=" + elif "=" in dependency: + operator = "=" + elif ">" in dependency: + operator = ">" + elif "<" in dependency: + operator = "<" + else: + operator = None + return operator + + +def get_yaml_map_from( + yaml_dic: list[str | dict[str, list[str]]] +) -> dict[str, list[str] | None]: + yaml_map: dict[str, list[str] | None] = {} + for dependency in yaml_dic: + if ( + isinstance(dependency, dict) + or dependency in EXCLUSION_LIST + or dependency in yaml_map + ): + continue + search_text = str(dependency) + operator = get_operator_from(search_text) + if "," in dependency: + yaml_dependency, yaml_version1 = search_text.split(",") + operator = get_operator_from(yaml_dependency) + assert operator is not None + yaml_package, yaml_version2 = yaml_dependency.split(operator) + yaml_version2 = operator + yaml_version2 + yaml_map[yaml_package] = [yaml_version1, yaml_version2] + elif operator is not None: + yaml_package, yaml_version = search_text.split(operator) + yaml_version = operator + yaml_version + yaml_map[yaml_package] = [yaml_version] + else: + yaml_package, yaml_version = search_text.strip(), None + yaml_map[yaml_package] = yaml_version + return yaml_map + + +def clean_version_list( + yaml_versions: list[str], toml_version: version.Version +) -> list[str]: + for i in range(len(yaml_versions)): + yaml_version = yaml_versions[i] + operator = get_operator_from(yaml_version) + assert operator is not None + if "<=" in operator or ">=" in operator: + yaml_version = yaml_version[2:] + else: + yaml_version = yaml_version[1:] + yaml_version = version.parse(yaml_version) + if yaml_version < toml_version: + yaml_versions[i] = "-" + str(yaml_version) + elif yaml_version >= toml_version: + if ">" in operator: + yaml_versions[i] = "-" + str(yaml_version) + return yaml_versions + + +def pin_min_versions_to_yaml_file( + yaml_map: dict[str, list[str] | None], toml_map: dict[str, str], yaml_file_data: str +) -> str: + data = yaml_file_data + for yaml_package, yaml_versions in yaml_map.items(): + if yaml_package in EXCLUSION_LIST: + continue + old_dep = yaml_package + if yaml_versions is not None: + for yaml_version in yaml_versions: + old_dep += yaml_version + ", " + old_dep = old_dep[:-2] + if RENAME.get(yaml_package, yaml_package) in toml_map: + min_dep = toml_map[RENAME.get(yaml_package, yaml_package)] + elif yaml_package in toml_map: + min_dep = toml_map[yaml_package] + else: + continue + if yaml_versions is None: + new_dep = old_dep + ">=" + min_dep + data = data.replace(old_dep, new_dep, 1) + continue + toml_version = version.parse(min_dep) + yaml_versions = clean_version_list(yaml_versions, toml_version) + cleaned_yaml_versions = [x for x in yaml_versions if "-" not in x] + new_dep = yaml_package + for yaml_version in cleaned_yaml_versions: + new_dep += yaml_version + ", " + operator = get_operator_from(new_dep) + if operator != "=": + new_dep += ">=" + min_dep + else: + new_dep = new_dep[:-2] + data = data.replace(old_dep, new_dep) + return data + + def get_versions_from_code() -> dict[str, str]: """Min versions for checking within pandas code.""" install_map = _optional.INSTALL_MAPPING @@ -92,7 +246,6 @@ def get_versions_from_toml() -> dict[str, str]: """Min versions in pyproject.toml for pip install pandas[extra].""" install_map = _optional.INSTALL_MAPPING optional_dependencies = {} - with open(SETUP_PATH, "rb") as pyproject_f: pyproject_toml = tomllib.load(pyproject_f) opt_deps = pyproject_toml["project"]["optional-dependencies"] @@ -108,11 +261,12 @@ def get_versions_from_toml() -> dict[str, str]: for item in EXCLUDE_DEPS: optional_dependencies.pop(item, None) - return optional_dependencies -def main(): +def main() -> int: + ret = 0 + ret |= pin_min_versions_to_ci_deps() with open(CI_PATH, encoding="utf-8") as f: _, ci_optional = get_versions_from_ci(f.readlines()) code_optional = get_versions_from_code() @@ -138,9 +292,9 @@ def main(): f"{CODE_PATH}: {code_optional.get(package, 'Not specified')}\n" f"{SETUP_PATH}: {setup_optional.get(package, 'Not specified')}\n\n" ) - sys.exit(1) - sys.exit(0) + ret |= 1 + return ret if __name__ == "__main__": - main() + sys.exit(main())