From bab006a1ee26b9c7068ecd24b21e716b73b3960e Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sat, 8 Jul 2023 16:07:44 -0700 Subject: [PATCH 01/10] BLD: Shrink sdist/wheel sizes --- .gitattributes | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/.gitattributes b/.gitattributes index 736fa09d070fe..e1c7b11e39b73 100644 --- a/.gitattributes +++ b/.gitattributes @@ -14,3 +14,69 @@ *.xls binary *.xlsx binary pandas/_version.py export-subst + + +*.bz2 export-ignore +*.csv export-ignore +*.data export-ignore +*.dta export-ignore +*.feather export-ignore +*.tar export-ignore +*.gz export-ignore +*.h5 export-ignore +*.html export-ignore +*.json export-ignore +*.jsonl export-ignore +*.kml export-ignore +*.msgpack export-ignore +*.pdf export-ignore +*.parquet export-ignore +*.pickle export-ignore +*.pkl export-ignore +*.png export-ignore +*.pptx export-ignore +*.ods export-ignore +*.odt export-ignore +*.orc export-ignore +*.sas7bdat export-ignore +*.sav export-ignore +*.so export-ignore +*.txt export-ignore +*.xls export-ignore +*.xlsb export-ignore +*.xlsm export-ignore +*.xlsx export-ignore +*.xpt export-ignore +*.cpt export-ignore +*.xml export-ignore +*.xsl export-ignore +*.xz export-ignore +*.zip export-ignore +*.zst export-ignore +*~ export-ignore +.DS_Store export-ignore +.git* export-ignore + +*.py[ocd] export-ignore +*.pxi export-ignore + +# Ignoring stuff from the top level +asv_bench export-ignore +ci export-ignore +doc export-ignore +gitpod export-ignore +MANIFEST.in export-ignore +scripts export-ignore +typings export-ignore +web export-ignore +CITATION.cff export-ignore +codecov.yml export-ignore +Dockerfile export-ignore +environment.yml export-ignore +setup.py export-ignore + + +# GH 39321 +# csv_dir_path fixture checks the existence of the directory +# exclude the whole directory to avoid running related tests in sdist +pandas/tests/io/parser/data export-ignore From 909448ff6c7074507685e86b2661ab8374f28800 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sun, 30 Jul 2023 14:50:38 -0700 Subject: [PATCH 02/10] build wheel from sdist --- .gitattributes | 2 ++ .github/workflows/wheels.yml | 25 ++++++++++++++++++++----- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/.gitattributes b/.gitattributes index e1c7b11e39b73..19c6fd2fd1d47 100644 --- a/.gitattributes +++ b/.gitattributes @@ -61,6 +61,8 @@ pandas/_version.py export-subst *.pxi export-ignore # Ignoring stuff from the top level +.circleci export-ignore +.github export-ignore asv_bench export-ignore ci export-ignore doc export-ignore diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index f1f9646054132..51d243bd54c8a 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -103,11 +103,24 @@ jobs: with: fetch-depth: 0 - - name: Download sdist - uses: actions/download-artifact@v3 - with: - name: sdist - path: ./dist + # - name: Download sdist + # uses: actions/download-artifact@v3 + # with: + # name: sdist + # path: ./dist + + # We need to build wheels from the sdist since the sdist + # removes unnecessary files from the release + + - name: Build sdist + run: | + python -m pip install build + python -m build --sdist + + - name: Output sdist name + id: save-sdist-path + shell: bash -el {0} + run: echo "sdist_name=$(ls ./dist)" >> "$GITHUB_ENV" - name: Build wheels uses: pypa/cibuildwheel@v2.13.1 @@ -116,6 +129,8 @@ jobs: # within Github that makes the sdist be missing files #with: # package-dir: ./dist/${{ needs.build_sdist.outputs.sdist_file }} + with: + package-dir: ./dist/${{ env.sdist_name }} env: CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }} From 63c7dabdc4ce3a49f8e03839c210e17a16909528 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sun, 30 Jul 2023 14:59:48 -0700 Subject: [PATCH 03/10] try building from sdist again --- .github/workflows/wheels.yml | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index adf228386a518..791bc48c7b7c7 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -104,34 +104,21 @@ jobs: with: fetch-depth: 0 - # - name: Download sdist - # uses: actions/download-artifact@v3 - # with: - # name: sdist - # path: ./dist - # We need to build wheels from the sdist since the sdist # removes unnecessary files from the release - - - name: Build sdist - run: | - python -m pip install build - python -m build --sdist - - - name: Output sdist name - id: save-sdist-path - shell: bash -el {0} - run: echo "sdist_name=$(ls ./dist)" >> "$GITHUB_ENV" + - name: Download sdist + uses: actions/download-artifact@v3 + with: + name: sdist + path: ./dist - name: Build wheels uses: pypa/cibuildwheel@v2.14.1 # TODO: Build wheels from sdist again # There's some sort of weird race condition? # within Github that makes the sdist be missing files - #with: - # package-dir: ./dist/${{ needs.build_sdist.outputs.sdist_file }} with: - package-dir: ./dist/${{ env.sdist_name }} + package-dir: ./dist/${{ needs.build_sdist.outputs.sdist_file }} env: CIBW_PRERELEASE_PYTHONS: True CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }} From f14173dce2815f658643b47b8ca5f0f869006093 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sun, 30 Jul 2023 15:46:27 -0700 Subject: [PATCH 04/10] change to no-strict-data-files? --- .github/workflows/wheels.yml | 2 +- pandas/conftest.py | 14 +++++++------- pyproject.toml | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index f1f9646054132..5ce7dee7e8888 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -142,7 +142,7 @@ jobs: $TST_CMD = @" python -m pip install pytz six numpy python-dateutil tzdata>=2022.1 hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17; python -m pip install --find-links=pandas\wheelhouse --no-index pandas; - python -c `'import pandas as pd; pd.test()`'; + python -c `'import pandas as pd; pd.test(extra_args=[\`"--no-strict-data-files\`"])`'; "@ docker pull python:${{ matrix.python[1] }}-windowsservercore docker run --env PANDAS_CI='1' -v ${PWD}:C:\pandas python:${{ matrix.python[1] }}-windowsservercore powershell -Command $TST_CMD diff --git a/pandas/conftest.py b/pandas/conftest.py index b2f1377a9fb32..98d0e55227340 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -97,9 +97,9 @@ def pytest_addoption(parser) -> None: parser.addoption( - "--strict-data-files", - action="store_true", - help="Fail if a test is skipped for missing data file.", + "--no-strict-data-files", + action="store_false", + help="Don't fail if a test is skipped for missing data file.", ) @@ -1163,9 +1163,9 @@ def all_numeric_accumulations(request): @pytest.fixture def strict_data_files(pytestconfig): """ - Returns the configuration for the test setting `--strict-data-files`. + Returns the configuration for the test setting `--no-strict-data-files`. """ - return pytestconfig.getoption("--strict-data-files") + return pytestconfig.getoption("--no-strict-data-files") @pytest.fixture @@ -1195,7 +1195,7 @@ def datapath(strict_data_files: str) -> Callable[..., str]: Raises ------ ValueError - If the path doesn't exist and the --strict-data-files option is set. + If the path doesn't exist and the --no-strict-data-files option is not set. """ BASE_PATH = os.path.join(os.path.dirname(__file__), "tests") @@ -1204,7 +1204,7 @@ def deco(*args): if not os.path.exists(path): if strict_data_files: raise ValueError( - f"Could not find file {path} and --strict-data-files is set." + f"Could not find file {path} and --no-strict-data-files is not set." ) pytest.skip(f"Could not find {path}.") return path diff --git a/pyproject.toml b/pyproject.toml index a2ae269c26667..708a3419d8ecf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -149,8 +149,8 @@ environment = {LDFLAGS="-Wl,--strip-all"} test-requires = "hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17" test-command = """ PANDAS_CI='1' python -c 'import pandas as pd; \ - pd.test(extra_args=["-m not clipboard and not single_cpu and not slow and not network and not db", "-n 2"]); \ - pd.test(extra_args=["-m not clipboard and single_cpu and not slow and not network and not db"]);' \ + pd.test(extra_args=["-m not clipboard and not single_cpu and not slow and not network and not db", "-n 2", "--no-strict-data-files"]); \ + pd.test(extra_args=["-m not clipboard and single_cpu and not slow and not network and not db", "--no-strict-data-files"]);' \ """ [tool.cibuildwheel.macos] From 2a642903fcca3d5d76e290cd35b931c32430a4de Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sun, 30 Jul 2023 15:58:59 -0700 Subject: [PATCH 05/10] remove marker --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index cb77360312e95..6e82b200bb1c7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -471,7 +471,7 @@ disable = [ [tool.pytest.ini_options] # sync minversion with pyproject.toml & install.rst minversion = "7.3.2" -addopts = "--strict-data-files --strict-markers --strict-config --capture=no --durations=30 --junitxml=test-data.xml" +addopts = "--strict-markers --strict-config --capture=no --durations=30 --junitxml=test-data.xml" empty_parameter_set_mark = "fail_at_collect" xfail_strict = true testpaths = "pandas" From 3fdc7cfc14680be9273ccdd756db410679c3af99 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sun, 30 Jul 2023 16:35:02 -0700 Subject: [PATCH 06/10] use datapath --- .github/workflows/wheels.yml | 2 +- pandas/tests/io/xml/conftest.py | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 090af4d498038..1101648238d2a 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -146,7 +146,7 @@ jobs: $TST_CMD = @" python -m pip install pytz six numpy python-dateutil tzdata>=2022.1 hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17; python -m pip install --find-links=pandas\wheelhouse --no-index pandas; - python -c `'import pandas as pd; pd.test(extra_args=[\`"--no-strict-data-files\`"])`'; + python -c `'import pandas as pd; pd.test(extra_args=[`"--no-strict-data-files`"])`'; "@ docker pull python:${{ matrix.python[1] }}-windowsservercore docker run --env PANDAS_CI='1' -v ${PWD}:C:\pandas python:${{ matrix.python[1] }}-windowsservercore powershell -Command $TST_CMD diff --git a/pandas/tests/io/xml/conftest.py b/pandas/tests/io/xml/conftest.py index 510e22fb32e77..c88616eb78029 100644 --- a/pandas/tests/io/xml/conftest.py +++ b/pandas/tests/io/xml/conftest.py @@ -2,35 +2,35 @@ @pytest.fixture -def xml_data_path(tests_io_data_path): +def xml_data_path(tests_io_data_path, datapath): return tests_io_data_path / "xml" @pytest.fixture -def xml_books(xml_data_path): - return xml_data_path / "books.xml" +def xml_books(xml_data_path, datapath): + return datapath(xml_data_path / "books.xml") @pytest.fixture -def xml_doc_ch_utf(xml_data_path): - return xml_data_path / "doc_ch_utf.xml" +def xml_doc_ch_utf(xml_data_path, datapath): + return datapath(xml_data_path / "doc_ch_utf.xml") @pytest.fixture -def xml_baby_names(xml_data_path): - return xml_data_path / "baby_names.xml" +def xml_baby_names(xml_data_path, datapath): + return datapath(xml_data_path / "baby_names.xml") @pytest.fixture -def kml_cta_rail_lines(xml_data_path): - return xml_data_path / "cta_rail_lines.kml" +def kml_cta_rail_lines(xml_data_path, datapath): + return datapath(xml_data_path / "cta_rail_lines.kml") @pytest.fixture -def xsl_flatten_doc(xml_data_path): - return xml_data_path / "flatten_doc.xsl" +def xsl_flatten_doc(xml_data_path, datapath): + return datapath(xml_data_path / "flatten_doc.xsl") @pytest.fixture -def xsl_row_field_output(xml_data_path): - return xml_data_path / "row_field_output.xsl" +def xsl_row_field_output(xml_data_path, datapath): + return datapath(xml_data_path / "row_field_output.xsl") From f08dd2a63ed275e5ea037f6c5c04760ce35e1072 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sun, 30 Jul 2023 17:49:41 -0700 Subject: [PATCH 07/10] fix windows --- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 1101648238d2a..b82e7ee76b6f8 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -146,7 +146,7 @@ jobs: $TST_CMD = @" python -m pip install pytz six numpy python-dateutil tzdata>=2022.1 hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17; python -m pip install --find-links=pandas\wheelhouse --no-index pandas; - python -c `'import pandas as pd; pd.test(extra_args=[`"--no-strict-data-files`"])`'; + python -c `'import pandas as pd; pd.test(extra_args=[\"--no-strict-data-files\"])`'; "@ docker pull python:${{ matrix.python[1] }}-windowsservercore docker run --env PANDAS_CI='1' -v ${PWD}:C:\pandas python:${{ matrix.python[1] }}-windowsservercore powershell -Command $TST_CMD From f3588c853340e886f55f201fcca9eddf2b10c346 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sun, 30 Jul 2023 20:57:10 -0700 Subject: [PATCH 08/10] no quotes needed? --- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index b82e7ee76b6f8..d10cdde01b447 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -146,7 +146,7 @@ jobs: $TST_CMD = @" python -m pip install pytz six numpy python-dateutil tzdata>=2022.1 hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17; python -m pip install --find-links=pandas\wheelhouse --no-index pandas; - python -c `'import pandas as pd; pd.test(extra_args=[\"--no-strict-data-files\"])`'; + python -c `'import pandas as pd; pd.test(extra_args=["--no-strict-data-files"])`'; "@ docker pull python:${{ matrix.python[1] }}-windowsservercore docker run --env PANDAS_CI='1' -v ${PWD}:C:\pandas python:${{ matrix.python[1] }}-windowsservercore powershell -Command $TST_CMD From ba44bb467f3d0edc86c2c26bbbaf75d5ded39c77 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sun, 30 Jul 2023 21:13:04 -0700 Subject: [PATCH 09/10] try again --- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index d10cdde01b447..494e3c685beb5 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -146,7 +146,7 @@ jobs: $TST_CMD = @" python -m pip install pytz six numpy python-dateutil tzdata>=2022.1 hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17; python -m pip install --find-links=pandas\wheelhouse --no-index pandas; - python -c `'import pandas as pd; pd.test(extra_args=["--no-strict-data-files"])`'; + python -c `'import pandas as pd; pd.test(extra_args=[\"`\"--no-strict-data-files`\"\"])`'; "@ docker pull python:${{ matrix.python[1] }}-windowsservercore docker run --env PANDAS_CI='1' -v ${PWD}:C:\pandas python:${{ matrix.python[1] }}-windowsservercore powershell -Command $TST_CMD From 04bd7d6509d5dbd0b4469eec8e2e6a39902ac465 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Sun, 30 Jul 2023 22:09:32 -0700 Subject: [PATCH 10/10] Update wheels.yml --- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 494e3c685beb5..77ab152ce712e 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -146,7 +146,7 @@ jobs: $TST_CMD = @" python -m pip install pytz six numpy python-dateutil tzdata>=2022.1 hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17; python -m pip install --find-links=pandas\wheelhouse --no-index pandas; - python -c `'import pandas as pd; pd.test(extra_args=[\"`\"--no-strict-data-files`\"\"])`'; + python -c `'import pandas as pd; pd.test(extra_args=[\"`\"--no-strict-data-files`\"\", \"`\"-m not clipboard and not single_cpu and not slow and not network and not db`\"\"])`'; "@ docker pull python:${{ matrix.python[1] }}-windowsservercore docker run --env PANDAS_CI='1' -v ${PWD}:C:\pandas python:${{ matrix.python[1] }}-windowsservercore powershell -Command $TST_CMD