Skip to content

BLD: Shrink sdist/wheel sizes #54052

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Jul 31, 2023
68 changes: 68 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,71 @@
*.xls binary
*.xlsx binary
pandas/_version.py export-subst


*.bz2 export-ignore
*.csv export-ignore
*.data export-ignore
*.dta export-ignore
*.feather export-ignore
*.tar export-ignore
*.gz export-ignore
*.h5 export-ignore
*.html export-ignore
*.json export-ignore
*.jsonl export-ignore
*.kml export-ignore
*.msgpack export-ignore
*.pdf export-ignore
*.parquet export-ignore
*.pickle export-ignore
*.pkl export-ignore
*.png export-ignore
*.pptx export-ignore
*.ods export-ignore
*.odt export-ignore
*.orc export-ignore
*.sas7bdat export-ignore
*.sav export-ignore
*.so export-ignore
*.txt export-ignore
*.xls export-ignore
*.xlsb export-ignore
*.xlsm export-ignore
*.xlsx export-ignore
*.xpt export-ignore
*.cpt export-ignore
*.xml export-ignore
*.xsl export-ignore
*.xz export-ignore
*.zip export-ignore
*.zst export-ignore
*~ export-ignore
.DS_Store export-ignore
.git* export-ignore

*.py[ocd] export-ignore
*.pxi export-ignore

# Ignoring stuff from the top level
.circleci export-ignore
.github export-ignore
asv_bench export-ignore
ci export-ignore
doc export-ignore
gitpod export-ignore
MANIFEST.in export-ignore
scripts export-ignore
typings export-ignore
web export-ignore
CITATION.cff export-ignore
codecov.yml export-ignore
Dockerfile export-ignore
environment.yml export-ignore
setup.py export-ignore


# GH 39321
# The csv_dir_path fixture checks for the existence of this directory, so
# exclude the whole directory to avoid running the related tests from the sdist
pandas/tests/io/parser/data export-ignore
8 changes: 5 additions & 3 deletions .github/workflows/wheels.yml
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ jobs:
with:
fetch-depth: 0

# We need to build wheels from the sdist since the sdist
# removes unnecessary files from the release
- name: Download sdist
uses: actions/download-artifact@v3
with:
Expand All @@ -115,8 +117,8 @@ jobs:
# TODO: Build wheels from sdist again
# There seems to be some sort of race condition
# within GitHub that causes the sdist to be missing files
#with:
# package-dir: ./dist/${{ needs.build_sdist.outputs.sdist_file }}
with:
package-dir: ./dist/${{ needs.build_sdist.outputs.sdist_file }}
env:
CIBW_PRERELEASE_PYTHONS: True
CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}
Expand Down Expand Up @@ -144,7 +146,7 @@ jobs:
$TST_CMD = @"
python -m pip install pytz six numpy python-dateutil tzdata>=2022.1 hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17;
python -m pip install --find-links=pandas\wheelhouse --no-index pandas;
python -c `'import pandas as pd; pd.test()`';
python -c `'import pandas as pd; pd.test(extra_args=[\"`\"--no-strict-data-files`\"\", \"`\"-m not clipboard and not single_cpu and not slow and not network and not db`\"\"])`';
"@
docker pull python:${{ matrix.python[1] }}-windowsservercore
docker run --env PANDAS_CI='1' -v ${PWD}:C:\pandas python:${{ matrix.python[1] }}-windowsservercore powershell -Command $TST_CMD
Expand Down
14 changes: 7 additions & 7 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,9 @@

def pytest_addoption(parser) -> None:
parser.addoption(
"--strict-data-files",
action="store_true",
help="Fail if a test is skipped for missing data file.",
"--no-strict-data-files",
action="store_false",
help="Don't fail if a test is skipped for missing data file.",
)


Expand Down Expand Up @@ -1172,9 +1172,9 @@ def all_numeric_accumulations(request):
@pytest.fixture
def strict_data_files(pytestconfig):
"""
Returns the configuration for the test setting `--strict-data-files`.
Returns the configuration for the test setting `--no-strict-data-files`.
"""
return pytestconfig.getoption("--strict-data-files")
return pytestconfig.getoption("--no-strict-data-files")


@pytest.fixture
Expand Down Expand Up @@ -1204,7 +1204,7 @@ def datapath(strict_data_files: str) -> Callable[..., str]:
Raises
------
ValueError
If the path doesn't exist and the --strict-data-files option is set.
If the path doesn't exist and the --no-strict-data-files option is not set.
"""
BASE_PATH = os.path.join(os.path.dirname(__file__), "tests")

Expand All @@ -1213,7 +1213,7 @@ def deco(*args):
if not os.path.exists(path):
if strict_data_files:
raise ValueError(
f"Could not find file {path} and --strict-data-files is set."
f"Could not find file {path} and --no-strict-data-files is not set."
)
pytest.skip(f"Could not find {path}.")
return path
Expand Down
26 changes: 13 additions & 13 deletions pandas/tests/io/xml/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,35 @@


@pytest.fixture
def xml_data_path(tests_io_data_path):
def xml_data_path(tests_io_data_path, datapath):
return tests_io_data_path / "xml"


@pytest.fixture
def xml_books(xml_data_path):
return xml_data_path / "books.xml"
def xml_books(xml_data_path, datapath):
return datapath(xml_data_path / "books.xml")


@pytest.fixture
def xml_doc_ch_utf(xml_data_path):
return xml_data_path / "doc_ch_utf.xml"
def xml_doc_ch_utf(xml_data_path, datapath):
return datapath(xml_data_path / "doc_ch_utf.xml")


@pytest.fixture
def xml_baby_names(xml_data_path):
return xml_data_path / "baby_names.xml"
def xml_baby_names(xml_data_path, datapath):
return datapath(xml_data_path / "baby_names.xml")


@pytest.fixture
def kml_cta_rail_lines(xml_data_path):
return xml_data_path / "cta_rail_lines.kml"
def kml_cta_rail_lines(xml_data_path, datapath):
return datapath(xml_data_path / "cta_rail_lines.kml")


@pytest.fixture
def xsl_flatten_doc(xml_data_path):
return xml_data_path / "flatten_doc.xsl"
def xsl_flatten_doc(xml_data_path, datapath):
return datapath(xml_data_path / "flatten_doc.xsl")


@pytest.fixture
def xsl_row_field_output(xml_data_path):
return xml_data_path / "row_field_output.xsl"
def xsl_row_field_output(xml_data_path, datapath):
return datapath(xml_data_path / "row_field_output.xsl")
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,8 @@ environment = {LDFLAGS="-Wl,--strip-all"}
test-requires = "hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17"
test-command = """
PANDAS_CI='1' python -c 'import pandas as pd; \
pd.test(extra_args=["-m not clipboard and not single_cpu and not slow and not network and not db", "-n 2"]); \
pd.test(extra_args=["-m not clipboard and single_cpu and not slow and not network and not db"]);' \
pd.test(extra_args=["-m not clipboard and not single_cpu and not slow and not network and not db", "-n 2", "--no-strict-data-files"]); \
pd.test(extra_args=["-m not clipboard and single_cpu and not slow and not network and not db", "--no-strict-data-files"]);' \
"""

[tool.cibuildwheel.macos]
Expand Down Expand Up @@ -471,7 +471,7 @@ disable = [
[tool.pytest.ini_options]
# sync minversion with pyproject.toml & install.rst
minversion = "7.3.2"
addopts = "--strict-data-files --strict-markers --strict-config --capture=no --durations=30 --junitxml=test-data.xml"
addopts = "--strict-markers --strict-config --capture=no --durations=30 --junitxml=test-data.xml"
empty_parameter_set_mark = "fail_at_collect"
xfail_strict = true
testpaths = "pandas"
Expand Down