Skip to content

BLD: Add DLL hashes to RECORD #52556

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Apr 13, 2023
67 changes: 35 additions & 32 deletions ci/fix_wheels.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
"""
This file "repairs" our Windows wheels by copying the necessary DLLs for pandas to run
on a barebones Windows installation into the wheel.

NOTE: The paths for the DLLs are hard-coded to the location of the Visual Studio
redistributables
"""
import os
import shutil
import subprocess
from subprocess import CalledProcessError
import sys
import zipfile

Expand All @@ -18,41 +27,35 @@
raise ValueError(
"User must pass the path to the wheel and the destination directory."
)
# Wheels are zip files
if not os.path.isdir(dest_dir):
print(f"Created directory {dest_dir}")
os.mkdir(dest_dir)
shutil.copy(wheel_path, dest_dir) # Remember to delete if process fails

wheel_name = os.path.basename(wheel_path)
success = True
exception = None
repaired_wheel_path = os.path.join(dest_dir, wheel_name)
with zipfile.ZipFile(repaired_wheel_path, "a") as zipf:
try:
# TODO: figure out how licensing works for the redistributables
base_redist_dir = (
f"C:/Program Files (x86)/Microsoft Visual Studio/2019/"
f"Enterprise/VC/Redist/MSVC/14.29.30133/{PYTHON_ARCH}/"
f"Microsoft.VC142.CRT/"
)
zipf.write(
os.path.join(base_redist_dir, "msvcp140.dll"),
"pandas/_libs/window/msvcp140.dll",
)
zipf.write(
os.path.join(base_redist_dir, "concrt140.dll"),
"pandas/_libs/window/concrt140.dll",
)
if not is_32:
zipf.write(
os.path.join(base_redist_dir, "vcruntime140_1.dll"),
"pandas/_libs/window/vcruntime140_1.dll",
)
except Exception as e:
success = False
exception = e

if not success:
os.remove(repaired_wheel_path)
raise exception
print(f"Successfully repaired wheel was written to {repaired_wheel_path}")
try:
# Use the wheel CLI for zipping up the wheel since the CLI will
# take care of rebuilding the hashes found in the record file
tmp_dir = os.path.join(dest_dir, "tmp")
with zipfile.ZipFile(wheel_path, "r") as f:
# Extracting all the members of the zip
# into a specific location.
f.extractall(path=tmp_dir)
base_redist_dir = (
f"C:/Program Files (x86)/Microsoft Visual Studio/2019/"
f"Enterprise/VC/Redist/MSVC/14.29.30133/{PYTHON_ARCH}/"
f"Microsoft.VC142.CRT/"
)
required_dlls = ["msvcp140.dll", "concrt140.dll"]
if not is_32:
required_dlls += ["vcruntime140_1.dll"]
dest_dll_dir = os.path.join(tmp_dir, "pandas/_libs/window")
for dll in required_dlls:
src = os.path.join(base_redist_dir, dll)
shutil.copy(src, dest_dll_dir)
subprocess.run(["wheel", "pack", tmp_dir, "-d", dest_dir], check=True)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just curious, I see that wheel unpack will check the hashes and file sizes and raise if there's a mismatch. Is it worth running that once after the packing is done?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice suggestion. I hadn't thought about that.
It seems to do the right thing, since I get

wheel unpack pandas-2.0.0-cp310-cp310-win_amd64.whl
Unpacking to: pandas-2.0.0...No hash found for file 'pandas/_libs/window/msvcp140.dll'

when downloading the wheel and trying to unpack on macOS.

except CalledProcessError:
print("Failed to add DLLS to wheel.")
sys.exit(1)
print("Successfully repaired wheel")
12 changes: 12 additions & 0 deletions ci/test_wheels.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
import shutil
import subprocess
from subprocess import CalledProcessError
import sys

if os.name == "nt":
Expand All @@ -15,6 +16,17 @@
wheel_path = None
print(f"IS_32_BIT is {is_32_bit}")
print(f"Path to built wheel is {wheel_path}")

print("Verifying file hashes in wheel RECORD file")
try:
tmp_dir = "tmp"
subprocess.run(["wheel", "unpack", wheel_path, "-d", tmp_dir], check=True)
except CalledProcessError:
print("wheel RECORD file hash verification failed.")
sys.exit(1)
finally:
shutil.rmtree(tmp_dir)

if is_32_bit:
sys.exit(0) # No way to test Windows 32-bit(no docker image)
if wheel_path is None:
Expand Down