Skip to content

BLD: Build shared c dependencies as a library #47081

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions .github/actions/build_pandas/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,4 @@ runs:
python -m pip install -e . --no-build-isolation --no-use-pep517 --no-index
shell: bash -el {0}
env:
# Cannot use parallel compilation on Windows, see https://github.com/pandas-dev/pandas/issues/30873
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unless this address's @simonjayhawkins #47081 (comment) I would prefer to keep this at N_JOBS: 1

Otherwise, I would suggest this be specific to the number of cores available on GHA hosted runners by OS: #47081 (comment)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unless this address's @simonjayhawkins #47081 (comment)

I've needed to restart a few workflows this last week or so as they fail on the initial build, but it maybe once it is fixed that the initial build is successful, that subsequent incremental builds for commits in the range of the last couple of weeks could also be successful.

And it could also be that if this is a reoccurring problem that I can change the workflow to repeat the previous build step if the pandas import fails.

so I am happy that any comments/concerns are fully addressed before merging this.

# GH 47305: Parallel build causes flaky ImportError: /home/runner/work/pandas/pandas/pandas/_libs/tslibs/timestamps.cpython-38-x86_64-linux-gnu.so: undefined symbol: pandas_datetime_to_datetimestruct
N_JOBS: 1
#N_JOBS: ${{ runner.os == 'Windows' && 1 || 2 }}
N_JOBS: 4
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might still be worth aligning this with the number of core on the GH hosted runners (2 Window/Linux, 3 MacOS): https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources=

69 changes: 48 additions & 21 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,10 @@
import platform
import shutil
import sys
from sysconfig import get_config_vars
from sysconfig import (
get_config_vars,
get_path,
)

import numpy
from pkg_resources import parse_version
Expand Down Expand Up @@ -102,6 +105,7 @@ def build_extensions(self):
if _CYTHON_INSTALLED:
self.render_templates(_pxifiles)

self.run_command("build_clib")
super().build_extensions()


Expand Down Expand Up @@ -437,6 +441,31 @@ def srcpath(name=None, suffix=".pyx", subdir="src"):
"pandas/_libs/tslibs/src/datetime/np_datetime_strings.h",
]

internal_libraries = [
[
"np_datetime",
{
"sources": [
"pandas/_libs/tslibs/src/datetime/np_datetime.c",
"pandas/_libs/tslibs/src/datetime/np_datetime_strings.c",
],
"include_dirs": [
"pandas/_libs/tslibs/src/datetime",
get_path("include"),
numpy.get_include(),
],
},
],
[
"tokenizer",
{
"sources": ["pandas/_libs/src/parser/tokenizer.c"],
"depends": ["pandas/_libs/src/parser/tokenizer.h"],
"include_dirs": [get_path("include")] + klib_include,
},
],
]

ext_data = {
"_libs.algos": {
"pyxfile": "_libs/algos",
Expand Down Expand Up @@ -471,7 +500,6 @@ def srcpath(name=None, suffix=".pyx", subdir="src"):
"pyxfile": "_libs/lib",
"depends": lib_depends + tseries_depends,
"include": klib_include, # due to tokenizer import
"sources": ["pandas/_libs/src/parser/tokenizer.c"],
},
"_libs.missing": {"pyxfile": "_libs/missing", "depends": tseries_depends},
"_libs.parsers": {
Expand All @@ -481,8 +509,8 @@ def srcpath(name=None, suffix=".pyx", subdir="src"):
"pandas/_libs/src/parser/tokenizer.h",
"pandas/_libs/src/parser/io.h",
],
"libraries": ["tokenizer"],
"sources": [
"pandas/_libs/src/parser/tokenizer.c",
"pandas/_libs/src/parser/io.c",
],
},
Expand All @@ -503,37 +531,35 @@ def srcpath(name=None, suffix=".pyx", subdir="src"):
"_libs.tslibs.conversion": {
"pyxfile": "_libs/tslibs/conversion",
"depends": tseries_depends,
"sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"],
"libraries": ["np_datetime"],
},
"_libs.tslibs.fields": {
"pyxfile": "_libs/tslibs/fields",
"depends": tseries_depends,
"sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"],
"libraries": ["np_datetime"],
},
"_libs.tslibs.nattype": {"pyxfile": "_libs/tslibs/nattype"},
"_libs.tslibs.np_datetime": {
"pyxfile": "_libs/tslibs/np_datetime",
"depends": tseries_depends,
"sources": [
"pandas/_libs/tslibs/src/datetime/np_datetime.c",
"pandas/_libs/tslibs/src/datetime/np_datetime_strings.c",
],
"libraries": ["np_datetime"],
},
"_libs.tslibs.offsets": {
"pyxfile": "_libs/tslibs/offsets",
"depends": tseries_depends,
"sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"],
"libraries": ["np_datetime"],
# "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"],
},
"_libs.tslibs.parsing": {
"pyxfile": "_libs/tslibs/parsing",
"include": klib_include,
"depends": ["pandas/_libs/src/parser/tokenizer.h"],
"sources": ["pandas/_libs/src/parser/tokenizer.c"],
"libraries": ["tokenizer"],
},
"_libs.tslibs.period": {
"pyxfile": "_libs/tslibs/period",
"depends": tseries_depends,
"sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"],
"libraries": ["np_datetime"],
},
"_libs.tslibs.strptime": {
"pyxfile": "_libs/tslibs/strptime",
Expand All @@ -542,23 +568,25 @@ def srcpath(name=None, suffix=".pyx", subdir="src"):
"_libs.tslibs.timedeltas": {
"pyxfile": "_libs/tslibs/timedeltas",
"depends": tseries_depends,
"sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"],
"libraries": ["np_datetime"],
},
"_libs.tslibs.timestamps": {
"pyxfile": "_libs/tslibs/timestamps",
"depends": tseries_depends,
"sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"],
"libraries": ["np_datetime"],
},
"_libs.tslibs.timezones": {"pyxfile": "_libs/tslibs/timezones"},
"_libs.tslibs.tzconversion": {
"pyxfile": "_libs/tslibs/tzconversion",
"depends": tseries_depends,
"sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"],
"libraries": ["np_datetime"],
# "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"],
},
"_libs.tslibs.vectorized": {
"pyxfile": "_libs/tslibs/vectorized",
"depends": tseries_depends,
"sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"],
"libraries": ["np_datetime"],
# "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"],
},
"_libs.testing": {"pyxfile": "_libs/testing"},
"_libs.window.aggregations": {
Expand Down Expand Up @@ -601,6 +629,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"):
depends=data.get("depends", []),
include_dirs=include,
language=data.get("language", "c"),
libraries=data.get("libraries", []),
define_macros=data.get("macros", macros),
extra_compile_args=extra_compile_args,
extra_link_args=extra_link_args,
Expand Down Expand Up @@ -634,17 +663,14 @@ def srcpath(name=None, suffix=".pyx", subdir="src"):
"pandas/_libs/src/ujson/lib/ultrajsonenc.c",
"pandas/_libs/src/ujson/lib/ultrajsondec.c",
]
+ [
"pandas/_libs/tslibs/src/datetime/np_datetime.c",
"pandas/_libs/tslibs/src/datetime/np_datetime_strings.c",
]
),
include_dirs=[
"pandas/_libs/src/ujson/python",
"pandas/_libs/src/ujson/lib",
"pandas/_libs/src/datetime",
"pandas/_libs/tslibs/src/datetime",
numpy.get_include(),
],
libraries=["np_datetime"],
extra_compile_args=(["-D_GNU_SOURCE"] + extra_compile_args),
extra_link_args=extra_link_args,
define_macros=macros,
Expand All @@ -662,5 +688,6 @@ def srcpath(name=None, suffix=".pyx", subdir="src"):
setup(
version=versioneer.get_version(),
ext_modules=maybe_cythonize(extensions, compiler_directives=directives),
libraries=internal_libraries,
cmdclass=cmdclass,
)