Skip to content

Re-land PyCapsule #52021

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .circleci/setup_env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,7 @@ if pip list | grep -q ^pandas; then
fi

echo "Build extensions"
# GH 47305: Parallel build can cause flaky ImportError from pandas/_libs/tslibs
python setup.py build_ext -q -j1
python setup.py build_ext -q -j4

echo "Install pandas"
python -m pip install --no-build-isolation --no-use-pep517 -e .
Expand Down
6 changes: 2 additions & 4 deletions .github/actions/build_pandas/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,5 @@ runs:
python -m pip install -e . --no-build-isolation --no-use-pep517 --no-index
shell: bash -el {0}
env:
# Cannot use parallel compilation on Windows, see https://github.com/pandas-dev/pandas/issues/30873
# GH 47305: Parallel build causes flaky ImportError: /home/runner/work/pandas/pandas/pandas/_libs/tslibs/timestamps.cpython-38-x86_64-linux-gnu.so: undefined symbol: pandas_datetime_to_datetimestruct
N_JOBS: 1
#N_JOBS: ${{ runner.os == 'Windows' && 1 || 2 }}
# https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources
N_JOBS: ${{ runner.os == 'macOS' && 3 || 2 }}
2 changes: 1 addition & 1 deletion .github/actions/setup-conda/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ runs:
environment-name: ${{ inputs.environment-name }}
extra-specs: ${{ inputs.extra-specs }}
channels: conda-forge
channel-priority: ${{ runner.os == 'macOS' && 'flexible' || 'strict' }}
channel-priority: 'strict'
condarc-file: ci/condarc.yml
cache-env: true
cache-downloads: true
2 changes: 1 addition & 1 deletion .github/workflows/32-bit-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:
python -m pip install --no-deps -U pip wheel 'setuptools<60.0.0' && \
python -m pip install versioneer[toml] && \
python -m pip install cython numpy python-dateutil pytz pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.34.2 && \
python setup.py build_ext -q -j1 && \
python setup.py build_ext -q -j$(nproc) && \
python -m pip install --no-build-isolation --no-use-pep517 -e . && \
python -m pip list && \
export PANDAS_CI=1 && \
Expand Down
3 changes: 1 addition & 2 deletions .github/workflows/python-dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,9 @@ jobs:
python -m pip install python-dateutil pytz cython hypothesis>=6.34.2 pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17
python -m pip list

# GH 47305: Parallel build can cause flaky ImportError from pandas/_libs/tslibs
- name: Build Pandas
run: |
python setup.py build_ext -q -j1
python setup.py build_ext -q -j4
python -m pip install -e . --no-build-isolation --no-use-pep517 --no-index

- name: Build Version
Expand Down
2 changes: 2 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,5 @@ prune pandas/tests/io/parser/data
# Selectively re-add *.cxx files that were excluded above
graft pandas/_libs/src
graft pandas/_libs/tslibs/src
include pandas/_libs/pd_parser.h
include pandas/_libs/pd_parser.c
5 changes: 5 additions & 0 deletions pandas/_libs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@
]


# The imports below need to happen first to ensure the pandas top-level
# module gets monkeypatched with the pandas_datetime_CAPI
# see pandas_datetime_exec in pd_datetime.c
import pandas._libs.pandas_parser # noqa # isort: skip # type: ignore[reportUnusedImport]
import pandas._libs.pandas_datetime # noqa # isort: skip # type: ignore[reportUnusedImport]
from pandas._libs.interval import Interval
from pandas._libs.tslibs import (
NaT,
Expand Down
5 changes: 5 additions & 0 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,12 @@ from pandas._libs.tslibs.nattype cimport c_NaT as NaT
from pandas._libs.tslibs.np_datetime cimport (
NPY_DATETIMEUNIT,
get_unit_from_dtype,
import_pandas_datetime,
)

import_pandas_datetime()


from pandas._libs.tslibs.period cimport is_period_object
from pandas._libs.tslibs.timedeltas cimport _Timedelta
from pandas._libs.tslibs.timestamps cimport _Timestamp
Expand Down
6 changes: 4 additions & 2 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,11 @@ cdef extern from "numpy/arrayobject.h":
cdef extern from "numpy/ndarrayobject.h":
bint PyArray_CheckScalar(obj) nogil


cdef extern from "src/parse_helper.h":
cdef extern from "pd_parser.h":
int floatify(object, float64_t *result, int *maybe_int) except -1
void PandasParser_IMPORT()

PandasParser_IMPORT

from pandas._libs cimport util
from pandas._libs.util cimport (
Expand Down
3 changes: 3 additions & 0 deletions pandas/_libs/missing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,11 @@ from pandas._libs.tslibs.np_datetime cimport (
get_datetime64_unit,
get_datetime64_value,
get_timedelta64_value,
import_pandas_datetime,
)

import_pandas_datetime()

from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op

cdef:
Expand Down
73 changes: 53 additions & 20 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -229,9 +229,9 @@ cdef extern from "parser/tokenizer.h":
int64_t skip_first_N_rows
int64_t skipfooter
# pick one, depending on whether the converter requires GIL
float64_t (*double_converter)(const char *, char **,
char, char, char,
int, int *, int *) nogil
double (*double_converter)(const char *, char **,
char, char, char,
int, int *, int *) nogil

# error handling
char *warn_msg
Expand All @@ -249,6 +249,16 @@ cdef extern from "parser/tokenizer.h":
int seen_uint
int seen_null

void COLITER_NEXT(coliter_t, const char *) nogil

cdef extern from "pd_parser.h":
void *new_rd_source(object obj) except NULL

int del_rd_source(void *src)

void* buffer_rd_bytes(void *source, size_t nbytes,
size_t *bytes_read, int *status, const char *encoding_errors)

void uint_state_init(uint_state *self)
int uint64_conflict(uint_state *self)

Expand Down Expand Up @@ -279,26 +289,49 @@ cdef extern from "parser/tokenizer.h":
uint64_t str_to_uint64(uint_state *state, char *p_item, int64_t int_max,
uint64_t uint_max, int *error, char tsep) nogil

float64_t xstrtod(const char *p, char **q, char decimal,
double xstrtod(const char *p, char **q, char decimal,
char sci, char tsep, int skip_trailing,
int *error, int *maybe_int) nogil
double precise_xstrtod(const char *p, char **q, char decimal,
char sci, char tsep, int skip_trailing,
int *error, int *maybe_int) nogil
double round_trip(const char *p, char **q, char decimal,
char sci, char tsep, int skip_trailing,
int *error, int *maybe_int) nogil
float64_t precise_xstrtod(const char *p, char **q, char decimal,
char sci, char tsep, int skip_trailing,
int *error, int *maybe_int) nogil
float64_t round_trip(const char *p, char **q, char decimal,
char sci, char tsep, int skip_trailing,
int *error, int *maybe_int) nogil

int to_boolean(const char *item, uint8_t *val) nogil

void PandasParser_IMPORT()

cdef extern from "parser/io.h":
void *new_rd_source(object obj) except NULL
PandasParser_IMPORT

int del_rd_source(void *src)
# When not invoked directly but rather assigned as a function,
# cdef extern'ed declarations seem to leave behind an undefined symbol
#
# xstrtod_wrapper is a thin nogil forwarder: it passes every argument
# unchanged to the extern'ed xstrtod and returns its result. Its signature
# mirrors the parser's double_converter function pointer, and it is the value
# stored there when float_precision == "legacy".
cdef double xstrtod_wrapper(const char *p, char **q, char decimal,
char sci, char tsep, int skip_trailing,
int *error, int *maybe_int) nogil:
return xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int)

void* buffer_rd_bytes(void *source, size_t nbytes,
size_t *bytes_read, int *status, const char *encoding_errors)

# Thin nogil forwarder around the extern'ed precise_xstrtod: passes every
# argument through unchanged and returns the result. Stored in the parser's
# double_converter when float_precision is "high" or None.
cdef double precise_xstrtod_wrapper(const char *p, char **q, char decimal,
char sci, char tsep, int skip_trailing,
int *error, int *maybe_int) nogil:
return precise_xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int)


# Thin nogil forwarder around the extern'ed round_trip: passes every argument
# through unchanged and returns the result. Stored in the parser's
# double_converter when float_precision == "round_trip" (see gh-15140).
cdef double round_trip_wrapper(const char *p, char **q, char decimal,
char sci, char tsep, int skip_trailing,
int *error, int *maybe_int) nogil:
return round_trip(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int)


# Forwarder around the extern'ed buffer_rd_bytes, declared noexcept: passes
# source, nbytes, bytes_read, status and encoding_errors through unchanged and
# returns the resulting pointer. TextReader installs it as parser.cb_io.
cdef void* buffer_rd_bytes_wrapper(void *source, size_t nbytes,
size_t *bytes_read, int *status,
const char *encoding_errors) noexcept:
return buffer_rd_bytes(source, nbytes, bytes_read, status, encoding_errors)

# Forwarder around the extern'ed del_rd_source, declared noexcept: passes src
# through and returns the int result. TextReader installs it as
# parser.cb_cleanup.
cdef int del_rd_source_wrapper(void *src) noexcept:
return del_rd_source(src)


cdef class TextReader:
Expand Down Expand Up @@ -485,11 +518,11 @@ cdef class TextReader:

if float_precision == "round_trip":
# see gh-15140
self.parser.double_converter = round_trip
self.parser.double_converter = round_trip_wrapper
elif float_precision == "legacy":
self.parser.double_converter = xstrtod
self.parser.double_converter = xstrtod_wrapper
elif float_precision == "high" or float_precision is None:
self.parser.double_converter = precise_xstrtod
self.parser.double_converter = precise_xstrtod_wrapper
else:
raise ValueError(f"Unrecognized float_precision option: "
f"{float_precision}")
Expand Down Expand Up @@ -607,8 +640,8 @@ cdef class TextReader:

ptr = new_rd_source(source)
self.parser.source = ptr
self.parser.cb_io = &buffer_rd_bytes
self.parser.cb_cleanup = &del_rd_source
self.parser.cb_io = buffer_rd_bytes_wrapper
self.parser.cb_cleanup = del_rd_source_wrapper

cdef _get_header(self, list prelim_header):
# header is now a list of lists, so field_count should use header[0]
Expand Down
Loading