diff --git a/.circleci/setup_env.sh b/.circleci/setup_env.sh index 52a8cab1cd2de..7f82b613f8cb8 100755 --- a/.circleci/setup_env.sh +++ b/.circleci/setup_env.sh @@ -55,8 +55,7 @@ if pip list | grep -q ^pandas; then fi echo "Build extensions" -# GH 47305: Parallel build can causes flaky ImportError from pandas/_libs/tslibs -python setup.py build_ext -q -j1 +python setup.py build_ext -q -j4 echo "Install pandas" python -m pip install --no-build-isolation --no-use-pep517 -e . diff --git a/.github/actions/build_pandas/action.yml b/.github/actions/build_pandas/action.yml index 23bb988ef4d73..11601564c5d79 100644 --- a/.github/actions/build_pandas/action.yml +++ b/.github/actions/build_pandas/action.yml @@ -16,7 +16,5 @@ runs: python -m pip install -e . --no-build-isolation --no-use-pep517 --no-index shell: bash -el {0} env: - # Cannot use parallel compilation on Windows, see https://github.com/pandas-dev/pandas/issues/30873 - # GH 47305: Parallel build causes flaky ImportError: /home/runner/work/pandas/pandas/pandas/_libs/tslibs/timestamps.cpython-38-x86_64-linux-gnu.so: undefined symbol: pandas_datetime_to_datetimestruct - N_JOBS: 1 - #N_JOBS: ${{ runner.os == 'Windows' && 1 || 2 }} + # https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources + N_JOBS: ${{ runner.os == 'macOS' && 3 || 2 }} diff --git a/.github/actions/setup-conda/action.yml b/.github/actions/setup-conda/action.yml index 002d0020c2df1..efc31bba88f28 100644 --- a/.github/actions/setup-conda/action.yml +++ b/.github/actions/setup-conda/action.yml @@ -30,7 +30,7 @@ runs: environment-name: ${{ inputs.environment-name }} extra-specs: ${{ inputs.extra-specs }} channels: conda-forge - channel-priority: ${{ runner.os == 'macOS' && 'flexible' || 'strict' }} + channel-priority: 'strict' condarc-file: ci/condarc.yml cache-env: true cache-downloads: true diff --git a/.github/workflows/32-bit-linux.yml b/.github/workflows/32-bit-linux.yml 
index 08026a5fd637f..03925991ec632 100644 --- a/.github/workflows/32-bit-linux.yml +++ b/.github/workflows/32-bit-linux.yml @@ -40,7 +40,7 @@ jobs: python -m pip install --no-deps -U pip wheel 'setuptools<60.0.0' && \ python -m pip install versioneer[toml] && \ python -m pip install cython numpy python-dateutil pytz pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.34.2 && \ - python setup.py build_ext -q -j1 && \ + python setup.py build_ext -q -j$(nproc) && \ python -m pip install --no-build-isolation --no-use-pep517 -e . && \ python -m pip list && \ export PANDAS_CI=1 && \ diff --git a/.github/workflows/python-dev.yml b/.github/workflows/python-dev.yml index c84db0d80d0eb..d498c1c3a22a3 100644 --- a/.github/workflows/python-dev.yml +++ b/.github/workflows/python-dev.yml @@ -82,10 +82,9 @@ jobs: python -m pip install python-dateutil pytz cython hypothesis>=6.34.2 pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17 python -m pip list - # GH 47305: Parallel build can cause flaky ImportError from pandas/_libs/tslibs - name: Build Pandas run: | - python setup.py build_ext -q -j1 + python setup.py build_ext -q -j4 python -m pip install -e . 
--no-build-isolation --no-use-pep517 --no-index - name: Build Version diff --git a/MANIFEST.in b/MANIFEST.in index d2b1b8cb887bc..361cd8ff9ec22 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -58,3 +58,5 @@ prune pandas/tests/io/parser/data # Selectively re-add *.cxx files that were excluded above graft pandas/_libs/src graft pandas/_libs/tslibs/src +include pandas/_libs/pd_parser.h +include pandas/_libs/pd_parser.c diff --git a/pandas/_libs/__init__.py b/pandas/_libs/__init__.py index f119e280f5867..2c532cda480f0 100644 --- a/pandas/_libs/__init__.py +++ b/pandas/_libs/__init__.py @@ -10,6 +10,11 @@ ] +# Below imports needs to happen first to ensure pandas top level +# module gets monkeypatched with the pandas_datetime_CAPI +# see pandas_datetime_exec in pd_datetime.c +import pandas._libs.pandas_parser # noqa # isort: skip # type: ignore[reportUnusedImport] +import pandas._libs.pandas_datetime # noqa # isort: skip # type: ignore[reportUnusedImport] from pandas._libs.interval import Interval from pandas._libs.tslibs import ( NaT, diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index b72bda00ec697..18e0bd4014ac8 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -20,7 +20,12 @@ from pandas._libs.tslibs.nattype cimport c_NaT as NaT from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, get_unit_from_dtype, + import_pandas_datetime, ) + +import_pandas_datetime() + + from pandas._libs.tslibs.period cimport is_period_object from pandas._libs.tslibs.timedeltas cimport _Timedelta from pandas._libs.tslibs.timestamps cimport _Timestamp diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 3c455d7db8e2e..a9fcf6b28953b 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -88,9 +88,11 @@ cdef extern from "numpy/arrayobject.h": cdef extern from "numpy/ndarrayobject.h": bint PyArray_CheckScalar(obj) nogil - -cdef extern from "src/parse_helper.h": +cdef extern from "pd_parser.h": int floatify(object, float64_t *result, int 
*maybe_int) except -1 + void PandasParser_IMPORT() + +PandasParser_IMPORT from pandas._libs cimport util from pandas._libs.util cimport ( diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index ecb9baf8d3f65..75955180447c4 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -34,8 +34,11 @@ from pandas._libs.tslibs.np_datetime cimport ( get_datetime64_unit, get_datetime64_value, get_timedelta64_value, + import_pandas_datetime, ) +import_pandas_datetime() + from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op cdef: diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 2839730ca46bd..19a121253e29a 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -229,9 +229,9 @@ cdef extern from "parser/tokenizer.h": int64_t skip_first_N_rows int64_t skipfooter # pick one, depending on whether the converter requires GIL - float64_t (*double_converter)(const char *, char **, - char, char, char, - int, int *, int *) nogil + double (*double_converter)(const char *, char **, + char, char, char, + int, int *, int *) nogil # error handling char *warn_msg @@ -249,6 +249,16 @@ cdef extern from "parser/tokenizer.h": int seen_uint int seen_null + void COLITER_NEXT(coliter_t, const char *) nogil + +cdef extern from "pd_parser.h": + void *new_rd_source(object obj) except NULL + + int del_rd_source(void *src) + + void* buffer_rd_bytes(void *source, size_t nbytes, + size_t *bytes_read, int *status, const char *encoding_errors) + void uint_state_init(uint_state *self) int uint64_conflict(uint_state *self) @@ -279,26 +289,49 @@ cdef extern from "parser/tokenizer.h": uint64_t str_to_uint64(uint_state *state, char *p_item, int64_t int_max, uint64_t uint_max, int *error, char tsep) nogil - float64_t xstrtod(const char *p, char **q, char decimal, + double xstrtod(const char *p, char **q, char decimal, + char sci, char tsep, int skip_trailing, + int *error, int *maybe_int) nogil + double precise_xstrtod(const 
char *p, char **q, char decimal, + char sci, char tsep, int skip_trailing, + int *error, int *maybe_int) nogil + double round_trip(const char *p, char **q, char decimal, char sci, char tsep, int skip_trailing, int *error, int *maybe_int) nogil - float64_t precise_xstrtod(const char *p, char **q, char decimal, - char sci, char tsep, int skip_trailing, - int *error, int *maybe_int) nogil - float64_t round_trip(const char *p, char **q, char decimal, - char sci, char tsep, int skip_trailing, - int *error, int *maybe_int) nogil int to_boolean(const char *item, uint8_t *val) nogil + void PandasParser_IMPORT() -cdef extern from "parser/io.h": - void *new_rd_source(object obj) except NULL +PandasParser_IMPORT - int del_rd_source(void *src) +# When not invoked directly but rather assigned as a function, +# cdef extern'ed declarations seem to leave behind an undefined symbol +cdef double xstrtod_wrapper(const char *p, char **q, char decimal, + char sci, char tsep, int skip_trailing, + int *error, int *maybe_int) nogil: + return xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int) - void* buffer_rd_bytes(void *source, size_t nbytes, - size_t *bytes_read, int *status, const char *encoding_errors) + +cdef double precise_xstrtod_wrapper(const char *p, char **q, char decimal, + char sci, char tsep, int skip_trailing, + int *error, int *maybe_int) nogil: + return precise_xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int) + + +cdef double round_trip_wrapper(const char *p, char **q, char decimal, + char sci, char tsep, int skip_trailing, + int *error, int *maybe_int) nogil: + return round_trip(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int) + + +cdef void* buffer_rd_bytes_wrapper(void *source, size_t nbytes, + size_t *bytes_read, int *status, + const char *encoding_errors) noexcept: + return buffer_rd_bytes(source, nbytes, bytes_read, status, encoding_errors) + +cdef int del_rd_source_wrapper(void *src) noexcept: + return del_rd_source(src) 
cdef class TextReader: @@ -485,11 +518,11 @@ cdef class TextReader: if float_precision == "round_trip": # see gh-15140 - self.parser.double_converter = round_trip + self.parser.double_converter = round_trip_wrapper elif float_precision == "legacy": - self.parser.double_converter = xstrtod + self.parser.double_converter = xstrtod_wrapper elif float_precision == "high" or float_precision is None: - self.parser.double_converter = precise_xstrtod + self.parser.double_converter = precise_xstrtod_wrapper else: raise ValueError(f"Unrecognized float_precision option: " f"{float_precision}") @@ -607,8 +640,8 @@ cdef class TextReader: ptr = new_rd_source(source) self.parser.source = ptr - self.parser.cb_io = &buffer_rd_bytes - self.parser.cb_cleanup = &del_rd_source + self.parser.cb_io = buffer_rd_bytes_wrapper + self.parser.cb_cleanup = del_rd_source_wrapper cdef _get_header(self, list prelim_header): # header is now a list of lists, so field_count should use header[0] diff --git a/pandas/_libs/pd_parser.c b/pandas/_libs/pd_parser.c new file mode 100644 index 0000000000000..15d82b59df3e8 --- /dev/null +++ b/pandas/_libs/pd_parser.c @@ -0,0 +1,178 @@ +/* + +Copyright (c) 2023, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. 
+ +*/ +#define _PANDAS_PARSER_IMPL + +#include "pd_parser.h" +#include "src/parser/io.h" + +static int to_double(char *item, double *p_value, char sci, char decimal, + int *maybe_int) { + char *p_end = NULL; + int error = 0; + + /* Switch to precise xstrtod GH 31364 */ + *p_value = + precise_xstrtod(item, &p_end, decimal, sci, '\0', 1, &error, maybe_int); + + return (error == 0) && (!*p_end); +} + +static int floatify(PyObject *str, double *result, int *maybe_int) { + int status; + char *data; + PyObject *tmp = NULL; + const char sci = 'E'; + const char dec = '.'; + + if (PyBytes_Check(str)) { + data = PyBytes_AS_STRING(str); + } else if (PyUnicode_Check(str)) { + tmp = PyUnicode_AsUTF8String(str); + if (tmp == NULL) { + return -1; + } + data = PyBytes_AS_STRING(tmp); + } else { + PyErr_SetString(PyExc_TypeError, "Invalid object type"); + return -1; + } + + status = to_double(data, result, sci, dec, maybe_int); + + if (!status) { + /* handle inf/-inf infinity/-infinity */ + if (strlen(data) == 3) { + if (0 == strcasecmp(data, "inf")) { + *result = HUGE_VAL; + *maybe_int = 0; + } else { + goto parsingerror; + } + } else if (strlen(data) == 4) { + if (0 == strcasecmp(data, "-inf")) { + *result = -HUGE_VAL; + *maybe_int = 0; + } else if (0 == strcasecmp(data, "+inf")) { + *result = HUGE_VAL; + *maybe_int = 0; + } else { + goto parsingerror; + } + } else if (strlen(data) == 8) { + if (0 == strcasecmp(data, "infinity")) { + *result = HUGE_VAL; + *maybe_int = 0; + } else { + goto parsingerror; + } + } else if (strlen(data) == 9) { + if (0 == strcasecmp(data, "-infinity")) { + *result = -HUGE_VAL; + *maybe_int = 0; + } else if (0 == strcasecmp(data, "+infinity")) { + *result = HUGE_VAL; + *maybe_int = 0; + } else { + goto parsingerror; + } + } else { + goto parsingerror; + } + } + + Py_XDECREF(tmp); + return 0; + +parsingerror: + PyErr_Format(PyExc_ValueError, "Unable to parse string \"%s\"", data); + Py_XDECREF(tmp); + return -1; +} + + +static void 
pandas_parser_destructor(PyObject *op) { + void *ptr = PyCapsule_GetPointer(op, PandasParser_CAPSULE_NAME); + PyMem_Free(ptr); +} + +static int pandas_parser_exec(PyObject *module) { + PandasParser_CAPI *capi = PyMem_Malloc(sizeof(PandasParser_CAPI)); + if (capi == NULL) { + PyErr_NoMemory(); + return -1; + } + + capi->to_double = to_double; + capi->floatify = floatify; + capi->new_rd_source = new_rd_source; + capi->del_rd_source = del_rd_source; + capi->buffer_rd_bytes = buffer_rd_bytes; + capi->uint_state_init = uint_state_init; + capi->uint64_conflict = uint64_conflict; + capi->coliter_setup = coliter_setup; + capi->parser_new = parser_new; + capi->parser_init = parser_init; + capi->parser_free = parser_free; + capi->parser_del = parser_del; + capi->parser_add_skiprow = parser_add_skiprow; + capi->parser_set_skipfirstnrows = parser_set_skipfirstnrows; + capi->parser_set_default_options = parser_set_default_options; + capi->parser_consume_rows = parser_consume_rows; + capi->parser_trim_buffers = parser_trim_buffers; + capi->tokenize_all_rows = tokenize_all_rows; + capi->tokenize_nrows = tokenize_nrows; + capi->str_to_int64 = str_to_int64; + capi->str_to_uint64 = str_to_uint64; + capi->xstrtod = xstrtod; + capi->precise_xstrtod = precise_xstrtod; + capi->round_trip = round_trip; + capi->to_boolean = to_boolean; + + PyObject *capsule = + PyCapsule_New(capi, PandasParser_CAPSULE_NAME, pandas_parser_destructor); + if (capsule == NULL) { + PyMem_Free(capi); + return -1; + } + + // Monkeypatch the top level pandas module to have an attribute for the + // C-API. This is required because Python capsules do not support setting + // this attribute on anything but the top level package. 
Ideally not + // done when cpython gh-6898 gets implemented + PyObject *pandas = PyImport_ImportModule("pandas"); + if (!pandas) { + PyErr_SetString(PyExc_ImportError, + "pd_parser.c could not import module pandas"); + Py_DECREF(capsule); + return -1; + } + + if (PyModule_AddObject(pandas, "_pandas_parser_CAPI", capsule) < 0) { + Py_DECREF(capsule); + return -1; + } + + return 0; +} + +static PyModuleDef_Slot pandas_parser_slots[] = { + {Py_mod_exec, pandas_parser_exec}, {0, NULL}}; + +static struct PyModuleDef pandas_parsermodule = { + PyModuleDef_HEAD_INIT, + .m_name = "pandas._libs.pandas_parser", + + .m_doc = "Internal module with parser support for other extensions", + .m_size = 0, + .m_methods = NULL, + .m_slots = pandas_parser_slots}; + +PyMODINIT_FUNC PyInit_pandas_parser(void) { + return PyModuleDef_Init(&pandas_parsermodule); +} diff --git a/pandas/_libs/pd_parser.h b/pandas/_libs/pd_parser.h new file mode 100644 index 0000000000000..acdc08bbad484 --- /dev/null +++ b/pandas/_libs/pd_parser.h @@ -0,0 +1,113 @@ +/* + +Copyright (c) 2023, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. 
+ +*/ +#ifndef PANDAS__LIBS_PD_PARSER_H_ +#define PANDAS__LIBS_PD_PARSER_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#define PY_SSIZE_T_CLEAN +#include +#include "src/parser/tokenizer.h" + +typedef struct { + int (*to_double)(char *, double *, char, char, int *); + int (*floatify)(PyObject *, double *, int *); + void *(*new_rd_source)(PyObject *); + int (*del_rd_source)(void *); + void *(*buffer_rd_bytes)(void *, size_t, size_t *, int *, const char *); + void (*uint_state_init)(uint_state *); + int (*uint64_conflict)(uint_state *); + void (*coliter_setup)(coliter_t *, parser_t *, int64_t, int64_t); + parser_t *(*parser_new)(void); + int (*parser_init)(parser_t *); + void (*parser_free)(parser_t *); + void (*parser_del)(parser_t *); + int (*parser_add_skiprow)(parser_t *, int64_t); + int (*parser_set_skipfirstnrows)(parser_t *, int64_t); + void (*parser_set_default_options)(parser_t *); + int (*parser_consume_rows)(parser_t *, size_t); + int (*parser_trim_buffers)(parser_t *); + int (*tokenize_all_rows)(parser_t *, const char *); + int (*tokenize_nrows)(parser_t *, size_t, const char *); + int64_t (*str_to_int64)(const char *, int64_t, int64_t, int *, char); + uint64_t (*str_to_uint64)(uint_state *, const char *, int64_t, uint64_t, + int *, char); + double (*xstrtod)(const char *, char **, char, char, char, int, int *, int *); + double (*precise_xstrtod)(const char *, char **, char, char, char, int, int *, + int *); + double (*round_trip)(const char *, char **, char, char, char, int, int *, + int *); + int (*to_boolean)(const char *, uint8_t *); +} PandasParser_CAPI; + +#define PandasParser_CAPSULE_NAME "pandas._pandas_parser_CAPI" + +#ifndef _PANDAS_PARSER_IMPL +static PandasParser_CAPI *PandasParserAPI = NULL; + +#define PandasParser_IMPORT \ + PandasParserAPI = \ + (PandasParser_CAPI *)PyCapsule_Import(PandasParser_CAPSULE_NAME, 0) + +#define to_double(item, p_value, sci, decimal, maybe_int) \ + PandasParserAPI->to_double((item), (p_value), (sci), 
(decimal), (maybe_int)) +#define floatify(str, result, maybe_int) \ + PandasParserAPI->floatify((str), (result), (maybe_int)) +#define new_rd_source(obj) PandasParserAPI->new_rd_source((obj)) +#define del_rd_source(src) PandasParserAPI->del_rd_source((src)) +#define buffer_rd_bytes(source, nbytes, bytes_read, status, encoding_errors) \ + PandasParserAPI->buffer_rd_bytes((source), (nbytes), (bytes_read), (status), \ + (encoding_errors)) +#define uint_state_init(self) PandasParserAPI->uint_state_init((self)) +#define uint64_conflict(self) PandasParserAPI->uint64_conflict((self)) +#define coliter_setup(self, parser, i, start) \ + PandasParserAPI->coliter_setup((self), (parser), (i), (start)) +#define parser_new PandasParserAPI->parser_new +#define parser_init(self) PandasParserAPI->parser_init((self)) +#define parser_free(self) PandasParserAPI->parser_free((self)) +#define parser_del(self) PandasParserAPI->parser_del((self)) +#define parser_add_skiprow(self, row) \ + PandasParserAPI->parser_add_skiprow((self), (row)) +#define parser_set_skipfirstnrows(self, nrows) \ + PandasParserAPI->parser_set_skipfirstnrows((self), (nrows)) +#define parser_set_default_options(self) \ + PandasParserAPI->parser_set_default_options((self)) +#define parser_consume_rows(self, nrows) \ + PandasParserAPI->parser_consume_rows((self), (nrows)) +#define parser_trim_buffers(self) \ + PandasParserAPI->parser_trim_buffers((self)) +#define tokenize_all_rows(self, encoding_errors) \ + PandasParserAPI->tokenize_all_rows((self), (encoding_errors)) +#define tokenize_nrows(self, nrows, encoding_errors) \ + PandasParserAPI->tokenize_nrows((self), (nrows), (encoding_errors)) +#define str_to_int64(p_item, int_min, int_max, error, t_sep) \ + PandasParserAPI->str_to_int64((p_item), (int_min), (int_max), (error), \ + (t_sep)) +#define str_to_uint64(state, p_item, int_max, uint_max, error, t_sep) \ + PandasParserAPI->str_to_uint64((state), (p_item), (int_max), (uint_max), \ + (error), (t_sep)) +#define 
xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int) \ + PandasParserAPI->xstrtod((p), (q), (decimal), (sci), (tsep), \ + (skip_trailing), (error), (maybe_int)) +#define precise_xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, \ + maybe_int) \ + PandasParserAPI->precise_xstrtod((p), (q), (decimal), (sci), (tsep), \ + (skip_trailing), (error), (maybe_int)) +#define round_trip(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int) \ + PandasParserAPI->round_trip((p), (q), (decimal), (sci), (tsep), \ + (skip_trailing), (error), (maybe_int)) +#define to_boolean(item, val) PandasParserAPI->to_boolean((item), (val)) +#endif /* !defined(_PANDAS_PARSER_IMPL) */ + +#ifdef __cplusplus +} +#endif +#endif // PANDAS__LIBS_PD_PARSER_H_ diff --git a/pandas/_libs/src/parse_helper.h b/pandas/_libs/src/parse_helper.h deleted file mode 100644 index d161c4e29fe15..0000000000000 --- a/pandas/_libs/src/parse_helper.h +++ /dev/null @@ -1,100 +0,0 @@ -/* -Copyright (c) 2016, PyData Development Team -All rights reserved. - -Distributed under the terms of the BSD Simplified License. - -The full license is in the LICENSE file, distributed with this software. 
-*/ - -#ifndef PANDAS__LIBS_SRC_PARSE_HELPER_H_ -#define PANDAS__LIBS_SRC_PARSE_HELPER_H_ - -#include -#include "parser/tokenizer.h" - -int to_double(char *item, double *p_value, char sci, char decimal, - int *maybe_int) { - char *p_end = NULL; - int error = 0; - - /* Switch to precise xstrtod GH 31364 */ - *p_value = precise_xstrtod(item, &p_end, decimal, sci, '\0', 1, - &error, maybe_int); - - return (error == 0) && (!*p_end); -} - -int floatify(PyObject *str, double *result, int *maybe_int) { - int status; - char *data; - PyObject *tmp = NULL; - const char sci = 'E'; - const char dec = '.'; - - if (PyBytes_Check(str)) { - data = PyBytes_AS_STRING(str); - } else if (PyUnicode_Check(str)) { - tmp = PyUnicode_AsUTF8String(str); - if (tmp == NULL) { - return -1; - } - data = PyBytes_AS_STRING(tmp); - } else { - PyErr_SetString(PyExc_TypeError, "Invalid object type"); - return -1; - } - - status = to_double(data, result, sci, dec, maybe_int); - - if (!status) { - /* handle inf/-inf infinity/-infinity */ - if (strlen(data) == 3) { - if (0 == strcasecmp(data, "inf")) { - *result = HUGE_VAL; - *maybe_int = 0; - } else { - goto parsingerror; - } - } else if (strlen(data) == 4) { - if (0 == strcasecmp(data, "-inf")) { - *result = -HUGE_VAL; - *maybe_int = 0; - } else if (0 == strcasecmp(data, "+inf")) { - *result = HUGE_VAL; - *maybe_int = 0; - } else { - goto parsingerror; - } - } else if (strlen(data) == 8) { - if (0 == strcasecmp(data, "infinity")) { - *result = HUGE_VAL; - *maybe_int = 0; - } else { - goto parsingerror; - } - } else if (strlen(data) == 9) { - if (0 == strcasecmp(data, "-infinity")) { - *result = -HUGE_VAL; - *maybe_int = 0; - } else if (0 == strcasecmp(data, "+infinity")) { - *result = HUGE_VAL; - *maybe_int = 0; - } else { - goto parsingerror; - } - } else { - goto parsingerror; - } - } - - Py_XDECREF(tmp); - return 0; - -parsingerror: - PyErr_Format(PyExc_ValueError, "Unable to parse string \"%s\"", data); - Py_XDECREF(tmp); - return -1; -} - 
-#endif // PANDAS__LIBS_SRC_PARSE_HELPER_H_ diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c index 34014fd062157..fed9b26d479cb 100644 --- a/pandas/_libs/src/parser/tokenizer.c +++ b/pandas/_libs/src/parser/tokenizer.c @@ -105,7 +105,7 @@ void parser_set_default_options(parser_t *self) { self->skip_footer = 0; } -parser_t *parser_new() { return (parser_t *)calloc(1, sizeof(parser_t)); } +parser_t *parser_new(void) { return (parser_t *)calloc(1, sizeof(parser_t)); } int parser_clear_data_buffers(parser_t *self) { free_if_not_null((void *)&self->stream); diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index e892b50515327..1b8ba8f3f7e6c 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -47,8 +47,8 @@ Numeric decoder derived from TCL library #include #include #include -#include "date_conversions.h" #include "datetime.h" +#include "pd_datetime.h" npy_int64 get_nat(void) { return NPY_MIN_INT64; } @@ -1977,6 +1977,11 @@ PyObject *objToJSON(PyObject *Py_UNUSED(self), PyObject *args, return NULL; } + PandasDateTime_IMPORT; + if (PandasDateTimeAPI == NULL) { + return NULL; + } + static char *kwlist[] = {"obj", "ensure_ascii", "double_precision", diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index d9ed20962b6ae..106f203a16855 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -34,12 +34,17 @@ from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_ns, check_dts_bounds, + import_pandas_datetime, npy_datetimestruct, npy_datetimestruct_to_datetime, pandas_datetime_to_datetimestruct, pydate_to_dt64, string_to_dts, ) + +import_pandas_datetime() + + from pandas._libs.tslibs.strptime cimport parse_today_now from pandas._libs.util cimport ( is_datetime64_object, diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 256157cf70032..cb13cde7a4bed 100644 --- 
a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -41,6 +41,7 @@ from pandas._libs.tslibs.np_datetime cimport ( get_datetime64_unit, get_datetime64_value, get_implementation_bounds, + import_pandas_datetime, npy_datetime, npy_datetimestruct, npy_datetimestruct_to_datetime, @@ -50,6 +51,8 @@ from pandas._libs.tslibs.np_datetime cimport ( string_to_dts, ) +import_pandas_datetime() + from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas._libs.tslibs.nattype cimport ( diff --git a/pandas/_libs/tslibs/dtypes.pyx b/pandas/_libs/tslibs/dtypes.pyx index 699e8aba76dd6..eb24e631e0a36 100644 --- a/pandas/_libs/tslibs/dtypes.pyx +++ b/pandas/_libs/tslibs/dtypes.pyx @@ -5,8 +5,11 @@ from enum import Enum from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, get_conversion_factor, + import_pandas_datetime, ) +import_pandas_datetime() + cdef class PeriodDtypeBase: """ diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 2f1fb7fc44b2f..3873e0c848145 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -43,12 +43,15 @@ from pandas._libs.tslibs.nattype cimport NPY_NAT from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_ns, + import_pandas_datetime, npy_datetimestruct, pandas_datetime_to_datetimestruct, pandas_timedelta_to_timedeltastruct, pandas_timedeltastruct, ) +import_pandas_datetime() + @cython.wraparound(False) @cython.boundscheck(False) diff --git a/pandas/_libs/tslibs/np_datetime.pxd b/pandas/_libs/tslibs/np_datetime.pxd index 3faef6ed5d46e..7e19eb084b59e 100644 --- a/pandas/_libs/tslibs/np_datetime.pxd +++ b/pandas/_libs/tslibs/np_datetime.pxd @@ -54,7 +54,8 @@ cdef extern from "numpy/ndarraytypes.h": int64_t NPY_DATETIME_NAT # elswhere we call this NPY_NAT -cdef extern from "src/datetime/np_datetime.h": + +cdef extern from "src/datetime/pd_datetime.h": ctypedef struct pandas_timedeltastruct: int64_t days int32_t hrs, min, sec, ms, 
us, ns, seconds, microseconds, nanoseconds @@ -71,6 +72,17 @@ cdef extern from "src/datetime/np_datetime.h": pandas_timedeltastruct *result ) nogil + void PandasDateTime_IMPORT() + + ctypedef enum FormatRequirement: + PARTIAL_MATCH + EXACT_MATCH + INFER_FORMAT + +# You must call this before using the PandasDateTime CAPI functions +cdef inline void import_pandas_datetime(): + PandasDateTime_IMPORT + cdef bint cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1 cdef check_dts_bounds(npy_datetimestruct *dts, NPY_DATETIMEUNIT unit=?) @@ -124,9 +136,3 @@ cdef int64_t convert_reso( NPY_DATETIMEUNIT to_reso, bint round_ok, ) except? -1 - -cdef extern from "src/datetime/np_datetime_strings.h": - ctypedef enum FormatRequirement: - PARTIAL_MATCH - EXACT_MATCH - INFER_FORMAT diff --git a/pandas/_libs/tslibs/np_datetime.pyx b/pandas/_libs/tslibs/np_datetime.pyx index d9aac87384952..7e1a516e0d945 100644 --- a/pandas/_libs/tslibs/np_datetime.pyx +++ b/pandas/_libs/tslibs/np_datetime.pyx @@ -20,6 +20,7 @@ from cpython.object cimport ( ) import_datetime() +PandasDateTime_IMPORT import numpy as np @@ -35,7 +36,7 @@ from numpy cimport ( from pandas._libs.tslibs.util cimport get_c_string_buf_and_size -cdef extern from "src/datetime/np_datetime.h": +cdef extern from "src/datetime/pd_datetime.h": int cmp_npy_datetimestruct(npy_datetimestruct *a, npy_datetimestruct *b) @@ -48,7 +49,6 @@ cdef extern from "src/datetime/np_datetime.h": PyArray_DatetimeMetaData get_datetime_metadata_from_dtype(cnp.PyArray_Descr *dtype) -cdef extern from "src/datetime/np_datetime_strings.h": int parse_iso_8601_datetime(const char *str, int len, int want_exc, npy_datetimestruct *out, NPY_DATETIMEUNIT *out_bestunit, @@ -56,7 +56,6 @@ cdef extern from "src/datetime/np_datetime_strings.h": const char *format, int format_len, FormatRequirement exact) - # ---------------------------------------------------------------------- # numpy object inspection diff --git a/pandas/_libs/tslibs/offsets.pyx 
b/pandas/_libs/tslibs/offsets.pyx index d780cf0c4ffe3..0e2ac692e579c 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -60,12 +60,15 @@ from pandas._libs.tslibs.nattype cimport ( from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, get_unit_from_dtype, + import_pandas_datetime, npy_datetimestruct, npy_datetimestruct_to_datetime, pandas_datetime_to_datetimestruct, pydate_to_dtstruct, ) +import_pandas_datetime() + from .dtypes cimport PeriodDtypeCode from .timedeltas cimport ( _Timedelta, diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index c314149e24a4c..cd92e1b8deb34 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -69,10 +69,13 @@ from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, + import_pandas_datetime, npy_datetimestruct, string_to_dts, ) +import_pandas_datetime() + from pandas._libs.tslibs.strptime import array_strptime from pandas._libs.tslibs.util cimport ( diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 7da1cab9af4f9..93cda2ec49c26 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -49,11 +49,14 @@ from pandas._libs.tslibs.np_datetime cimport ( astype_overflowsafe, check_dts_bounds, get_timedelta64_value, + import_pandas_datetime, npy_datetimestruct, npy_datetimestruct_to_datetime, pandas_datetime_to_datetimestruct, ) +import_pandas_datetime() + from pandas._libs.tslibs.timestamps import Timestamp from pandas._libs.tslibs.ccalendar cimport ( diff --git a/pandas/_libs/src/ujson/python/date_conversions.c b/pandas/_libs/tslibs/src/datetime/date_conversions.c similarity index 97% rename from pandas/_libs/src/ujson/python/date_conversions.c rename to pandas/_libs/tslibs/src/datetime/date_conversions.c index 86cb68f869cb0..e2d583470fa51 100644 --- a/pandas/_libs/src/ujson/python/date_conversions.c +++ 
b/pandas/_libs/tslibs/src/datetime/date_conversions.c @@ -9,8 +9,8 @@ The full license is in the LICENSE file, distributed with this software. // but which don't interact with JSON objects directly #include "date_conversions.h" -#include <../../../tslibs/src/datetime/np_datetime.h> -#include <../../../tslibs/src/datetime/np_datetime_strings.h> +#include "np_datetime.h" +#include "np_datetime_strings.h" /* * Function: scaleNanosecToUnit diff --git a/pandas/_libs/src/ujson/python/date_conversions.h b/pandas/_libs/tslibs/src/datetime/date_conversions.h similarity index 88% rename from pandas/_libs/src/ujson/python/date_conversions.h rename to pandas/_libs/tslibs/src/datetime/date_conversions.h index efd707f04197c..45ba710dd42f2 100644 --- a/pandas/_libs/src/ujson/python/date_conversions.h +++ b/pandas/_libs/tslibs/src/datetime/date_conversions.h @@ -5,8 +5,8 @@ Distributed under the terms of the BSD Simplified License. The full license is in the LICENSE file, distributed with this software. */ -#ifndef PANDAS__LIBS_SRC_UJSON_PYTHON_DATE_CONVERSIONS_H_ -#define PANDAS__LIBS_SRC_UJSON_PYTHON_DATE_CONVERSIONS_H_ +#ifndef PANDAS__LIBS_TSLIBS_SRC_DATETIME_DATE_CONVERSIONS_H_ +#define PANDAS__LIBS_TSLIBS_SRC_DATETIME_DATE_CONVERSIONS_H_ #define PY_SSIZE_T_CLEAN #include @@ -36,4 +36,4 @@ npy_datetime PyDateTimeToEpoch(PyObject *dt, NPY_DATETIMEUNIT base); char *int64ToIsoDuration(int64_t value, size_t *len); -#endif // PANDAS__LIBS_SRC_UJSON_PYTHON_DATE_CONVERSIONS_H_ +#endif // PANDAS__LIBS_TSLIBS_SRC_DATETIME_DATE_CONVERSIONS_H_ diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.c b/pandas/_libs/tslibs/src/datetime/np_datetime.c index fa6fc75366b79..f18289c9173f2 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime.c +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.c @@ -28,41 +28,6 @@ This file is derived from NumPy 1.7. 
See NUMPY_LICENSE.txt #include "np_datetime.h" -const npy_datetimestruct _AS_MIN_DTS = { - 1969, 12, 31, 23, 59, 50, 776627, 963145, 224193}; -const npy_datetimestruct _FS_MIN_DTS = { - 1969, 12, 31, 21, 26, 16, 627963, 145224, 193000}; -const npy_datetimestruct _PS_MIN_DTS = { - 1969, 9, 16, 5, 57, 7, 963145, 224193, 0}; -const npy_datetimestruct _NS_MIN_DTS = { - 1677, 9, 21, 0, 12, 43, 145224, 193000, 0}; -const npy_datetimestruct _US_MIN_DTS = { - -290308, 12, 21, 19, 59, 05, 224193, 0, 0}; -const npy_datetimestruct _MS_MIN_DTS = { - -292275055, 5, 16, 16, 47, 4, 193000, 0, 0}; -const npy_datetimestruct _S_MIN_DTS = { - -292277022657, 1, 27, 8, 29, 53, 0, 0, 0}; -const npy_datetimestruct _M_MIN_DTS = { - -17536621475646, 5, 4, 5, 53, 0, 0, 0, 0}; - -const npy_datetimestruct _AS_MAX_DTS = { - 1970, 1, 1, 0, 0, 9, 223372, 36854, 775807}; -const npy_datetimestruct _FS_MAX_DTS = { - 1970, 1, 1, 2, 33, 43, 372036, 854775, 807000}; -const npy_datetimestruct _PS_MAX_DTS = { - 1970, 4, 17, 18, 2, 52, 36854, 775807, 0}; -const npy_datetimestruct _NS_MAX_DTS = { - 2262, 4, 11, 23, 47, 16, 854775, 807000, 0}; -const npy_datetimestruct _US_MAX_DTS = { - 294247, 1, 10, 4, 0, 54, 775807, 0, 0}; -const npy_datetimestruct _MS_MAX_DTS = { - 292278994, 8, 17, 7, 12, 55, 807000, 0, 0}; -const npy_datetimestruct _S_MAX_DTS = { - 292277026596, 12, 4, 15, 30, 7, 0, 0, 0}; -const npy_datetimestruct _M_MAX_DTS = { - 17536621479585, 8, 30, 18, 7, 0, 0, 0, 0}; - - const int days_per_month_table[2][12] = { {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}}; diff --git a/pandas/_libs/tslibs/src/datetime/np_datetime.h b/pandas/_libs/tslibs/src/datetime/np_datetime.h index 6ab915e517cfb..68f72683ab2e4 100644 --- a/pandas/_libs/tslibs/src/datetime/np_datetime.h +++ b/pandas/_libs/tslibs/src/datetime/np_datetime.h @@ -28,22 +28,39 @@ typedef struct { npy_int32 hrs, min, sec, ms, us, ns, seconds, microseconds, nanoseconds; } 
pandas_timedeltastruct; -extern const npy_datetimestruct _AS_MIN_DTS; -extern const npy_datetimestruct _AS_MAX_DTS; -extern const npy_datetimestruct _FS_MIN_DTS; -extern const npy_datetimestruct _FS_MAX_DTS; -extern const npy_datetimestruct _PS_MIN_DTS; -extern const npy_datetimestruct _PS_MAX_DTS; -extern const npy_datetimestruct _NS_MIN_DTS; -extern const npy_datetimestruct _NS_MAX_DTS; -extern const npy_datetimestruct _US_MIN_DTS; -extern const npy_datetimestruct _US_MAX_DTS; -extern const npy_datetimestruct _MS_MIN_DTS; -extern const npy_datetimestruct _MS_MAX_DTS; -extern const npy_datetimestruct _S_MIN_DTS; -extern const npy_datetimestruct _S_MAX_DTS; -extern const npy_datetimestruct _M_MIN_DTS; -extern const npy_datetimestruct _M_MAX_DTS; +static const npy_datetimestruct _AS_MIN_DTS = { + 1969, 12, 31, 23, 59, 50, 776627, 963145, 224193}; +static const npy_datetimestruct _FS_MIN_DTS = { + 1969, 12, 31, 21, 26, 16, 627963, 145224, 193000}; +static const npy_datetimestruct _PS_MIN_DTS = { + 1969, 9, 16, 5, 57, 7, 963145, 224193, 0}; +static const npy_datetimestruct _NS_MIN_DTS = { + 1677, 9, 21, 0, 12, 43, 145224, 193000, 0}; +static const npy_datetimestruct _US_MIN_DTS = { + -290308, 12, 21, 19, 59, 05, 224193, 0, 0}; +static const npy_datetimestruct _MS_MIN_DTS = { + -292275055, 5, 16, 16, 47, 4, 193000, 0, 0}; +static const npy_datetimestruct _S_MIN_DTS = { + -292277022657, 1, 27, 8, 29, 53, 0, 0, 0}; +static const npy_datetimestruct _M_MIN_DTS = { + -17536621475646, 5, 4, 5, 53, 0, 0, 0, 0}; + +static const npy_datetimestruct _AS_MAX_DTS = { + 1970, 1, 1, 0, 0, 9, 223372, 36854, 775807}; +static const npy_datetimestruct _FS_MAX_DTS = { + 1970, 1, 1, 2, 33, 43, 372036, 854775, 807000}; +static const npy_datetimestruct _PS_MAX_DTS = { + 1970, 4, 17, 18, 2, 52, 36854, 775807, 0}; +static const npy_datetimestruct _NS_MAX_DTS = { + 2262, 4, 11, 23, 47, 16, 854775, 807000, 0}; +static const npy_datetimestruct _US_MAX_DTS = { + 294247, 1, 10, 4, 0, 54, 775807, 0, 
0}; +static const npy_datetimestruct _MS_MAX_DTS = { + 292278994, 8, 17, 7, 12, 55, 807000, 0, 0}; +static const npy_datetimestruct _S_MAX_DTS = { + 292277026596, 12, 4, 15, 30, 7, 0, 0, 0}; +static const npy_datetimestruct _M_MAX_DTS = { + 17536621479585, 8, 30, 18, 7, 0, 0, 0, 0}; // stuff pandas needs // ---------------------------------------------------------------------------- diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.c b/pandas/_libs/tslibs/src/datetime/pd_datetime.c new file mode 100644 index 0000000000000..73f63706f2a88 --- /dev/null +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.c @@ -0,0 +1,106 @@ +/* + +Copyright (c) 2016, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. + +Copyright (c) 2005-2011, NumPy Developers +All rights reserved. + +This file is derived from NumPy 1.7. See NUMPY_LICENSE.txt + +*/ + +#define _PANDAS_DATETIME_IMPL + +#define PY_SSIZE_T_CLEAN +#include <Python.h> + +#include "datetime.h" +#include "pd_datetime.h" + + +// Capsule destructor: free the heap-allocated C-API table held by the capsule. +static void pandas_datetime_destructor(PyObject *op) { + void *ptr = PyCapsule_GetPointer(op, PandasDateTime_CAPSULE_NAME); + PyMem_Free(ptr); +} + +// Py_mod_exec slot: fill a PandasDateTime_CAPI table with the datetime helper +// functions, wrap it in a capsule and attach it to the top-level pandas module +// as _pandas_datetime_CAPI. Returns 0 on success and -1 on failure. +static int pandas_datetime_exec(PyObject *module) { + PyDateTime_IMPORT; + PandasDateTime_CAPI *capi = PyMem_Malloc(sizeof(PandasDateTime_CAPI)); + if (capi == NULL) { + PyErr_NoMemory(); + return -1; + } + capi->npy_datetimestruct_to_datetime = npy_datetimestruct_to_datetime; + capi->scaleNanosecToUnit = scaleNanosecToUnit; + capi->int64ToIso = int64ToIso; + capi->NpyDateTimeToEpoch = NpyDateTimeToEpoch; + capi->PyDateTimeToIso = PyDateTimeToIso; + capi->PyDateTimeToEpoch = PyDateTimeToEpoch; + capi->int64ToIsoDuration = int64ToIsoDuration; + capi->pandas_datetime_to_datetimestruct = pandas_datetime_to_datetimestruct; + capi->pandas_timedelta_to_timedeltastruct = + pandas_timedelta_to_timedeltastruct; +
capi->convert_pydatetime_to_datetimestruct = + convert_pydatetime_to_datetimestruct; + capi->cmp_npy_datetimestruct = cmp_npy_datetimestruct; + capi->get_datetime_metadata_from_dtype = get_datetime_metadata_from_dtype; + capi->parse_iso_8601_datetime = parse_iso_8601_datetime; + capi->get_datetime_iso_8601_strlen = get_datetime_iso_8601_strlen; + capi->make_iso_8601_datetime = make_iso_8601_datetime; + capi->make_iso_8601_timedelta = make_iso_8601_timedelta; + + PyObject *capsule = PyCapsule_New(capi, PandasDateTime_CAPSULE_NAME, + pandas_datetime_destructor); + if (capsule == NULL) { + PyMem_Free(capi); + return -1; + } + + // Monkeypatch the top level pandas module to have an attribute for the + // C-API. This is required because Python capsules do not support setting + // this attribute on anything but the top level package. Ideally not + // done when cpython gh-6898 gets implemented + PyObject *pandas = PyImport_ImportModule("pandas"); + if (!pandas) { + PyErr_SetString(PyExc_ImportError, + "pd_datetime.c could not import module pandas"); + Py_DECREF(capsule); + return -1; + } + + if (PyModule_AddObject(pandas, "_pandas_datetime_CAPI", capsule) < 0) { + Py_DECREF(capsule); + Py_DECREF(pandas); + return -1; + } + + // PyImport_ImportModule returned a new reference; release it now that the + // capsule has been attached to the module. + Py_DECREF(pandas); + return 0; +} + +static PyModuleDef_Slot pandas_datetime_slots[] = { + {Py_mod_exec, pandas_datetime_exec}, {0, NULL}}; + +static struct PyModuleDef pandas_datetimemodule = { + PyModuleDef_HEAD_INIT, + .m_name = "pandas._libs.pandas_datetime", + + .m_doc = "Internal module with datetime support for other extensions", + .m_size = 0, + .m_methods = NULL, + .m_slots = pandas_datetime_slots}; + +PyMODINIT_FUNC PyInit_pandas_datetime(void) { + return PyModuleDef_Init(&pandas_datetimemodule); +} diff --git a/pandas/_libs/tslibs/src/datetime/pd_datetime.h b/pandas/_libs/tslibs/src/datetime/pd_datetime.h new file mode 100644 index 0000000000000..e80e9bbeb9e6c --- /dev/null +++ b/pandas/_libs/tslibs/src/datetime/pd_datetime.h @@ -0,0 +1,115 @@ +/* + +Copyright (c) 2016, PyData
Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. +Written by Mark Wiebe (mwwiebe@gmail.com) + +Copyright (c) 2011 by Enthought, Inc. +Copyright (c) 2005-2011, NumPy Developers + +All rights reserved. +See NUMPY_LICENSE.txt for the license. +*/ + +#ifndef PANDAS__LIBS_TSLIBS_SRC_DATETIME_PD_DATETIME_H_ +#define PANDAS__LIBS_TSLIBS_SRC_DATETIME_PD_DATETIME_H_ + +#ifndef NPY_NO_DEPRECATED_API +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#endif // NPY_NO_DEPRECATED_API + +#include +#include "np_datetime.h" +#include "np_datetime_strings.h" +#include "date_conversions.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + npy_datetime (*npy_datetimestruct_to_datetime)(NPY_DATETIMEUNIT, + const npy_datetimestruct *); + int (*scaleNanosecToUnit)(npy_int64 *, NPY_DATETIMEUNIT); + char *(*int64ToIso)(int64_t, NPY_DATETIMEUNIT, size_t *); + npy_datetime (*NpyDateTimeToEpoch)(npy_datetime, NPY_DATETIMEUNIT); + char *(*PyDateTimeToIso)(PyObject *, NPY_DATETIMEUNIT, size_t *); + npy_datetime (*PyDateTimeToEpoch)(PyObject *, NPY_DATETIMEUNIT); + char *(*int64ToIsoDuration)(int64_t, size_t *); + void (*pandas_datetime_to_datetimestruct)(npy_datetime, NPY_DATETIMEUNIT, + npy_datetimestruct *); + void (*pandas_timedelta_to_timedeltastruct)(npy_datetime, NPY_DATETIMEUNIT, + pandas_timedeltastruct *); + int (*convert_pydatetime_to_datetimestruct)(PyObject *, npy_datetimestruct *); + int (*cmp_npy_datetimestruct)(const npy_datetimestruct *, + const npy_datetimestruct *); + PyArray_DatetimeMetaData (*get_datetime_metadata_from_dtype)(PyArray_Descr *); + int (*parse_iso_8601_datetime)(const char *, int, int, npy_datetimestruct *, + NPY_DATETIMEUNIT *, int *, int *, const char *, + int, FormatRequirement); + int (*get_datetime_iso_8601_strlen)(int, NPY_DATETIMEUNIT); + int (*make_iso_8601_datetime)(npy_datetimestruct *, char *, int, int, + 
NPY_DATETIMEUNIT); + int (*make_iso_8601_timedelta)(pandas_timedeltastruct *, char *, size_t *); +} PandasDateTime_CAPI; + +// The capsule name appears limited to module.attributename; see bpo-32414 +// cpython has an open PR gh-6898 to fix, but hasn't had traction for years +#define PandasDateTime_CAPSULE_NAME "pandas._pandas_datetime_CAPI" + +/* block used as part of public API */ +#ifndef _PANDAS_DATETIME_IMPL +static PandasDateTime_CAPI *PandasDateTimeAPI = NULL; + +#define PandasDateTime_IMPORT \ + PandasDateTimeAPI = \ + (PandasDateTime_CAPI *)PyCapsule_Import(PandasDateTime_CAPSULE_NAME, 0) + +#define npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT, npy_datetimestruct) \ + PandasDateTimeAPI->npy_datetimestruct_to_datetime((NPY_DATETIMEUNIT), \ + (npy_datetimestruct)) +#define scaleNanosecToUnit(value, unit) \ + PandasDateTimeAPI->scaleNanosecToUnit((value), (unit)) +#define int64ToIso(value, base, len) \ + PandasDateTimeAPI->int64ToIso((value), (base), (len)) +#define NpyDateTimeToEpoch(dt, base) \ + PandasDateTimeAPI->NpyDateTimeToEpoch((dt), (base)) +#define PyDateTimeToIso(obj, base, len) \ + PandasDateTimeAPI->PyDateTimeToIso((obj), (base), (len)) +#define PyDateTimeToEpoch(dt, base) \ + PandasDateTimeAPI->PyDateTimeToEpoch((dt), (base)) +#define int64ToIsoDuration(value, len) \ + PandasDateTimeAPI->int64ToIsoDuration((value), (len)) +#define pandas_datetime_to_datetimestruct(dt, base, out) \ + PandasDateTimeAPI->pandas_datetime_to_datetimestruct((dt), (base), (out)) +#define pandas_timedelta_to_timedeltastruct(td, base, out) \ + PandasDateTimeAPI->pandas_timedelta_to_timedeltastruct((td), (base), (out)) +#define convert_pydatetime_to_datetimestruct(dtobj, out) \ + PandasDateTimeAPI->convert_pydatetime_to_datetimestruct((dtobj), (out)) +#define cmp_npy_datetimestruct(a, b) \ + PandasDateTimeAPI->cmp_npy_datetimestruct((a), (b)) +#define get_datetime_metadata_from_dtype(dtype) \ + PandasDateTimeAPI->get_datetime_metadata_from_dtype((dtype)) +#define 
parse_iso_8601_datetime(str, len, want_exc, out, out_bestunit, \ + out_local, out_tzoffset, format, format_len, \ + format_requirement) \ + PandasDateTimeAPI->parse_iso_8601_datetime( \ + (str), (len), (want_exc), (out), (out_bestunit), (out_local), \ + (out_tzoffset), (format), (format_len), (format_requirement)) +#define get_datetime_iso_8601_strlen(local, base) \ + PandasDateTimeAPI->get_datetime_iso_8601_strlen((local), (base)) +#define make_iso_8601_datetime(dts, outstr, outlen, utc, base) \ + PandasDateTimeAPI->make_iso_8601_datetime((dts), (outstr), (outlen), (utc), \ + (base)) +#define make_iso_8601_timedelta(tds, outstr, outlen) \ + PandasDateTimeAPI->make_iso_8601_timedelta((tds), (outstr), (outlen)) +#endif /* !defined(_PANDAS_DATETIME_IMPL) */ + +#ifdef __cplusplus +} +#endif +#endif // PANDAS__LIBS_TSLIBS_SRC_DATETIME_PD_DATETIME_H_ diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index cf847746f16cd..542afa9315a60 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -57,12 +57,16 @@ from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_ns, check_dts_bounds, + import_pandas_datetime, npy_datetimestruct, npy_datetimestruct_to_datetime, pydate_to_dt64, pydatetime_to_dt64, string_to_dts, ) + +import_pandas_datetime() + from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime from pandas._libs.tslibs.timestamps cimport _Timestamp from pandas._libs.util cimport ( diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 01755fdd65654..62cc07fabb747 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -54,12 +54,15 @@ from pandas._libs.tslibs.np_datetime cimport ( get_datetime64_unit, get_timedelta64_value, get_unit_from_dtype, + import_pandas_datetime, npy_datetimestruct, pandas_datetime_to_datetimestruct, pandas_timedelta_to_timedeltastruct, pandas_timedeltastruct, ) +import_pandas_datetime() + 
from pandas._libs.tslibs.np_datetime import ( OutOfBoundsDatetime, OutOfBoundsTimedelta, diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index fb351f1e41f60..10a331f302cc4 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -88,12 +88,15 @@ from pandas._libs.tslibs.np_datetime cimport ( get_datetime64_unit, get_datetime64_value, get_unit_from_dtype, + import_pandas_datetime, npy_datetimestruct, npy_datetimestruct_to_datetime, pandas_datetime_to_datetimestruct, pydatetime_to_dtstruct, ) +import_pandas_datetime() + from pandas._libs.tslibs.np_datetime import ( OutOfBoundsDatetime, OutOfBoundsTimedelta, diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 13053b6a555c5..e17d264333264 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -34,10 +34,14 @@ from pandas._libs.tslibs.dtypes cimport ( from pandas._libs.tslibs.nattype cimport NPY_NAT from pandas._libs.tslibs.np_datetime cimport ( NPY_DATETIMEUNIT, + import_pandas_datetime, npy_datetimestruct, pandas_datetime_to_datetimestruct, pydatetime_to_dt64, ) + +import_pandas_datetime() + from pandas._libs.tslibs.timezones cimport ( get_dst_info, is_fixed_offset, diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index 2f00948c71e02..f424b74c6e577 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -29,9 +29,13 @@ from .nattype cimport ( from .np_datetime cimport ( NPY_DATETIMEUNIT, NPY_FR_ns, + import_pandas_datetime, npy_datetimestruct, pandas_datetime_to_datetimestruct, ) + +import_pandas_datetime() + from .period cimport get_period_ordinal from .timestamps cimport create_timestamp_from_ts from .timezones cimport is_utc diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 94430e23b054a..463ed6051e910 100644 --- a/pandas/tests/api/test_api.py +++ 
b/pandas/tests/api/test_api.py @@ -181,6 +181,8 @@ class TestPDApi(Base): "_config", "_libs", "_is_numpy_dev", + "_pandas_datetime_CAPI", + "_pandas_parser_CAPI", "_testing", "_typing", "_version", diff --git a/setup.py b/setup.py index b6dfcc5fbdb0d..6ceb3605b9bb1 100755 --- a/setup.py +++ b/setup.py @@ -124,15 +124,17 @@ def initialize_options(self): self._clean_exclude = [ pjoin(dt, "np_datetime.c"), pjoin(dt, "np_datetime_strings.c"), + pjoin(dt, "date_conversions.c"), pjoin(parser, "tokenizer.c"), pjoin(parser, "io.c"), pjoin(ujson_python, "ujson.c"), pjoin(ujson_python, "objToJSON.c"), pjoin(ujson_python, "JSONtoObj.c"), - pjoin(ujson_python, "date_conversions.c"), pjoin(ujson_lib, "ultrajsonenc.c"), pjoin(ujson_lib, "ultrajsondec.c"), pjoin(util, "move.c"), + pjoin(tsbase, "datetime", "pd_datetime.c"), + pjoin("pandas", "_libs", "pd_parser.c"), ] for root, dirs, files in os.walk("pandas"): @@ -337,7 +339,7 @@ def run(self): if os.environ.get("PANDAS_CI", "0") == "1": extra_compile_args.append("-Werror") if debugging_symbols_requested: - extra_compile_args.append("-g") + extra_compile_args.append("-g3") extra_compile_args.append("-UNDEBUG") extra_compile_args.append("-O0") @@ -434,9 +436,9 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): klib_include = ["pandas/_libs/src/klib"] +tseries_includes = ["pandas/_libs/tslibs/src/datetime"] tseries_depends = [ - "pandas/_libs/tslibs/src/datetime/np_datetime.h", - "pandas/_libs/tslibs/src/datetime/np_datetime_strings.h", + "pandas/_libs/tslibs/src/datetime/pd_datetime.h", ] ext_data = { @@ -473,19 +475,15 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "pyxfile": "_libs/lib", "depends": lib_depends + tseries_depends, "include": klib_include, # due to tokenizer import - "sources": ["pandas/_libs/src/parser/tokenizer.c"], }, "_libs.missing": {"pyxfile": "_libs/missing", "depends": tseries_depends}, "_libs.parsers": { "pyxfile": "_libs/parsers", - "include": klib_include + ["pandas/_libs/src"], +
"include": klib_include + ["pandas/_libs/src", "pandas/_libs"], "depends": [ "pandas/_libs/src/parser/tokenizer.h", "pandas/_libs/src/parser/io.h", - ], - "sources": [ - "pandas/_libs/src/parser/tokenizer.c", - "pandas/_libs/src/parser/io.c", + "pandas/_libs/pd_parser.h", ], }, "_libs.reduction": {"pyxfile": "_libs/reduction"}, @@ -497,7 +495,6 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.tslib": { "pyxfile": "_libs/tslib", "depends": tseries_depends, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.tslibs.base": {"pyxfile": "_libs/tslibs/base"}, "_libs.tslibs.ccalendar": {"pyxfile": "_libs/tslibs/ccalendar"}, @@ -505,63 +502,54 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.tslibs.conversion": { "pyxfile": "_libs/tslibs/conversion", "depends": tseries_depends, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], + "include": klib_include, }, "_libs.tslibs.fields": { "pyxfile": "_libs/tslibs/fields", "depends": tseries_depends, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.tslibs.nattype": {"pyxfile": "_libs/tslibs/nattype"}, "_libs.tslibs.np_datetime": { "pyxfile": "_libs/tslibs/np_datetime", "depends": tseries_depends, - "sources": [ - "pandas/_libs/tslibs/src/datetime/np_datetime.c", - "pandas/_libs/tslibs/src/datetime/np_datetime_strings.c", - ], + "include": tseries_includes, }, "_libs.tslibs.offsets": { "pyxfile": "_libs/tslibs/offsets", "depends": tseries_depends, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], + "include": tseries_includes, }, "_libs.tslibs.parsing": { "pyxfile": "_libs/tslibs/parsing", - "include": klib_include, + "include": tseries_includes + klib_include, "depends": ["pandas/_libs/src/parser/tokenizer.h"], "sources": ["pandas/_libs/src/parser/tokenizer.c"], }, "_libs.tslibs.period": { "pyxfile": "_libs/tslibs/period", "depends": tseries_depends, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], },
"_libs.tslibs.strptime": { "pyxfile": "_libs/tslibs/strptime", "depends": tseries_depends, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.tslibs.timedeltas": { "pyxfile": "_libs/tslibs/timedeltas", "depends": tseries_depends, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.tslibs.timestamps": { "pyxfile": "_libs/tslibs/timestamps", + "include": tseries_includes, "depends": tseries_depends, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.tslibs.timezones": {"pyxfile": "_libs/tslibs/timezones"}, "_libs.tslibs.tzconversion": { "pyxfile": "_libs/tslibs/tzconversion", "depends": tseries_depends, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.tslibs.vectorized": { "pyxfile": "_libs/tslibs/vectorized", "depends": tseries_depends, - "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.testing": {"pyxfile": "_libs/testing"}, "_libs.window.aggregations": { @@ -627,28 +615,23 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "pandas._libs.json", depends=[ "pandas/_libs/src/ujson/lib/ultrajson.h", - "pandas/_libs/src/ujson/python/date_conversions.h", + "pandas/_libs/tslibs/src/datetime/pd_datetime.h", ], sources=( [ "pandas/_libs/src/ujson/python/ujson.c", "pandas/_libs/src/ujson/python/objToJSON.c", - "pandas/_libs/src/ujson/python/date_conversions.c", "pandas/_libs/src/ujson/python/JSONtoObj.c", "pandas/_libs/src/ujson/lib/ultrajsonenc.c", "pandas/_libs/src/ujson/lib/ultrajsondec.c", ] - + [ - "pandas/_libs/tslibs/src/datetime/np_datetime.c", - "pandas/_libs/tslibs/src/datetime/np_datetime_strings.c", - ] ), include_dirs=[ "pandas/_libs/src/ujson/python", "pandas/_libs/src/ujson/lib", - "pandas/_libs/src/datetime", numpy.get_include(), - ], + ] + + tseries_includes, extra_compile_args=(extra_compile_args), extra_link_args=extra_link_args, define_macros=macros, @@ -657,6 +640,59 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): 
extensions.append(ujson_ext) +# ---------------------------------------------------------------------- + +# ---------------------------------------------------------------------- +# pd_datetime +pd_dt_ext = Extension( + "pandas._libs.pandas_datetime", + depends=["pandas/_libs/tslibs/src/datetime/pd_datetime.h"], + sources=( + [ + "pandas/_libs/tslibs/src/datetime/np_datetime.c", + "pandas/_libs/tslibs/src/datetime/np_datetime_strings.c", + "pandas/_libs/tslibs/src/datetime/date_conversions.c", + "pandas/_libs/tslibs/src/datetime/pd_datetime.c", + ] + ), + include_dirs=tseries_includes + + [ + numpy.get_include(), + ], + extra_compile_args=(extra_compile_args), + extra_link_args=extra_link_args, + define_macros=macros, +) + + +extensions.append(pd_dt_ext) + +# ---------------------------------------------------------------------- + +# ---------------------------------------------------------------------- +# pd_parser +pd_parser_ext = Extension( + "pandas._libs.pandas_parser", + depends=["pandas/_libs/pd_parser.h"], + sources=( + [ + "pandas/_libs/src/parser/tokenizer.c", + "pandas/_libs/src/parser/io.c", + "pandas/_libs/pd_parser.c", + ] + ), + include_dirs=[ + "pandas/_libs/src/klib", + ], + extra_compile_args=(extra_compile_args), + extra_link_args=extra_link_args, + define_macros=macros, +) + + +extensions.append(pd_parser_ext) + + # ----------------------------------------------------------------------