pandas-dev · lithomas1 · Mar 18, 2023 · Mar 16, 2023 · Mar 16, 2023 · Mar 16, 2023
diff --git a/.circleci/setup_env.sh b/.circleci/setup_env.sh
@@ -55,8 +55,7 @@ if pip list | grep -q ^pandas; then
 fi
 
 echo "Build extensions"
-# GH 47305: Parallel build can causes flaky ImportError from pandas/_libs/tslibs
-python setup.py build_ext -q -j1
+python setup.py build_ext -q -j4
 
 echo "Install pandas"
 python -m pip install --no-build-isolation --no-use-pep517 -e .

@@ -16,7 +16,5 @@ runs:
         python -m pip install -e . --no-build-isolation --no-use-pep517 --no-index
       shell: bash -el {0}
       env:
-        # Cannot use parallel compilation on Windows, see https://github.com/pandas-dev/pandas/issues/30873
-        # GH 47305: Parallel build causes flaky ImportError: /home/runner/work/pandas/pandas/pandas/_libs/tslibs/timestamps.cpython-38-x86_64-linux-gnu.so: undefined symbol: pandas_datetime_to_datetimestruct
-        N_JOBS: 1
-        #N_JOBS: ${{ runner.os == 'Windows' && 1 || 2 }}
+        # https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources
+        N_JOBS: ${{ runner.os == 'macOS' && 3 || 2 }}
@@ -30,7 +30,7 @@ runs:
         environment-name: ${{ inputs.environment-name }}
         extra-specs: ${{ inputs.extra-specs }}
         channels: conda-forge
-        channel-priority: ${{ runner.os == 'macOS' && 'flexible' || 'strict' }}
+        channel-priority: 'strict'
         condarc-file: ci/condarc.yml
         cache-env: true
         cache-downloads: true
@@ -40,7 +40,7 @@ jobs:
           python -m pip install --no-deps -U pip wheel 'setuptools<60.0.0' && \
           python -m pip install versioneer[toml] && \
           python -m pip install cython numpy python-dateutil pytz pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.34.2 && \
-          python setup.py build_ext -q -j1 && \
+          python setup.py build_ext -q -j$(nproc) && \
           python -m pip install --no-build-isolation --no-use-pep517 -e . && \
           python -m pip list && \
           export PANDAS_CI=1 && \

@@ -82,10 +82,9 @@ jobs:
         python -m pip install python-dateutil pytz cython hypothesis>=6.34.2 pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17
         python -m pip list
 
-    # GH 47305: Parallel build can cause flaky ImportError from pandas/_libs/tslibs
     - name: Build Pandas
       run: |
-        python setup.py build_ext -q -j1
+        python setup.py build_ext -q -j4
         python -m pip install -e . --no-build-isolation --no-use-pep517 --no-index
 
     - name: Build Version

diff --git a/MANIFEST.in b/MANIFEST.in
@@ -58,3 +58,5 @@ prune pandas/tests/io/parser/data
 # Selectively re-add *.cxx files that were excluded above
 graft pandas/_libs/src
 graft pandas/_libs/tslibs/src
+include pandas/_libs/pd_parser.h
+include pandas/_libs/pd_parser.c
diff --git a/pandas/_libs/__init__.py b/pandas/_libs/__init__.py
@@ -10,6 +10,11 @@
 ]
 
 
+# Below imports needs to happen first to ensure pandas top level
+# module gets monkeypatched with the pandas_datetime_CAPI
+# see pandas_datetime_exec in pd_datetime.c
+import pandas._libs.pandas_parser  # noqa # isort: skip # type: ignore[reportUnusedImport]
+import pandas._libs.pandas_datetime  # noqa # isort: skip # type: ignore[reportUnusedImport]
 from pandas._libs.interval import Interval
 from pandas._libs.tslibs import (
     NaT,

diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
@@ -20,7 +20,12 @@ from pandas._libs.tslibs.nattype cimport c_NaT as NaT
 from pandas._libs.tslibs.np_datetime cimport (
     NPY_DATETIMEUNIT,
     get_unit_from_dtype,
+    import_pandas_datetime,
 )
+
+import_pandas_datetime()
+
+
 from pandas._libs.tslibs.period cimport is_period_object
 from pandas._libs.tslibs.timedeltas cimport _Timedelta
 from pandas._libs.tslibs.timestamps cimport _Timestamp

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -88,9 +88,11 @@ cdef extern from "numpy/arrayobject.h":
 cdef extern from "numpy/ndarrayobject.h":
     bint PyArray_CheckScalar(obj) nogil
 
-
-cdef extern from "src/parse_helper.h":
+cdef extern from "pd_parser.h":
     int floatify(object, float64_t *result, int *maybe_int) except -1
+    void PandasParser_IMPORT()
+
+PandasParser_IMPORT
 
 from pandas._libs cimport util
 from pandas._libs.util cimport (

diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx
@@ -34,8 +34,11 @@ from pandas._libs.tslibs.np_datetime cimport (
     get_datetime64_unit,
     get_datetime64_value,
     get_timedelta64_value,
+    import_pandas_datetime,
 )
 
+import_pandas_datetime()
+
 from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op
 
 cdef:

diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
@@ -229,9 +229,9 @@ cdef extern from "parser/tokenizer.h":
         int64_t skip_first_N_rows
         int64_t skipfooter
         # pick one, depending on whether the converter requires GIL
-        float64_t (*double_converter)(const char *, char **,
-                                      char, char, char,
-                                      int, int *, int *) nogil
+        double (*double_converter)(const char *, char **,
+                                   char, char, char,
+                                   int, int *, int *) nogil
 
         #  error handling
         char *warn_msg
@@ -249,6 +249,16 @@ cdef extern from "parser/tokenizer.h":
         int seen_uint
         int seen_null
 
+    void COLITER_NEXT(coliter_t, const char *) nogil
+
+cdef extern from "pd_parser.h":
+    void *new_rd_source(object obj) except NULL
+
+    int del_rd_source(void *src)
+
+    void* buffer_rd_bytes(void *source, size_t nbytes,
+                          size_t *bytes_read, int *status, const char *encoding_errors)
+
     void uint_state_init(uint_state *self)
     int uint64_conflict(uint_state *self)
 
@@ -279,26 +289,49 @@ cdef extern from "parser/tokenizer.h":
     uint64_t str_to_uint64(uint_state *state, char *p_item, int64_t int_max,
                            uint64_t uint_max, int *error, char tsep) nogil
 
-    float64_t xstrtod(const char *p, char **q, char decimal,
+    double xstrtod(const char *p, char **q, char decimal,
+                   char sci, char tsep, int skip_trailing,
+                   int *error, int *maybe_int) nogil
+    double precise_xstrtod(const char *p, char **q, char decimal,
+                           char sci, char tsep, int skip_trailing,
+                           int *error, int *maybe_int) nogil
+    double round_trip(const char *p, char **q, char decimal,
                       char sci, char tsep, int skip_trailing,
                       int *error, int *maybe_int) nogil
-    float64_t precise_xstrtod(const char *p, char **q, char decimal,
-                              char sci, char tsep, int skip_trailing,
-                              int *error, int *maybe_int) nogil
-    float64_t round_trip(const char *p, char **q, char decimal,
-                         char sci, char tsep, int skip_trailing,
-                         int *error, int *maybe_int) nogil
 
     int to_boolean(const char *item, uint8_t *val) nogil
 
+    void PandasParser_IMPORT()
 
-cdef extern from "parser/io.h":
-    void *new_rd_source(object obj) except NULL
+PandasParser_IMPORT
 
-    int del_rd_source(void *src)
+# When not invoked directly but rather assigned as a function,
+# cdef extern'ed declarations seem to leave behind an undefined symbol
+cdef double xstrtod_wrapper(const char *p, char **q, char decimal,
+                            char sci, char tsep, int skip_trailing,
+                            int *error, int *maybe_int) nogil:
+    return xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int)
 
-    void* buffer_rd_bytes(void *source, size_t nbytes,
-                          size_t *bytes_read, int *status, const char *encoding_errors)
+
+cdef double precise_xstrtod_wrapper(const char *p, char **q, char decimal,
+                                    char sci, char tsep, int skip_trailing,
+                                    int *error, int *maybe_int) nogil:
+    return precise_xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int)
+
+
+cdef double round_trip_wrapper(const char *p, char **q, char decimal,
+                               char sci, char tsep, int skip_trailing,
+                               int *error, int *maybe_int) nogil:
+    return round_trip(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int)
+
+
+cdef void* buffer_rd_bytes_wrapper(void *source, size_t nbytes,
+                                   size_t *bytes_read, int *status,
+                                   const char *encoding_errors) noexcept:
+    return buffer_rd_bytes(source, nbytes, bytes_read, status, encoding_errors)
+
+cdef int del_rd_source_wrapper(void *src) noexcept:
+    return del_rd_source(src)
 
 
 cdef class TextReader:
@@ -485,11 +518,11 @@ cdef class TextReader:
 
         if float_precision == "round_trip":
             # see gh-15140
-            self.parser.double_converter = round_trip
+            self.parser.double_converter = round_trip_wrapper
         elif float_precision == "legacy":
-            self.parser.double_converter = xstrtod
+            self.parser.double_converter = xstrtod_wrapper
         elif float_precision == "high" or float_precision is None:
-            self.parser.double_converter = precise_xstrtod
+            self.parser.double_converter = precise_xstrtod_wrapper
         else:
             raise ValueError(f"Unrecognized float_precision option: "
                              f"{float_precision}")
@@ -607,8 +640,8 @@ cdef class TextReader:
 
         ptr = new_rd_source(source)
         self.parser.source = ptr
-        self.parser.cb_io = &buffer_rd_bytes
-        self.parser.cb_cleanup = &del_rd_source
+        self.parser.cb_io = buffer_rd_bytes_wrapper
+        self.parser.cb_cleanup = del_rd_source_wrapper
 
     cdef _get_header(self, list prelim_header):
         # header is now a list of lists, so field_count should use header[0]