Skip to content

Commit 74cbabf

Browse files
authored
Merge branch 'main' into feature/44764_perf_issue_new_period
2 parents 4becd71 + 6169cba commit 74cbabf

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

66 files changed

+932
-376
lines changed

.circleci/setup_env.sh

+1-2
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,7 @@ if pip list | grep -q ^pandas; then
5555
fi
5656

5757
echo "Build extensions"
58-
# GH 47305: Parallel build can causes flaky ImportError from pandas/_libs/tslibs
59-
python setup.py build_ext -q -j1
58+
python setup.py build_ext -q -j4
6059

6160
echo "Install pandas"
6261
python -m pip install --no-build-isolation --no-use-pep517 -e .

.github/actions/build_pandas/action.yml

+2-4
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,5 @@ runs:
1616
python -m pip install -e . --no-build-isolation --no-use-pep517 --no-index
1717
shell: bash -el {0}
1818
env:
19-
# Cannot use parallel compilation on Windows, see https://github.com/pandas-dev/pandas/issues/30873
20-
# GH 47305: Parallel build causes flaky ImportError: /home/runner/work/pandas/pandas/pandas/_libs/tslibs/timestamps.cpython-38-x86_64-linux-gnu.so: undefined symbol: pandas_datetime_to_datetimestruct
21-
N_JOBS: 1
22-
#N_JOBS: ${{ runner.os == 'Windows' && 1 || 2 }}
19+
# https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners#supported-runners-and-hardware-resources
20+
N_JOBS: ${{ runner.os == 'macOS' && 3 || 2 }}

.github/actions/setup-conda/action.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ runs:
3030
environment-name: ${{ inputs.environment-name }}
3131
extra-specs: ${{ inputs.extra-specs }}
3232
channels: conda-forge
33-
channel-priority: ${{ runner.os == 'macOS' && 'flexible' || 'strict' }}
33+
channel-priority: 'strict'
3434
condarc-file: ci/condarc.yml
3535
cache-env: true
3636
cache-downloads: true

.github/workflows/32-bit-linux.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ jobs:
4242
python -m pip install --no-deps -U pip wheel 'setuptools<60.0.0' && \
4343
python -m pip install versioneer[toml] && \
4444
python -m pip install cython numpy python-dateutil pytz pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.34.2 && \
45-
python setup.py build_ext -q -j1 && \
45+
python setup.py build_ext -q -j$(nproc) && \
4646
python -m pip install --no-build-isolation --no-use-pep517 -e . && \
4747
python -m pip list && \
4848
export PANDAS_CI=1 && \

.github/workflows/python-dev.yml

+1-2
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,9 @@ jobs:
8282
python -m pip install python-dateutil pytz cython hypothesis>=6.34.2 pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-cov pytest-asyncio>=0.17
8383
python -m pip list
8484
85-
# GH 47305: Parallel build can cause flaky ImportError from pandas/_libs/tslibs
8685
- name: Build Pandas
8786
run: |
88-
python setup.py build_ext -q -j1
87+
python setup.py build_ext -q -j4
8988
python -m pip install -e . --no-build-isolation --no-use-pep517 --no-index
9089
9190
- name: Build Version

.github/workflows/ubuntu.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ jobs:
142142
- 5432:5432
143143

144144
moto:
145-
image: motoserver/moto
145+
image: motoserver/moto:4.1.4
146146
env:
147147
AWS_ACCESS_KEY_ID: foobar_key
148148
AWS_SECRET_ACCESS_KEY: foobar_secret

.pre-commit-config.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ repos:
3131
rev: v0.0.253
3232
hooks:
3333
- id: ruff
34+
args: [--exit-non-zero-on-fix]
3435
- repo: https://github.com/jendrikseipp/vulture
3536
rev: 'v2.7'
3637
hooks:

MANIFEST.in

+2
Original file line numberDiff line numberDiff line change
@@ -58,3 +58,5 @@ prune pandas/tests/io/parser/data
5858
# Selectively re-add *.cxx files that were excluded above
5959
graft pandas/_libs/src
6060
graft pandas/_libs/tslibs/src
61+
include pandas/_libs/pd_parser.h
62+
include pandas/_libs/pd_parser.c

doc/source/development/contributing_environment.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ locally before pushing your changes. It's recommended to also install the :ref:`
2121
Step 1: install a C compiler
2222
----------------------------
2323

24-
How to do this will depend on your platform. If you choose to user ``Docker``
24+
How to do this will depend on your platform. If you choose to use ``Docker``
2525
in the next step, then you can skip this step.
2626

2727
**Windows**

doc/source/ecosystem.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -377,8 +377,8 @@ aggregations for step functions defined over real numbers, datetime and timedelt
377377

378378
xarray brings the labeled data power of pandas to the physical sciences by
379379
providing N-dimensional variants of the core pandas data structures. It aims to
380-
provide a pandas-like and pandas-compatible toolkit for analytics on multi-
381-
dimensional arrays, rather than the tabular data for which pandas excels.
380+
provide a pandas-like and pandas-compatible toolkit for analytics on
381+
multi-dimensional arrays, rather than the tabular data for which pandas excels.
382382

383383

384384
.. _ecosystem.io:

doc/source/user_guide/integer_na.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ with the dtype.
5858
.. warning::
5959

6060
Currently :meth:`pandas.array` and :meth:`pandas.Series` use different
61-
rules for dtype inference. :meth:`pandas.array` will infer a nullable-
62-
integer dtype
61+
rules for dtype inference. :meth:`pandas.array` will infer a
62+
nullable-integer dtype
6363

6464
.. ipython:: python
6565

pandas/_libs/__init__.py

+5
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,11 @@
1010
]
1111

1212

13+
# The imports below need to happen first to ensure pandas top level
14+
# module gets monkeypatched with the pandas_datetime_CAPI
15+
# see pandas_datetime_exec in pd_datetime.c
16+
import pandas._libs.pandas_parser # noqa # isort: skip # type: ignore[reportUnusedImport]
17+
import pandas._libs.pandas_datetime # noqa # isort: skip # type: ignore[reportUnusedImport]
1318
from pandas._libs.interval import Interval
1419
from pandas._libs.tslibs import (
1520
NaT,

pandas/_libs/index.pyx

+5
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,12 @@ from pandas._libs.tslibs.nattype cimport c_NaT as NaT
2020
from pandas._libs.tslibs.np_datetime cimport (
2121
NPY_DATETIMEUNIT,
2222
get_unit_from_dtype,
23+
import_pandas_datetime,
2324
)
25+
26+
import_pandas_datetime()
27+
28+
2429
from pandas._libs.tslibs.period cimport is_period_object
2530
from pandas._libs.tslibs.timedeltas cimport _Timedelta
2631
from pandas._libs.tslibs.timestamps cimport _Timestamp

pandas/_libs/lib.pyx

+4-2
Original file line numberDiff line numberDiff line change
@@ -88,9 +88,11 @@ cdef extern from "numpy/arrayobject.h":
8888
cdef extern from "numpy/ndarrayobject.h":
8989
bint PyArray_CheckScalar(obj) nogil
9090

91-
92-
cdef extern from "src/parse_helper.h":
91+
cdef extern from "pd_parser.h":
9392
int floatify(object, float64_t *result, int *maybe_int) except -1
93+
void PandasParser_IMPORT()
94+
95+
PandasParser_IMPORT
9496

9597
from pandas._libs cimport util
9698
from pandas._libs.util cimport (

pandas/_libs/missing.pyx

+3
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,11 @@ from pandas._libs.tslibs.np_datetime cimport (
3434
get_datetime64_unit,
3535
get_datetime64_value,
3636
get_timedelta64_value,
37+
import_pandas_datetime,
3738
)
3839

40+
import_pandas_datetime()
41+
3942
from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op
4043

4144
cdef:

pandas/_libs/parsers.pyx

+53-20
Original file line numberDiff line numberDiff line change
@@ -229,9 +229,9 @@ cdef extern from "parser/tokenizer.h":
229229
int64_t skip_first_N_rows
230230
int64_t skipfooter
231231
# pick one, depending on whether the converter requires GIL
232-
float64_t (*double_converter)(const char *, char **,
233-
char, char, char,
234-
int, int *, int *) nogil
232+
double (*double_converter)(const char *, char **,
233+
char, char, char,
234+
int, int *, int *) nogil
235235

236236
# error handling
237237
char *warn_msg
@@ -249,6 +249,16 @@ cdef extern from "parser/tokenizer.h":
249249
int seen_uint
250250
int seen_null
251251

252+
void COLITER_NEXT(coliter_t, const char *) nogil
253+
254+
cdef extern from "pd_parser.h":
255+
void *new_rd_source(object obj) except NULL
256+
257+
int del_rd_source(void *src)
258+
259+
void* buffer_rd_bytes(void *source, size_t nbytes,
260+
size_t *bytes_read, int *status, const char *encoding_errors)
261+
252262
void uint_state_init(uint_state *self)
253263
int uint64_conflict(uint_state *self)
254264

@@ -279,26 +289,49 @@ cdef extern from "parser/tokenizer.h":
279289
uint64_t str_to_uint64(uint_state *state, char *p_item, int64_t int_max,
280290
uint64_t uint_max, int *error, char tsep) nogil
281291

282-
float64_t xstrtod(const char *p, char **q, char decimal,
292+
double xstrtod(const char *p, char **q, char decimal,
293+
char sci, char tsep, int skip_trailing,
294+
int *error, int *maybe_int) nogil
295+
double precise_xstrtod(const char *p, char **q, char decimal,
296+
char sci, char tsep, int skip_trailing,
297+
int *error, int *maybe_int) nogil
298+
double round_trip(const char *p, char **q, char decimal,
283299
char sci, char tsep, int skip_trailing,
284300
int *error, int *maybe_int) nogil
285-
float64_t precise_xstrtod(const char *p, char **q, char decimal,
286-
char sci, char tsep, int skip_trailing,
287-
int *error, int *maybe_int) nogil
288-
float64_t round_trip(const char *p, char **q, char decimal,
289-
char sci, char tsep, int skip_trailing,
290-
int *error, int *maybe_int) nogil
291301

292302
int to_boolean(const char *item, uint8_t *val) nogil
293303

304+
void PandasParser_IMPORT()
294305

295-
cdef extern from "parser/io.h":
296-
void *new_rd_source(object obj) except NULL
306+
PandasParser_IMPORT
297307

298-
int del_rd_source(void *src)
308+
# When not invoked directly but rather assigned as a function,
309+
# cdef extern'ed declarations seem to leave behind an undefined symbol
310+
cdef double xstrtod_wrapper(const char *p, char **q, char decimal,
311+
char sci, char tsep, int skip_trailing,
312+
int *error, int *maybe_int) nogil:
313+
return xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int)
299314

300-
void* buffer_rd_bytes(void *source, size_t nbytes,
301-
size_t *bytes_read, int *status, const char *encoding_errors)
315+
316+
cdef double precise_xstrtod_wrapper(const char *p, char **q, char decimal,
317+
char sci, char tsep, int skip_trailing,
318+
int *error, int *maybe_int) nogil:
319+
return precise_xstrtod(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int)
320+
321+
322+
cdef double round_trip_wrapper(const char *p, char **q, char decimal,
323+
char sci, char tsep, int skip_trailing,
324+
int *error, int *maybe_int) nogil:
325+
return round_trip(p, q, decimal, sci, tsep, skip_trailing, error, maybe_int)
326+
327+
328+
cdef void* buffer_rd_bytes_wrapper(void *source, size_t nbytes,
329+
size_t *bytes_read, int *status,
330+
const char *encoding_errors) noexcept:
331+
return buffer_rd_bytes(source, nbytes, bytes_read, status, encoding_errors)
332+
333+
cdef int del_rd_source_wrapper(void *src) noexcept:
334+
return del_rd_source(src)
302335

303336

304337
cdef class TextReader:
@@ -487,11 +520,11 @@ cdef class TextReader:
487520

488521
if float_precision == "round_trip":
489522
# see gh-15140
490-
self.parser.double_converter = round_trip
523+
self.parser.double_converter = round_trip_wrapper
491524
elif float_precision == "legacy":
492-
self.parser.double_converter = xstrtod
525+
self.parser.double_converter = xstrtod_wrapper
493526
elif float_precision == "high" or float_precision is None:
494-
self.parser.double_converter = precise_xstrtod
527+
self.parser.double_converter = precise_xstrtod_wrapper
495528
else:
496529
raise ValueError(f"Unrecognized float_precision option: "
497530
f"{float_precision}")
@@ -610,8 +643,8 @@ cdef class TextReader:
610643

611644
ptr = new_rd_source(source)
612645
self.parser.source = ptr
613-
self.parser.cb_io = &buffer_rd_bytes
614-
self.parser.cb_cleanup = &del_rd_source
646+
self.parser.cb_io = buffer_rd_bytes_wrapper
647+
self.parser.cb_cleanup = del_rd_source_wrapper
615648

616649
cdef _get_header(self, list prelim_header):
617650
# header is now a list of lists, so field_count should use header[0]

0 commit comments

Comments
 (0)