From e21c3c9307d845fe228b0df2f1b0e4814990028a Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Wed, 11 Dec 2024 14:56:25 +0100 Subject: [PATCH 1/5] Use PyWeakref_GetRef and critical section in BlockValuesRefs --- pandas/_libs/internals.pyx | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 99737776ff59f..08e0b40c1e533 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -3,9 +3,10 @@ from collections import defaultdict cimport cython from cpython.object cimport PyObject from cpython.pyport cimport PY_SSIZE_T_MAX +from cpython.ref cimport Py_DECREF from cpython.slice cimport PySlice_GetIndicesEx from cpython.weakref cimport ( - PyWeakref_GetObject, + PyWeakref_GetRef, PyWeakref_NewRef, ) from cython cimport Py_ssize_t @@ -908,11 +909,18 @@ cdef class BlockValuesRefs: # if force=False. Clearing for every insertion causes slowdowns if # all these objects stay alive, e.g. df.items() for wide DataFrames # see GH#55245 and GH#55008 + cdef PyObject* pobj + cdef bint status + if force or len(self.referenced_blocks) > self.clear_counter: - self.referenced_blocks = [ - ref for ref in self.referenced_blocks - if PyWeakref_GetObject(ref) != Py_None - ] + new_referenced_blocks = [] + for ref in self.referenced_blocks: + status = PyWeakref_GetRef(ref, &pobj) + if status == 1: + new_referenced_blocks.append(ref) + Py_DECREF(pobj) + self.referenced_blocks = new_referenced_blocks + nr_of_refs = len(self.referenced_blocks) if nr_of_refs < self.clear_counter // 2: self.clear_counter = max(self.clear_counter // 2, 500) @@ -927,8 +935,9 @@ cdef class BlockValuesRefs: blk : Block The block that the new references should point to. """ - self._clear_dead_references() - self.referenced_blocks.append(PyWeakref_NewRef(blk, None)) + with cython.critical_section(self): + self._clear_dead_references() + self.referenced_blocks.append(PyWeakref_NewRef(blk, None)) def add_index_reference(self, index: object) -> None: """Adds a new reference to our reference collection when creating an index. @@ -938,8 +947,9 @@ cdef class BlockValuesRefs: index : Index The index that the new reference should point to. """ - self._clear_dead_references() - self.referenced_blocks.append(PyWeakref_NewRef(index, None)) + with cython.critical_section(self): + self._clear_dead_references() + self.referenced_blocks.append(PyWeakref_NewRef(index, None)) def has_reference(self) -> bool: """Checks if block has foreign references. @@ -951,6 +961,8 @@ cdef class BlockValuesRefs: ------- bool """ - self._clear_dead_references(force=True) - # Checking for more references than block pointing to itself - return len(self.referenced_blocks) > 1 + with cython.critical_section(self): + self._clear_dead_references(force=True) + # Checking for more references than block pointing to itself + has_reference = len(self.referenced_blocks) > 1 + return has_reference From 232f97a36370aefe5fdfca8a0b01b065b3edb5ce Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Thu, 23 Jan 2025 15:05:29 +0100 Subject: [PATCH 2/5] Add conditional compilation for critical sections --- meson.build | 4 ++ pandas/_libs/free_threading_config.pxi.in | 3 + pandas/_libs/internals.pyx | 78 +++++++++++++++-------- pandas/_libs/meson.build | 7 ++ 4 files changed, 67 insertions(+), 25 deletions(-) create mode 100644 pandas/_libs/free_threading_config.pxi.in diff --git a/meson.build b/meson.build index 66583095a6e77..f04659e519429 100644 --- a/meson.build +++ b/meson.build @@ -45,8 +45,12 @@ else endif cy = meson.get_compiler('cython') +cdata = configuration_data() if cy.version().version_compare('>=3.1.0') add_project_arguments('-Xfreethreading_compatible=true', language: 'cython') + cdata.set('freethreading_compatible', '1') +else + cdata.set('freethreading_compatible', '0') endif # Needed by pandas.test() when it looks for the pytest ini options diff --git a/pandas/_libs/free_threading_config.pxi.in b/pandas/_libs/free_threading_config.pxi.in new file mode 100644 index 0000000000000..fe7ca399389ab --- /dev/null +++ b/pandas/_libs/free_threading_config.pxi.in @@ -0,0 +1,3 @@ +# Autogenerated file containing Cython compile-time defines + +DEF CYTHON_COMPATIBLE_WITH_FREE_THREADING = @freethreading_compatible@ diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 08e0b40c1e533..44ae092f07add 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -3,12 +3,8 @@ from collections import defaultdict cimport cython from cpython.object cimport PyObject from cpython.pyport cimport PY_SSIZE_T_MAX -from cpython.ref cimport Py_DECREF from cpython.slice cimport PySlice_GetIndicesEx -from cpython.weakref cimport ( - PyWeakref_GetRef, - PyWeakref_NewRef, -) +from cpython.weakref cimport PyWeakref_NewRef from cython cimport Py_ssize_t import numpy as np @@ -30,6 +26,14 @@ from pandas._libs.util cimport ( is_integer_object, ) +include "free_threading_config.pxi" + +IF CYTHON_COMPATIBLE_WITH_FREE_THREADING: + from cpython.weakref cimport PyWeakref_GetRef + from cpython.ref cimport Py_DECREF +ELSE: + from cpython.weakref cimport PyWeakref_GetObject + cdef extern from "Python.h": PyObject* Py_None @@ -909,17 +913,24 @@ cdef class BlockValuesRefs: # if force=False. Clearing for every insertion causes slowdowns if # all these objects stay alive, e.g. df.items() for wide DataFrames # see GH#55245 and GH#55008 - cdef PyObject* pobj - cdef bint status + IF CYTHON_COMPATIBLE_WITH_FREE_THREADING: + cdef PyObject* pobj + cdef bint status if force or len(self.referenced_blocks) > self.clear_counter: - new_referenced_blocks = [] - for ref in self.referenced_blocks: - status = PyWeakref_GetRef(ref, &pobj) - if status == 1: - new_referenced_blocks.append(ref) - Py_DECREF(pobj) - self.referenced_blocks = new_referenced_blocks + IF CYTHON_COMPATIBLE_WITH_FREE_THREADING: + new_referenced_blocks = [] + for ref in self.referenced_blocks: + status = PyWeakref_GetRef(ref, &pobj) + if status == 1: + new_referenced_blocks.append(ref) + Py_DECREF(pobj) + self.referenced_blocks = new_referenced_blocks + ELSE: + self.referenced_blocks = [ + ref for ref in self.referenced_blocks + if PyWeakref_GetObject(ref) != Py_None + ] nr_of_refs = len(self.referenced_blocks) if nr_of_refs < self.clear_counter // 2: @@ -927,6 +938,10 @@ cdef class BlockValuesRefs: elif nr_of_refs > self.clear_counter: self.clear_counter = max(self.clear_counter * 2, nr_of_refs) + cpdef _add_reference_maybe_locked(self, Block blk): + self._clear_dead_references() + self.referenced_blocks.append(PyWeakref_NewRef(blk, None)) + cpdef add_reference(self, Block blk): """Adds a new reference to our reference collection. @@ -935,9 +950,15 @@ cdef class BlockValuesRefs: blk : Block The block that the new references should point to. """ - with cython.critical_section(self): - self._clear_dead_references() - self.referenced_blocks.append(PyWeakref_NewRef(blk, None)) + IF CYTHON_COMPATIBLE_WITH_FREE_THREADING: + with cython.critical_section(self): + self._add_reference_maybe_locked(blk) + ELSE: + self._add_reference_maybe_locked(blk) + + def _add_index_reference_maybe_locked(self, index: object) -> None: + self._clear_dead_references() + self.referenced_blocks.append(PyWeakref_NewRef(index, None)) def add_index_reference(self, index: object) -> None: """Adds a new reference to our reference collection when creating an index. @@ -947,9 +968,16 @@ cdef class BlockValuesRefs: index : Index The index that the new reference should point to. """ - with cython.critical_section(self): - self._clear_dead_references() - self.referenced_blocks.append(PyWeakref_NewRef(index, None)) + IF CYTHON_COMPATIBLE_WITH_FREE_THREADING: + with cython.critical_section(self): + self._add_index_reference_maybe_locked(index) + ELSE: + self._add_index_reference_maybe_locked(index) + + def _has_reference_maybe_locked(self) -> bool: + self._clear_dead_references(force=True) + # Checking for more references than block pointing to itself + return len(self.referenced_blocks) > 1 def has_reference(self) -> bool: """Checks if block has foreign references. @@ -961,8 +989,8 @@ cdef class BlockValuesRefs: ------- bool """ - with cython.critical_section(self): - self._clear_dead_references(force=True) - # Checking for more references than block pointing to itself - has_reference = len(self.referenced_blocks) > 1 - return has_reference + IF CYTHON_COMPATIBLE_WITH_FREE_THREADING: + with cython.critical_section(self): + return self._has_reference_maybe_locked() + ELSE: + return self._has_reference_maybe_locked() diff --git a/pandas/_libs/meson.build b/pandas/_libs/meson.build index 5fb6f1118d648..a4ea6048b6bb2 100644 --- a/pandas/_libs/meson.build +++ b/pandas/_libs/meson.build @@ -50,6 +50,13 @@ _khash_primitive_helper_dep = declare_dependency( sources: _khash_primitive_helper, ) +_free_threading_config = configure_file( + input: 'free_threading_config.pxi.in', + output: 'free_threading_config.pxi', + configuration: cdata, + install: false +) + subdir('tslibs') libs_sources = { From defbcc0f62c0a2ec653bb23d314d6af5272c2bcb Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Thu, 23 Jan 2025 15:13:09 +0100 Subject: [PATCH 3/5] run pre-commit --- pandas/_libs/internals.pyx | 2 +- pandas/_libs/meson.build | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 44ae092f07add..7244365dee472 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -29,8 +29,8 @@ from pandas._libs.util cimport ( include "free_threading_config.pxi" IF CYTHON_COMPATIBLE_WITH_FREE_THREADING: - from cpython.weakref cimport PyWeakref_GetRef from cpython.ref cimport Py_DECREF + from cpython.weakref cimport PyWeakref_GetRef ELSE: from cpython.weakref cimport PyWeakref_GetObject diff --git a/pandas/_libs/meson.build b/pandas/_libs/meson.build index a4ea6048b6bb2..0a9012415529a 100644 --- a/pandas/_libs/meson.build +++ b/pandas/_libs/meson.build @@ -54,7 +54,7 @@ _free_threading_config = configure_file( input: 'free_threading_config.pxi.in', output: 'free_threading_config.pxi', configuration: cdata, - install: false + install: false, ) subdir('tslibs') From 142c168aaa0a69ec9694cae8ef3d057dbc97ce6e Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Mon, 27 Jan 2025 17:23:40 +0100 Subject: [PATCH 4/5] Address feedback; move cdata in meson.build --- meson.build | 4 ---- pandas/_libs/meson.build | 6 ++++++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/meson.build b/meson.build index f04659e519429..66583095a6e77 100644 --- a/meson.build +++ b/meson.build @@ -45,12 +45,8 @@ else endif cy = meson.get_compiler('cython') -cdata = configuration_data() if cy.version().version_compare('>=3.1.0') add_project_arguments('-Xfreethreading_compatible=true', language: 'cython') - cdata.set('freethreading_compatible', '1') -else - cdata.set('freethreading_compatible', '0') endif # Needed by pandas.test() when it looks for the pytest ini options diff --git a/pandas/_libs/meson.build b/pandas/_libs/meson.build index 0a9012415529a..a50976767928a 100644 --- a/pandas/_libs/meson.build +++ b/pandas/_libs/meson.build @@ -50,6 +50,12 @@ _khash_primitive_helper_dep = declare_dependency( sources: _khash_primitive_helper, ) +cdata = configuration_data() +if cy.version().version_compare('>=3.1.0') + cdata.set('freethreading_compatible', '1') +else + cdata.set('freethreading_compatible', '0') +endif _free_threading_config = configure_file( input: 'free_threading_config.pxi.in', output: 'free_threading_config.pxi', From 49125a602bcc10e543880458869e763ba3ca327b Mon Sep 17 00:00:00 2001 From: Lysandros Nikolaou Date: Mon, 27 Jan 2025 17:25:13 +0100 Subject: [PATCH 5/5] Early return in case of error --- pandas/_libs/internals.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 7244365dee472..252512be4d1bb 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -922,7 +922,9 @@ cdef class BlockValuesRefs: new_referenced_blocks = [] for ref in self.referenced_blocks: status = PyWeakref_GetRef(ref, &pobj) - if status == 1: + if status == -1: + return + elif status == 1: new_referenced_blocks.append(ref) Py_DECREF(pobj) self.referenced_blocks = new_referenced_blocks