Skip to content

Revert Cython upgrade #54497

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion asv_bench/asv.conf.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
// pip (with all the conda available packages installed first,
// followed by the pip installed packages).
"matrix": {
"Cython": ["3.0.0"],
"Cython": ["0.29.33"],
"matplotlib": [],
"sqlalchemy": [],
"scipy": [],
Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-310.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=3.0.0
- cython>=0.29.33
- meson[ninja]=1.0.1
- meson-python=0.13.1

Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-311-downstream_compat.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=3.0.0
- cython>=0.29.33
- meson[ninja]=1.0.1
- meson-python=0.13.1

Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-311-pyarrownightly.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ dependencies:
# build dependencies
- versioneer[toml]
- meson[ninja]=1.0.1
- cython>=3.0.0
- cython>=0.29.33
- meson-python=0.13.1

# test dependencies
Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-311.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=3.0.0
- cython>=0.29.33
- meson[ninja]=1.0.1
- meson-python=0.13.1

Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-39-minimum_versions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=3.0.0
- cython>=0.29.33
- meson[ninja]=1.0.1
- meson-python=0.13.1

Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-39.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=3.0.0
- cython>=0.29.33
- meson[ninja]=1.0.1
- meson-python=0.13.1

Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-pypy-39.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=3.0.0
- cython>=0.29.33
- meson[ninja]=1.0.1
- meson-python=0.13.1

Expand Down
2 changes: 1 addition & 1 deletion ci/deps/circle-310-arm64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython>=3.0.0
- cython>=0.29.33
- meson[ninja]=1.0.1
- meson-python=0.13.1

Expand Down
1 change: 0 additions & 1 deletion doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -873,7 +873,6 @@ Other
- Bug in :meth:`Series.memory_usage` when ``deep=True`` throw an error with Series of objects and the returned value is incorrect, as it does not take into account GC corrections (:issue:`51858`)
- Bug in :meth:`period_range` the default behavior when freq was not passed as an argument was incorrect(:issue:`53687`)
- Fixed incorrect ``__name__`` attribute of ``pandas._libs.json`` (:issue:`52898`)
- The minimum version of Cython needed to compile pandas is now ``3.0.0`` (:issue:`54335`)

.. ---------------------------------------------------------------------------
.. _whatsnew_210.contributors:
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ dependencies:

# build dependencies
- versioneer[toml]
- cython=3.0.0
- cython=0.29.33
- meson[ninja]=1.0.1
- meson-python=0.13.1

Expand Down
117 changes: 109 additions & 8 deletions pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -998,7 +998,8 @@ def rank_1d(

N = len(values)
if labels is not None:
assert len(labels) == N
# TODO(cython3): cast won't be necessary (#2992)
assert <Py_ssize_t>len(labels) == N
out = np.empty(N)
grp_sizes = np.ones(N, dtype=np.int64)

Expand Down Expand Up @@ -1087,7 +1088,8 @@ cdef void rank_sorted_1d(
float64_t[::1] out,
int64_t[::1] grp_sizes,
const intp_t[:] sort_indexer,
const numeric_object_t[:] masked_vals,
# TODO(cython3): make const (https://github.com/cython/cython/issues/3222)
numeric_object_t[:] masked_vals,
const uint8_t[:] mask,
bint check_mask,
Py_ssize_t N,
Expand Down Expand Up @@ -1142,7 +1144,108 @@ cdef void rank_sorted_1d(
# array that we sorted previously, which gives us the location of
# that sorted value for retrieval back from the original
# values / masked_vals arrays
with gil(numeric_object_t is object):
# TODO(cython3): de-duplicate once cython supports conditional nogil
if numeric_object_t is object:
with gil:
for i in range(N):
at_end = i == N - 1

# dups and sum_ranks will be incremented each loop where
# the value / group remains the same, and should be reset
# when either of those change. Used to calculate tiebreakers
dups += 1
sum_ranks += i - grp_start + 1

next_val_diff = at_end or are_diff(masked_vals[sort_indexer[i]],
masked_vals[sort_indexer[i+1]])

# We'll need this check later anyway to determine group size, so just
# compute it here since shortcircuiting won't help
group_changed = at_end or (check_labels and
(labels[sort_indexer[i]]
!= labels[sort_indexer[i+1]]))

# Update out only when there is a transition of values or labels.
# When a new value or group is encountered, go back #dups steps(
# the number of occurrence of current value) and assign the ranks
# based on the starting index of the current group (grp_start)
# and the current index
if (next_val_diff or group_changed or (check_mask and
(mask[sort_indexer[i]]
^ mask[sort_indexer[i+1]]))):

# If keep_na, check for missing values and assign back
# to the result where appropriate
if keep_na and check_mask and mask[sort_indexer[i]]:
grp_na_count = dups
for j in range(i - dups + 1, i + 1):
out[sort_indexer[j]] = NaN
elif tiebreak == TIEBREAK_AVERAGE:
for j in range(i - dups + 1, i + 1):
out[sort_indexer[j]] = sum_ranks / <float64_t>dups
elif tiebreak == TIEBREAK_MIN:
for j in range(i - dups + 1, i + 1):
out[sort_indexer[j]] = i - grp_start - dups + 2
elif tiebreak == TIEBREAK_MAX:
for j in range(i - dups + 1, i + 1):
out[sort_indexer[j]] = i - grp_start + 1

# With n as the previous rank in the group and m as the number
# of duplicates in this stretch, if TIEBREAK_FIRST and ascending,
# then rankings should be n + 1, n + 2 ... n + m
elif tiebreak == TIEBREAK_FIRST:
for j in range(i - dups + 1, i + 1):
out[sort_indexer[j]] = j + 1 - grp_start

# If TIEBREAK_FIRST and descending, the ranking should be
# n + m, n + (m - 1) ... n + 1. This is equivalent to
# (i - dups + 1) + (i - j + 1) - grp_start
elif tiebreak == TIEBREAK_FIRST_DESCENDING:
for j in range(i - dups + 1, i + 1):
out[sort_indexer[j]] = 2 * i - j - dups + 2 - grp_start
elif tiebreak == TIEBREAK_DENSE:
for j in range(i - dups + 1, i + 1):
out[sort_indexer[j]] = grp_vals_seen

# Look forward to the next value (using the sorting in
# lexsort_indexer). If the value does not equal the current
# value then we need to reset the dups and sum_ranks, knowing
# that a new value is coming up. The conditional also needs
# to handle nan equality and the end of iteration. If group
# changes we do not record seeing a new value in the group
if not group_changed and (next_val_diff or (check_mask and
(mask[sort_indexer[i]]
^ mask[sort_indexer[i+1]]))):
dups = sum_ranks = 0
grp_vals_seen += 1

# Similar to the previous conditional, check now if we are
# moving to a new group. If so, keep track of the index where
# the new group occurs, so the tiebreaker calculations can
# decrement that from their position. Fill in the size of each
# group encountered (used by pct calculations later). Also be
# sure to reset any of the items helping to calculate dups
if group_changed:

# If not dense tiebreak, group size used to compute
# percentile will be # of non-null elements in group
if tiebreak != TIEBREAK_DENSE:
grp_size = i - grp_start + 1 - grp_na_count

# Otherwise, it will be the number of distinct values
# in the group, subtracting 1 if NaNs are present
# since that is a distinct value we shouldn't count
else:
grp_size = grp_vals_seen - (grp_na_count > 0)

for j in range(grp_start, i + 1):
grp_sizes[sort_indexer[j]] = grp_size

dups = sum_ranks = 0
grp_na_count = 0
grp_start = i + 1
grp_vals_seen = 1
else:
for i in range(N):
at_end = i == N - 1

Expand Down Expand Up @@ -1371,18 +1474,16 @@ ctypedef fused out_t:
@cython.boundscheck(False)
@cython.wraparound(False)
def diff_2d(
# TODO: cython bug (post Cython 3) prevents update to "const diff_t[:, :] arr"
ndarray[diff_t, ndim=2] arr,
out_t[:, :] out,
ndarray[diff_t, ndim=2] arr, # TODO(cython3) update to "const diff_t[:, :] arr"
ndarray[out_t, ndim=2] out,
Py_ssize_t periods,
int axis,
bint datetimelike=False,
):
cdef:
Py_ssize_t i, j, sx, sy, start, stop
bint f_contig = arr.flags.f_contiguous
# TODO: change to this when arr becomes a memoryview
# bint f_contig = arr.is_f_contig()
# bint f_contig = arr.is_f_contig() # TODO(cython3)
diff_t left, right

# Disable for unsupported dtype combinations,
Expand Down
2 changes: 1 addition & 1 deletion pandas/_libs/arrays.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class NDArrayBacked:
def size(self) -> int: ...
@property
def nbytes(self) -> int: ...
def copy(self, order=...): ...
def copy(self): ...
def delete(self, loc, axis=...): ...
def swapaxes(self, axis1, axis2): ...
def repeat(self, repeats: int | Sequence[int], axis: int | None = ...): ...
Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/arrays.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,8 @@ cdef class NDArrayBacked:

@property
def size(self) -> int:
return self._ndarray.size
# TODO(cython3): use self._ndarray.size
return cnp.PyArray_SIZE(self._ndarray)

@property
def nbytes(self) -> int:
Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/groupby.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def group_fillna_indexer(
labels: np.ndarray, # ndarray[int64_t]
sorted_labels: npt.NDArray[np.intp],
mask: npt.NDArray[np.uint8],
direction: Literal["ffill", "bfill"],
limit: int, # int64_t
dropna: bool,
) -> None: ...
Expand All @@ -54,7 +55,7 @@ def group_any_all(
mask: np.ndarray, # const uint8_t[::1]
val_test: Literal["any", "all"],
skipna: bool,
result_mask: np.ndarray | None,
nullable: bool,
) -> None: ...
def group_sum(
out: np.ndarray, # complexfloatingintuint_t[:, ::1]
Expand Down
Loading