Skip to content

Commit 3edb82b

Browse files
authored
Revert Cython upgrade (#54497)
* Revert "CLN: Cython 3 cleanups (#54482)" This reverts commit a936863. * Revert "DEPS: Bump cython 3.0 (#54335)" This reverts commit 4cf63ea.
1 parent 0582e35 commit 3edb82b

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+412
-171
lines changed

asv_bench/asv.conf.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
// pip (with all the conda available packages installed first,
4242
// followed by the pip installed packages).
4343
"matrix": {
44-
"Cython": ["3.0.0"],
44+
"Cython": ["0.29.33"],
4545
"matplotlib": [],
4646
"sqlalchemy": [],
4747
"scipy": [],

ci/deps/actions-310.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ dependencies:
66

77
# build dependencies
88
- versioneer[toml]
9-
- cython>=3.0.0
9+
- cython>=0.29.33
1010
- meson[ninja]=1.0.1
1111
- meson-python=0.13.1
1212

ci/deps/actions-311-downstream_compat.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ dependencies:
77

88
# build dependencies
99
- versioneer[toml]
10-
- cython>=3.0.0
10+
- cython>=0.29.33
1111
- meson[ninja]=1.0.1
1212
- meson-python=0.13.1
1313

ci/deps/actions-311-pyarrownightly.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ dependencies:
77
# build dependencies
88
- versioneer[toml]
99
- meson[ninja]=1.0.1
10-
- cython>=3.0.0
10+
- cython>=0.29.33
1111
- meson-python=0.13.1
1212

1313
# test dependencies

ci/deps/actions-311.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ dependencies:
66

77
# build dependencies
88
- versioneer[toml]
9-
- cython>=3.0.0
9+
- cython>=0.29.33
1010
- meson[ninja]=1.0.1
1111
- meson-python=0.13.1
1212

ci/deps/actions-39-minimum_versions.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ dependencies:
88

99
# build dependencies
1010
- versioneer[toml]
11-
- cython>=3.0.0
11+
- cython>=0.29.33
1212
- meson[ninja]=1.0.1
1313
- meson-python=0.13.1
1414

ci/deps/actions-39.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ dependencies:
66

77
# build dependencies
88
- versioneer[toml]
9-
- cython>=3.0.0
9+
- cython>=0.29.33
1010
- meson[ninja]=1.0.1
1111
- meson-python=0.13.1
1212

ci/deps/actions-pypy-39.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ dependencies:
99

1010
# build dependencies
1111
- versioneer[toml]
12-
- cython>=3.0.0
12+
- cython>=0.29.33
1313
- meson[ninja]=1.0.1
1414
- meson-python=0.13.1
1515

ci/deps/circle-310-arm64.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ dependencies:
66

77
# build dependencies
88
- versioneer[toml]
9-
- cython>=3.0.0
9+
- cython>=0.29.33
1010
- meson[ninja]=1.0.1
1111
- meson-python=0.13.1
1212

doc/source/whatsnew/v2.1.0.rst

-1
Original file line numberDiff line numberDiff line change
@@ -873,7 +873,6 @@ Other
873873
- Bug in :meth:`Series.memory_usage` where ``deep=True`` raised an error for a Series of objects and the returned value was incorrect, as it did not take into account GC corrections (:issue:`51858`)
874874
- Bug in :meth:`period_range` where the default behavior when ``freq`` was not passed as an argument was incorrect (:issue:`53687`)
875875
- Fixed incorrect ``__name__`` attribute of ``pandas._libs.json`` (:issue:`52898`)
876-
- The minimum version of Cython needed to compile pandas is now ``3.0.0`` (:issue:`54335`)
877876

878877
.. ---------------------------------------------------------------------------
879878
.. _whatsnew_210.contributors:

environment.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ dependencies:
88

99
# build dependencies
1010
- versioneer[toml]
11-
- cython=3.0.0
11+
- cython=0.29.33
1212
- meson[ninja]=1.0.1
1313
- meson-python=0.13.1
1414

pandas/_libs/algos.pyx

+109-8
Original file line numberDiff line numberDiff line change
@@ -998,7 +998,8 @@ def rank_1d(
998998

999999
N = len(values)
10001000
if labels is not None:
1001-
assert len(labels) == N
1001+
# TODO(cython3): cast won't be necessary (#2992)
1002+
assert <Py_ssize_t>len(labels) == N
10021003
out = np.empty(N)
10031004
grp_sizes = np.ones(N, dtype=np.int64)
10041005

@@ -1087,7 +1088,8 @@ cdef void rank_sorted_1d(
10871088
float64_t[::1] out,
10881089
int64_t[::1] grp_sizes,
10891090
const intp_t[:] sort_indexer,
1090-
const numeric_object_t[:] masked_vals,
1091+
# TODO(cython3): make const (https://github.com/cython/cython/issues/3222)
1092+
numeric_object_t[:] masked_vals,
10911093
const uint8_t[:] mask,
10921094
bint check_mask,
10931095
Py_ssize_t N,
@@ -1142,7 +1144,108 @@ cdef void rank_sorted_1d(
11421144
# array that we sorted previously, which gives us the location of
11431145
# that sorted value for retrieval back from the original
11441146
# values / masked_vals arrays
1145-
with gil(numeric_object_t is object):
1147+
# TODO(cython3): de-duplicate once cython supports conditional nogil
1148+
if numeric_object_t is object:
1149+
with gil:
1150+
for i in range(N):
1151+
at_end = i == N - 1
1152+
1153+
# dups and sum_ranks will be incremented each loop where
1154+
# the value / group remains the same, and should be reset
1155+
# when either of those change. Used to calculate tiebreakers
1156+
dups += 1
1157+
sum_ranks += i - grp_start + 1
1158+
1159+
next_val_diff = at_end or are_diff(masked_vals[sort_indexer[i]],
1160+
masked_vals[sort_indexer[i+1]])
1161+
1162+
# We'll need this check later anyway to determine group size, so just
1163+
# compute it here since short-circuiting won't help
1164+
group_changed = at_end or (check_labels and
1165+
(labels[sort_indexer[i]]
1166+
!= labels[sort_indexer[i+1]]))
1167+
1168+
# Update out only when there is a transition of values or labels.
1169+
# When a new value or group is encountered, go back #dups steps (
1170+
# the number of occurrences of the current value) and assign the ranks
1171+
# based on the starting index of the current group (grp_start)
1172+
# and the current index
1173+
if (next_val_diff or group_changed or (check_mask and
1174+
(mask[sort_indexer[i]]
1175+
^ mask[sort_indexer[i+1]]))):
1176+
1177+
# If keep_na, check for missing values and assign back
1178+
# to the result where appropriate
1179+
if keep_na and check_mask and mask[sort_indexer[i]]:
1180+
grp_na_count = dups
1181+
for j in range(i - dups + 1, i + 1):
1182+
out[sort_indexer[j]] = NaN
1183+
elif tiebreak == TIEBREAK_AVERAGE:
1184+
for j in range(i - dups + 1, i + 1):
1185+
out[sort_indexer[j]] = sum_ranks / <float64_t>dups
1186+
elif tiebreak == TIEBREAK_MIN:
1187+
for j in range(i - dups + 1, i + 1):
1188+
out[sort_indexer[j]] = i - grp_start - dups + 2
1189+
elif tiebreak == TIEBREAK_MAX:
1190+
for j in range(i - dups + 1, i + 1):
1191+
out[sort_indexer[j]] = i - grp_start + 1
1192+
1193+
# With n as the previous rank in the group and m as the number
1194+
# of duplicates in this stretch, if TIEBREAK_FIRST and ascending,
1195+
# then rankings should be n + 1, n + 2 ... n + m
1196+
elif tiebreak == TIEBREAK_FIRST:
1197+
for j in range(i - dups + 1, i + 1):
1198+
out[sort_indexer[j]] = j + 1 - grp_start
1199+
1200+
# If TIEBREAK_FIRST and descending, the ranking should be
1201+
# n + m, n + (m - 1) ... n + 1. This is equivalent to
1202+
# (i - dups + 1) + (i - j + 1) - grp_start
1203+
elif tiebreak == TIEBREAK_FIRST_DESCENDING:
1204+
for j in range(i - dups + 1, i + 1):
1205+
out[sort_indexer[j]] = 2 * i - j - dups + 2 - grp_start
1206+
elif tiebreak == TIEBREAK_DENSE:
1207+
for j in range(i - dups + 1, i + 1):
1208+
out[sort_indexer[j]] = grp_vals_seen
1209+
1210+
# Look forward to the next value (using the sorting in
1211+
# lexsort_indexer). If the value does not equal the current
1212+
# value then we need to reset the dups and sum_ranks, knowing
1213+
# that a new value is coming up. The conditional also needs
1214+
# to handle nan equality and the end of iteration. If group
1215+
# changes we do not record seeing a new value in the group
1216+
if not group_changed and (next_val_diff or (check_mask and
1217+
(mask[sort_indexer[i]]
1218+
^ mask[sort_indexer[i+1]]))):
1219+
dups = sum_ranks = 0
1220+
grp_vals_seen += 1
1221+
1222+
# Similar to the previous conditional, check now if we are
1223+
# moving to a new group. If so, keep track of the index where
1224+
# the new group occurs, so the tiebreaker calculations can
1225+
# decrement that from their position. Fill in the size of each
1226+
# group encountered (used by pct calculations later). Also be
1227+
# sure to reset any of the items helping to calculate dups
1228+
if group_changed:
1229+
1230+
# If not dense tiebreak, group size used to compute
1231+
# percentile will be # of non-null elements in group
1232+
if tiebreak != TIEBREAK_DENSE:
1233+
grp_size = i - grp_start + 1 - grp_na_count
1234+
1235+
# Otherwise, it will be the number of distinct values
1236+
# in the group, subtracting 1 if NaNs are present
1237+
# since that is a distinct value we shouldn't count
1238+
else:
1239+
grp_size = grp_vals_seen - (grp_na_count > 0)
1240+
1241+
for j in range(grp_start, i + 1):
1242+
grp_sizes[sort_indexer[j]] = grp_size
1243+
1244+
dups = sum_ranks = 0
1245+
grp_na_count = 0
1246+
grp_start = i + 1
1247+
grp_vals_seen = 1
1248+
else:
11461249
for i in range(N):
11471250
at_end = i == N - 1
11481251

@@ -1371,18 +1474,16 @@ ctypedef fused out_t:
13711474
@cython.boundscheck(False)
13721475
@cython.wraparound(False)
13731476
def diff_2d(
1374-
# TODO: cython bug (post Cython 3) prevents update to "const diff_t[:, :] arr"
1375-
ndarray[diff_t, ndim=2] arr,
1376-
out_t[:, :] out,
1477+
ndarray[diff_t, ndim=2] arr, # TODO(cython3) update to "const diff_t[:, :] arr"
1478+
ndarray[out_t, ndim=2] out,
13771479
Py_ssize_t periods,
13781480
int axis,
13791481
bint datetimelike=False,
13801482
):
13811483
cdef:
13821484
Py_ssize_t i, j, sx, sy, start, stop
13831485
bint f_contig = arr.flags.f_contiguous
1384-
# TODO: change to this when arr becomes a memoryview
1385-
# bint f_contig = arr.is_f_contig()
1486+
# bint f_contig = arr.is_f_contig() # TODO(cython3)
13861487
diff_t left, right
13871488

13881489
# Disable for unsupported dtype combinations,

pandas/_libs/arrays.pyi

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ class NDArrayBacked:
2626
def size(self) -> int: ...
2727
@property
2828
def nbytes(self) -> int: ...
29-
def copy(self, order=...): ...
29+
def copy(self): ...
3030
def delete(self, loc, axis=...): ...
3131
def swapaxes(self, axis1, axis2): ...
3232
def repeat(self, repeats: int | Sequence[int], axis: int | None = ...): ...

pandas/_libs/arrays.pyx

+2-1
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,8 @@ cdef class NDArrayBacked:
126126

127127
@property
128128
def size(self) -> int:
129-
return self._ndarray.size
129+
# TODO(cython3): use self._ndarray.size
130+
return cnp.PyArray_SIZE(self._ndarray)
130131

131132
@property
132133
def nbytes(self) -> int:

pandas/_libs/groupby.pyi

+2-1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def group_fillna_indexer(
4444
labels: np.ndarray, # ndarray[int64_t]
4545
sorted_labels: npt.NDArray[np.intp],
4646
mask: npt.NDArray[np.uint8],
47+
direction: Literal["ffill", "bfill"],
4748
limit: int, # int64_t
4849
dropna: bool,
4950
) -> None: ...
@@ -54,7 +55,7 @@ def group_any_all(
5455
mask: np.ndarray, # const uint8_t[::1]
5556
val_test: Literal["any", "all"],
5657
skipna: bool,
57-
result_mask: np.ndarray | None,
58+
nullable: bool,
5859
) -> None: ...
5960
def group_sum(
6061
out: np.ndarray, # complexfloatingintuint_t[:, ::1]

0 commit comments

Comments
 (0)