Skip to content

Commit 6931051

Browse files
jbrockmendel authored and jreback committed
CLN: cython and docstring cleanups (#29089)
1 parent cb76dcb commit 6931051

17 files changed

+67
-72
lines changed

pandas/_libs/algos_common_helper.pxi.in

+2-3
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,11 @@ dtypes = [('float64', 'float64_t', 'float64_t'),
1717
def get_dispatch(dtypes):
1818

1919
for name, c_type, dest_type, in dtypes:
20-
dest_name = dest_type[:-2] # i.e. strip "_t"
21-
yield name, c_type, dest_type, dest_name
20+
yield name, c_type, dest_type
2221

2322
}}
2423

25-
{{for name, c_type, dest_type, dest_name
24+
{{for name, c_type, dest_type
2625
in get_dispatch(dtypes)}}
2726

2827

pandas/_libs/algos_take_helper.pxi.in

+33-29
Original file line numberDiff line numberDiff line change
@@ -10,28 +10,28 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
1010

1111
{{py:
1212

13-
# name, dest, c_type_in, c_type_out, preval, postval, can_copy, nogil
13+
# c_type_in, c_type_out, preval, postval
1414
dtypes = [
15-
('bool', 'bool', 'uint8_t', 'uint8_t', '', '', True),
16-
('bool', 'object', 'uint8_t', 'object',
17-
'True if ', ' > 0 else False', False),
18-
('int8', 'int8', 'int8_t', 'int8_t', '', '', True),
19-
('int8', 'int32', 'int8_t', 'int32_t', '', '', False),
20-
('int8', 'int64', 'int8_t', 'int64_t', '', '', False),
21-
('int8', 'float64', 'int8_t', 'float64_t', '', '', False),
22-
('int16', 'int16', 'int16_t', 'int16_t', '', '', True),
23-
('int16', 'int32', 'int16_t', 'int32_t', '', '', False),
24-
('int16', 'int64', 'int16_t', 'int64_t', '', '', False),
25-
('int16', 'float64', 'int16_t', 'float64_t', '', '', False),
26-
('int32', 'int32', 'int32_t', 'int32_t', '', '', True),
27-
('int32', 'int64', 'int32_t', 'int64_t', '', '', False),
28-
('int32', 'float64', 'int32_t', 'float64_t', '', '', False),
29-
('int64', 'int64', 'int64_t', 'int64_t', '', '', True),
30-
('int64', 'float64', 'int64_t', 'float64_t', '', '', False),
31-
('float32', 'float32', 'float32_t', 'float32_t', '', '', True),
32-
('float32', 'float64', 'float32_t', 'float64_t', '', '', False),
33-
('float64', 'float64', 'float64_t', 'float64_t', '', '', True),
34-
('object', 'object', 'object', 'object', '', '', False)]
15+
('uint8_t', 'uint8_t', '', ''),
16+
('uint8_t', 'object', 'True if ', ' > 0 else False'),
17+
('int8_t', 'int8_t', '', ''),
18+
('int8_t', 'int32_t', '', ''),
19+
('int8_t', 'int64_t', '', ''),
20+
('int8_t', 'float64_t', '', ''),
21+
('int16_t', 'int16_t', '', ''),
22+
('int16_t', 'int32_t', '', ''),
23+
('int16_t', 'int64_t', '', ''),
24+
('int16_t', 'float64_t', '', ''),
25+
('int32_t', 'int32_t', '', ''),
26+
('int32_t', 'int64_t', '', ''),
27+
('int32_t', 'float64_t', '', ''),
28+
('int64_t', 'int64_t', '', ''),
29+
('int64_t', 'float64_t', '', ''),
30+
('float32_t', 'float32_t', '', ''),
31+
('float32_t', 'float64_t', '', ''),
32+
('float64_t', 'float64_t', '', ''),
33+
('object', 'object', '', ''),
34+
]
3535

3636

3737
def get_dispatch(dtypes):
@@ -117,9 +117,9 @@ def get_dispatch(dtypes):
117117
out[i, j] = %(preval)svalues[i, idx]%(postval)s
118118
"""
119119

120-
for (name, dest, c_type_in, c_type_out, preval, postval,
121-
can_copy) in dtypes:
120+
for (c_type_in, c_type_out, preval, postval) in dtypes:
122121

122+
can_copy = c_type_in == c_type_out != "object"
123123
nogil = c_type_out != "object"
124124
if nogil:
125125
nogil_str = "with nogil:"
@@ -128,6 +128,16 @@ def get_dispatch(dtypes):
128128
nogil_str = ''
129129
tab = ''
130130

131+
def get_name(dtype_name):
132+
if dtype_name == "object":
133+
return "object"
134+
if dtype_name == "uint8_t":
135+
return "bool"
136+
return dtype_name[:-2]
137+
138+
name = get_name(c_type_in)
139+
dest = get_name(c_type_out)
140+
131141
args = dict(name=name, dest=dest, c_type_in=c_type_in,
132142
c_type_out=c_type_out, preval=preval, postval=postval,
133143
can_copy=can_copy, nogil_str=nogil_str, tab=tab)
@@ -291,9 +301,3 @@ cdef _take_2d(ndarray[take_t, ndim=2] values, object idx):
291301
for j in range(K):
292302
result[i, j] = values[i, indexer[i, j]]
293303
return result
294-
295-
296-
_take_2d_object = _take_2d[object]
297-
_take_2d_float64 = _take_2d[float64_t]
298-
_take_2d_int64 = _take_2d[int64_t]
299-
_take_2d_uint64 = _take_2d[uint64_t]

pandas/_libs/hashtable_func_helper.pxi.in

+4-2
Original file line numberDiff line numberDiff line change
@@ -151,12 +151,14 @@ def duplicated_{{dtype}}({{scalar}}[:] values, object keep='first'):
151151

152152
if keep == 'last':
153153
{{if dtype == 'object'}}
154-
for i from n > i >= 0:
154+
for i in range(n - 1, -1, -1):
155+
# equivalent: range(n)[::-1], which cython doesn't like in nogil
155156
kh_put_{{ttype}}(table, <PyObject*>values[i], &ret)
156157
out[i] = ret == 0
157158
{{else}}
158159
with nogil:
159-
for i from n > i >= 0:
160+
for i in range(n - 1, -1, -1):
161+
# equivalent: range(n)[::-1], which cython doesn't like in nogil
160162
kh_put_{{ttype}}(table, values[i], &ret)
161163
out[i] = ret == 0
162164
{{endif}}

pandas/_libs/join.pyx

+9-15
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,16 @@ from pandas._libs.algos import (
1313
)
1414

1515

16+
@cython.boundscheck(False)
1617
def inner_join(const int64_t[:] left, const int64_t[:] right,
1718
Py_ssize_t max_groups):
1819
cdef:
1920
Py_ssize_t i, j, k, count = 0
2021
ndarray[int64_t] left_count, right_count, left_sorter, right_sorter
2122
ndarray[int64_t] left_indexer, right_indexer
2223
int64_t lc, rc
24+
Py_ssize_t loc, left_pos = 0, right_pos = 0, position = 0
25+
Py_ssize_t offset
2326

2427
# NA group in location 0
2528

@@ -34,11 +37,6 @@ def inner_join(const int64_t[:] left, const int64_t[:] right,
3437
if rc > 0 and lc > 0:
3538
count += lc * rc
3639

37-
# group 0 is the NA group
38-
cdef:
39-
Py_ssize_t loc, left_pos = 0, right_pos = 0, position = 0
40-
Py_ssize_t offset
41-
4240
# exclude the NA group
4341
left_pos = left_count[0]
4442
right_pos = right_count[0]
@@ -64,6 +62,7 @@ def inner_join(const int64_t[:] left, const int64_t[:] right,
6462
_get_result_indexer(right_sorter, right_indexer))
6563

6664

65+
@cython.boundscheck(False)
6766
def left_outer_join(const int64_t[:] left, const int64_t[:] right,
6867
Py_ssize_t max_groups, sort=True):
6968
cdef:
@@ -72,6 +71,8 @@ def left_outer_join(const int64_t[:] left, const int64_t[:] right,
7271
ndarray rev
7372
ndarray[int64_t] left_indexer, right_indexer
7473
int64_t lc, rc
74+
Py_ssize_t loc, left_pos = 0, right_pos = 0, position = 0
75+
Py_ssize_t offset
7576

7677
# NA group in location 0
7778

@@ -85,11 +86,6 @@ def left_outer_join(const int64_t[:] left, const int64_t[:] right,
8586
else:
8687
count += left_count[i]
8788

88-
# group 0 is the NA group
89-
cdef:
90-
Py_ssize_t loc, left_pos = 0, right_pos = 0, position = 0
91-
Py_ssize_t offset
92-
9389
# exclude the NA group
9490
left_pos = left_count[0]
9591
right_pos = right_count[0]
@@ -137,13 +133,16 @@ def left_outer_join(const int64_t[:] left, const int64_t[:] right,
137133
return left_indexer, right_indexer
138134

139135

136+
@cython.boundscheck(False)
140137
def full_outer_join(const int64_t[:] left, const int64_t[:] right,
141138
Py_ssize_t max_groups):
142139
cdef:
143140
Py_ssize_t i, j, k, count = 0
144141
ndarray[int64_t] left_count, right_count, left_sorter, right_sorter
145142
ndarray[int64_t] left_indexer, right_indexer
146143
int64_t lc, rc
144+
int64_t left_pos = 0, right_pos = 0
145+
Py_ssize_t offset, position = 0
147146

148147
# NA group in location 0
149148

@@ -160,11 +159,6 @@ def full_outer_join(const int64_t[:] left, const int64_t[:] right,
160159
else:
161160
count += lc + rc
162161

163-
# group 0 is the NA group
164-
cdef:
165-
int64_t left_pos = 0, right_pos = 0
166-
Py_ssize_t offset, position = 0
167-
168162
# exclude the NA group
169163
left_pos = left_count[0]
170164
right_pos = right_count[0]

pandas/_libs/sparse_op_helper.pxi.in

+1-1
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def get_op(tup):
8686
'and': '{0} & {1}', # logical op
8787
'or': '{0} | {1}'}
8888

89-
return ops_dict[opname].format(lval, rval, dtype)
89+
return ops_dict[opname].format(lval, rval)
9090

9191

9292
def get_dispatch(dtypes):

pandas/_libs/window.pyx

+4-4
Original file line numberDiff line numberDiff line change
@@ -1296,7 +1296,7 @@ cdef _roll_min_max_variable(ndarray[numeric] values,
12961296
# The original impl didn't deal with variable window sizes
12971297
# So the code was optimized for that
12981298

1299-
for i from starti[0] <= i < endi[0]:
1299+
for i in range(starti[0], endi[0]):
13001300
ai = init_mm(values[i], &nobs, is_max)
13011301

13021302
# Discard previous entries if we find new min or max
@@ -1644,7 +1644,7 @@ def roll_generic(object obj,
16441644
else:
16451645

16461646
# truncated windows at the beginning, through first full-length window
1647-
for i from 0 <= i < (int_min(win, N) - offset):
1647+
for i in range((int_min(win, N) - offset)):
16481648
if counts[i] >= minp:
16491649
output[i] = func(arr[0: (i + offset + 1)], *args, **kwargs)
16501650
else:
@@ -1654,7 +1654,7 @@ def roll_generic(object obj,
16541654
buf = <float64_t *>arr.data
16551655
bufarr = np.empty(win, dtype=float)
16561656
oldbuf = <float64_t *>bufarr.data
1657-
for i from (win - offset) <= i < (N - offset):
1657+
for i in range((win - offset), (N - offset)):
16581658
buf = buf + 1
16591659
bufarr.data = <char *>buf
16601660
if counts[i] >= minp:
@@ -1664,7 +1664,7 @@ def roll_generic(object obj,
16641664
bufarr.data = <char *>oldbuf
16651665

16661666
# truncated windows at the end
1667-
for i from int_max(N - offset, 0) <= i < N:
1667+
for i in range(int_max(N - offset, 0), N):
16681668
if counts[i] >= minp:
16691669
output[i] = func(arr[int_max(i + offset - win + 1, 0): N],
16701670
*args,

pandas/core/arrays/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1104,7 +1104,7 @@ def _create_method(cls, op, coerce_to_dtype=True):
11041104
----------
11051105
op : function
11061106
An operator that takes arguments op(a, b)
1107-
coerce_to_dtype : bool, default True
1107+
coerce_to_dtype : bool, default True
11081108
boolean indicating whether to attempt to convert
11091109
the result to the underlying ExtensionArray dtype.
11101110
If it's not possible to create a new ExtensionArray with the

pandas/core/generic.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2481,7 +2481,7 @@ def to_hdf(self, path_or_buf, key, **kwargs):
24812481
like searching / selecting subsets of the data.
24822482
append : bool, default False
24832483
For Table formats, append the input data to the existing.
2484-
data_columns : list of columns or True, optional
2484+
data_columns : list of columns or True, optional
24852485
List of columns to create as indexed data columns for on-disk
24862486
queries, or True to use all columns. By default only the axes
24872487
of the object are indexed. See :ref:`io.hdf5-query-data-columns`.

pandas/core/groupby/groupby.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2080,7 +2080,7 @@ def rank(
20802080
* dense: like 'min', but rank always increases by 1 between groups
20812081
ascending : bool, default True
20822082
False for ranks by high (1) to low (N).
2083-
na_option : {'keep', 'top', 'bottom'}, default 'keep'
2083+
na_option : {'keep', 'top', 'bottom'}, default 'keep'
20842084
* keep: leave NA values where they are
20852085
* top: smallest rank if ascending
20862086
* bottom: smallest rank if descending

pandas/core/indexes/base.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -2031,7 +2031,7 @@ def fillna(self, value=None, downcast=None):
20312031
20322032
Parameters
20332033
----------
2034-
how : {'any', 'all'}, default 'any'
2034+
how : {'any', 'all'}, default 'any'
20352035
If the Index is a MultiIndex, drop the value when any or all levels
20362036
are NaN.
20372037
@@ -5016,12 +5016,11 @@ def _validate_indexer(self, form, key, kind):
50165016
50175017
Returns
50185018
-------
5019-
label : object
5019+
label : object
50205020
50215021
Notes
50225022
-----
50235023
Value of `side` parameter should be validated in caller.
5024-
50255024
"""
50265025

50275026
@Appender(_index_shared_docs["_maybe_cast_slice_bound"])

pandas/core/indexes/datetimes.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -1079,12 +1079,11 @@ def _maybe_cast_slice_bound(self, label, side, kind):
10791079
10801080
Returns
10811081
-------
1082-
label : object
1082+
label : object
10831083
10841084
Notes
10851085
-----
10861086
Value of `side` parameter should be validated in caller.
1087-
10881087
"""
10891088
assert kind in ["ix", "loc", "getitem", None]
10901089

pandas/core/indexes/timedeltas.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -550,7 +550,6 @@ def _maybe_cast_slice_bound(self, label, side, kind):
550550
"""
551551
If label is a string, cast it to timedelta according to resolution.
552552
553-
554553
Parameters
555554
----------
556555
label : object
@@ -559,8 +558,7 @@ def _maybe_cast_slice_bound(self, label, side, kind):
559558
560559
Returns
561560
-------
562-
label : object
563-
561+
label : object
564562
"""
565563
assert kind in ["ix", "loc", "getitem", None]
566564

pandas/core/missing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,7 @@ def _akima_interpolate(xi, yi, x, der=0, axis=0):
420420
----------
421421
xi : array_like
422422
A sorted list of x-coordinates, of length N.
423-
yi : array_like
423+
yi : array_like
424424
A 1-D array of real values. `yi`'s length along the interpolation
425425
axis must be equal to the length of `xi`. If N-D array, use axis
426426
parameter to select correct axis.

pandas/core/ops/docstrings.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,7 @@ def _make_flex_doc(op_name, typ):
387387
----------
388388
other : scalar, sequence, Series, or DataFrame
389389
Any single or multiple element data structure, or list-like object.
390-
axis : {{0 or 'index', 1 or 'columns'}}
390+
axis : {{0 or 'index', 1 or 'columns'}}
391391
Whether to compare by the index (0 or 'index') or columns
392392
(1 or 'columns'). For Series input, axis to match Series index on.
393393
level : int or label
@@ -541,7 +541,7 @@ def _make_flex_doc(op_name, typ):
541541
----------
542542
other : scalar, sequence, Series, or DataFrame
543543
Any single or multiple element data structure, or list-like object.
544-
axis : {{0 or 'index', 1 or 'columns'}}, default 'columns'
544+
axis : {{0 or 'index', 1 or 'columns'}}, default 'columns'
545545
Whether to compare by the index (0 or 'index') or columns
546546
(1 or 'columns').
547547
level : int or label

pandas/core/strings.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1343,7 +1343,7 @@ def str_pad(arr, width, side="left", fillchar=" "):
13431343
character. Equivalent to ``Series.str.pad(side='right')``.
13441344
Series.str.center : Fills both sides of strings with an arbitrary
13451345
character. Equivalent to ``Series.str.pad(side='both')``.
1346-
Series.str.zfill : Pad strings in the Series/Index by prepending '0'
1346+
Series.str.zfill : Pad strings in the Series/Index by prepending '0'
13471347
character. Equivalent to ``Series.str.pad(side='left', fillchar='0')``.
13481348
13491349
Examples

pandas/io/pytables.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1027,7 +1027,7 @@ def append(
10271027
/ selecting subsets of the data
10281028
append : bool, default True
10291029
Append the input data to the existing.
1030-
data_columns : list of columns, or True, default None
1030+
data_columns : list of columns, or True, default None
10311031
List of columns to create as indexed data columns for on-disk
10321032
queries, or True to use all columns. By default only the axes
10331033
of the object are indexed. See `here

pandas/tests/plotting/common.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ def _check_ax_scales(self, axes, xaxis="linear", yaxis="linear"):
311311
axes : matplotlib Axes object, or its list-like
312312
xaxis : {'linear', 'log'}
313313
expected xaxis scale
314-
yaxis : {'linear', 'log'}
314+
yaxis : {'linear', 'log'}
315315
expected yaxis scale
316316
"""
317317
axes = self._flatten_visible(axes)
@@ -329,7 +329,7 @@ def _check_axes_shape(self, axes, axes_num=None, layout=None, figsize=None):
329329
axes_num : number
330330
expected number of axes. Unnecessary axes should be set to
331331
invisible.
332-
layout : tuple
332+
layout : tuple
333333
expected layout, (expected number of rows , columns)
334334
figsize : tuple
335335
expected figsize. default is matplotlib default

0 commit comments

Comments
 (0)