Skip to content

Commit fb01a69

Browse files
jbrockmendel authored and tm9k1 committed
REF: Fuse all the types (pandas-dev#23022)
1 parent f6ef197 commit fb01a69

5 files changed

+295 -278 lines changed (295 additions, 278 deletions)

pandas/_libs/algos_common_helper.pxi.in

+14 -17
Original file line number | Diff line number | Diff line change
@@ -16,33 +16,30 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
1616

1717
{{py:
1818

19-
# name, c_type, dest_type, dest_dtype
20-
dtypes = [('float64', 'float64_t', 'float64_t', 'np.float64'),
21-
('float32', 'float32_t', 'float32_t', 'np.float32'),
22-
('int8', 'int8_t', 'float32_t', 'np.float32'),
23-
('int16', 'int16_t', 'float32_t', 'np.float32'),
24-
('int32', 'int32_t', 'float64_t', 'np.float64'),
25-
('int64', 'int64_t', 'float64_t', 'np.float64')]
19+
# name, c_type, dest_type
20+
dtypes = [('float64', 'float64_t', 'float64_t'),
21+
('float32', 'float32_t', 'float32_t'),
22+
('int8', 'int8_t', 'float32_t'),
23+
('int16', 'int16_t', 'float32_t'),
24+
('int32', 'int32_t', 'float64_t'),
25+
('int64', 'int64_t', 'float64_t')]
2626

2727
def get_dispatch(dtypes):
2828

29-
for name, c_type, dest_type, dest_dtype, in dtypes:
30-
31-
dest_type2 = dest_type
32-
dest_type = dest_type.replace('_t', '')
33-
34-
yield name, c_type, dest_type, dest_type2, dest_dtype
29+
for name, c_type, dest_type, in dtypes:
30+
dest_name = dest_type[:-2] # i.e. strip "_t"
31+
yield name, c_type, dest_type, dest_name
3532

3633
}}
3734

38-
{{for name, c_type, dest_type, dest_type2, dest_dtype
35+
{{for name, c_type, dest_type, dest_name
3936
in get_dispatch(dtypes)}}
4037

4138

4239
@cython.boundscheck(False)
4340
@cython.wraparound(False)
4441
def diff_2d_{{name}}(ndarray[{{c_type}}, ndim=2] arr,
45-
ndarray[{{dest_type2}}, ndim=2] out,
42+
ndarray[{{dest_type}}, ndim=2] out,
4643
Py_ssize_t periods, int axis):
4744
cdef:
4845
Py_ssize_t i, j, sx, sy
@@ -84,9 +81,9 @@ def diff_2d_{{name}}(ndarray[{{c_type}}, ndim=2] arr,
8481
out[i, j] = arr[i, j] - arr[i, j - periods]
8582

8683

87-
def put2d_{{name}}_{{dest_type}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
84+
def put2d_{{name}}_{{dest_name}}(ndarray[{{c_type}}, ndim=2, cast=True] values,
8885
ndarray[int64_t] indexer, Py_ssize_t loc,
89-
ndarray[{{dest_type2}}] out):
86+
ndarray[{{dest_type}}] out):
9087
cdef:
9188
Py_ssize_t i, j, k
9289

pandas/_libs/algos_rank_helper.pxi.in

+21 -36
Original file line number | Diff line number | Diff line change
@@ -131,45 +131,20 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',
131131
argsorted = _as.astype('i8')
132132

133133
{{if dtype == 'object'}}
134-
for i in range(n):
135-
sum_ranks += i + 1
136-
dups += 1
137-
isnan = sorted_mask[i]
138-
val = util.get_value_at(sorted_data, i)
139-
140-
if isnan and keep_na:
141-
ranks[argsorted[i]] = nan
142-
continue
143-
count += 1.0
144-
145-
if (i == n - 1 or
146-
are_diff(util.get_value_at(sorted_data, i + 1), val) or
147-
i == non_na_idx):
148-
if tiebreak == TIEBREAK_AVERAGE:
149-
for j in range(i - dups + 1, i + 1):
150-
ranks[argsorted[j]] = sum_ranks / dups
151-
elif tiebreak == TIEBREAK_MIN:
152-
for j in range(i - dups + 1, i + 1):
153-
ranks[argsorted[j]] = i - dups + 2
154-
elif tiebreak == TIEBREAK_MAX:
155-
for j in range(i - dups + 1, i + 1):
156-
ranks[argsorted[j]] = i + 1
157-
elif tiebreak == TIEBREAK_FIRST:
158-
raise ValueError('first not supported for non-numeric data')
159-
elif tiebreak == TIEBREAK_FIRST_DESCENDING:
160-
for j in range(i - dups + 1, i + 1):
161-
ranks[argsorted[j]] = 2 * i - j - dups + 2
162-
elif tiebreak == TIEBREAK_DENSE:
163-
total_tie_count += 1
164-
for j in range(i - dups + 1, i + 1):
165-
ranks[argsorted[j]] = total_tie_count
166-
sum_ranks = dups = 0
134+
if True:
167135
{{else}}
168136
with nogil:
137+
{{endif}}
138+
# TODO: why does the 2d version not have a nogil block?
169139
for i in range(n):
170140
sum_ranks += i + 1
171141
dups += 1
142+
143+
{{if dtype == 'object'}}
144+
val = util.get_value_at(sorted_data, i)
145+
{{else}}
172146
val = sorted_data[i]
147+
{{endif}}
173148

174149
{{if dtype != 'uint64'}}
175150
isnan = sorted_mask[i]
@@ -180,8 +155,14 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',
180155

181156
count += 1.0
182157

183-
if (i == n - 1 or sorted_data[i + 1] != val or
184-
i == non_na_idx):
158+
{{if dtype == 'object'}}
159+
if (i == n - 1 or
160+
are_diff(util.get_value_at(sorted_data, i + 1), val) or
161+
i == non_na_idx):
162+
{{else}}
163+
if (i == n - 1 or sorted_data[i + 1] != val or i == non_na_idx):
164+
{{endif}}
165+
185166
if tiebreak == TIEBREAK_AVERAGE:
186167
for j in range(i - dups + 1, i + 1):
187168
ranks[argsorted[j]] = sum_ranks / dups
@@ -192,8 +173,13 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',
192173
for j in range(i - dups + 1, i + 1):
193174
ranks[argsorted[j]] = i + 1
194175
elif tiebreak == TIEBREAK_FIRST:
176+
{{if dtype == 'object'}}
177+
raise ValueError('first not supported for '
178+
'non-numeric data')
179+
{{else}}
195180
for j in range(i - dups + 1, i + 1):
196181
ranks[argsorted[j]] = j + 1
182+
{{endif}}
197183
elif tiebreak == TIEBREAK_FIRST_DESCENDING:
198184
for j in range(i - dups + 1, i + 1):
199185
ranks[argsorted[j]] = 2 * i - j - dups + 2
@@ -202,7 +188,6 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average',
202188
for j in range(i - dups + 1, i + 1):
203189
ranks[argsorted[j]] = total_tie_count
204190
sum_ranks = dups = 0
205-
{{endif}}
206191
if pct:
207192
if tiebreak == TIEBREAK_DENSE:
208193
return ranks / total_tie_count

0 commit comments

Comments
 (0)