Skip to content

Commit 5e6d811

Browse files
committed
Moved non-templated funcs to groupby.pyx
1 parent 0154d09 commit 5e6d811

File tree

2 files changed

+216
-219
lines changed

2 files changed

+216
-219
lines changed

pandas/_libs/groupby.pyx

+216
Original file line numberDiff line numberDiff line change
@@ -94,5 +94,221 @@ cdef inline float64_t kth_smallest_c(float64_t* a,
9494
return a[k]
9595

9696

97+
@cython.boundscheck(False)
@cython.wraparound(False)
def group_median_float64(ndarray[float64_t, ndim=2] out,
                         ndarray[int64_t] counts,
                         ndarray[float64_t, ndim=2] values,
                         ndarray[int64_t] labels,
                         Py_ssize_t min_count=-1):
    """
    Write the per-group median of every column of `values` into `out`.

    Only aggregates on axis=0.

    Parameters
    ----------
    out : 2-D float64 array, written in place
        ``out[g, c]`` receives the result for group ``g`` and column ``c``.
    counts : 1-D int64 array, written in place
        Its length defines the number of groups; it is overwritten with
        the per-group sizes reported by ``groupsort_indexer`` (the leading
        entry of that result is dropped).
    values : 2-D float64 array of the data to aggregate
    labels : 1-D int64 array of group labels, one per row of `values`
    min_count : Py_ssize_t, default -1
        Must be left at -1; any other value trips the assertion below.

    Notes
    -----
    This function modifies `out` and `counts` rather than returning an
    object.
    """
    cdef:
        Py_ssize_t col, grp, nrows, ncols, ngroups, group_len
        ndarray[int64_t] group_sizes
        ndarray sorted_vals
        float64_t* cursor

    assert min_count == -1, "'min_count' only used in add and prod"

    nrows, ncols = (<object> values).shape
    ngroups = len(counts)

    # NOTE(review): groupsort_indexer is defined elsewhere; it is assumed to
    # return a row ordering that makes each group contiguous together with
    # the group sizes, slot 0 holding the NA group -- confirm.
    indexer, group_sizes = groupsort_indexer(labels, ngroups)
    counts[:] = group_sizes[1:]

    # One row of the scratch buffer per column of `values`, so that each
    # group's data for a column can be walked with a raw pointer.
    sorted_vals = np.empty((ncols, nrows), dtype=np.float64)
    cursor = <float64_t*> sorted_vals.data

    take_2d_axis1_float64_float64(values.T, indexer, out=sorted_vals)

    with nogil:
        for col in range(ncols):
            # exclude NA group
            cursor += group_sizes[0]
            for grp in range(ngroups):
                group_len = group_sizes[grp + 1]
                out[grp, col] = median_linear(cursor, group_len)
                # step over the slice that was just consumed
                cursor += group_len
135+
136+
137+
@cython.boundscheck(False)
@cython.wraparound(False)
def group_cumprod_float64(float64_t[:, :] out,
                          float64_t[:, :] values,
                          int64_t[:] labels,
                          bint is_datetimelike):
    """
    Cumulative product of `values` within each group, written into `out`.

    Only transforms on axis=0.

    Parameters
    ----------
    out : 2-D float64 memoryview, written in place
        Same shape as `values`.
    values : 2-D float64 memoryview of the data to transform
    labels : 1-D int64 memoryview with the group label of each row
        Rows with a negative label are skipped entirely.
    is_datetimelike : bint
        Not used by this function.

    Notes
    -----
    This function modifies `out` rather than returning an object.

    NaN entries of `values` do not take part in the running product, and
    the matching entries of `out` are left untouched; the same holds for
    every row with a negative label.  Callers therefore need to pre-fill
    `out` with whatever those positions should hold.
    """
    cdef:
        Py_ssize_t i, j, N, K
        float64_t val
        float64_t[:, :] accum
        int64_t lab

    N, K = (<object> values).shape
    # Running product per (label, column); row `lab` belongs to group `lab`.
    accum = np.ones_like(values)

    with nogil:
        for i in range(N):
            lab = labels[i]

            if lab < 0:
                continue
            for j in range(K):
                val = values[i, j]
                # `val == val` is False only for NaN
                if val == val:
                    accum[lab, j] *= val
                    out[i, j] = accum[lab, j]
166+
167+
168+
@cython.boundscheck(False)
@cython.wraparound(False)
def group_cumsum(numeric[:, :] out,
                 numeric[:, :] values,
                 int64_t[:] labels,
                 is_datetimelike):
    """
    Cumulative sum of `values` within each group, written into `out`.

    Only transforms on axis=0.

    Parameters
    ----------
    out : 2-D memoryview of the fused type `numeric`, written in place
        Same shape as `values`.
    values : 2-D memoryview of the fused type `numeric`
    labels : 1-D int64 memoryview with the group label of each row
        Rows with a negative label are skipped entirely.
    is_datetimelike : object
        Not used by this function.

    Notes
    -----
    This function modifies `out` rather than returning an object.

    For the floating point specializations, NaN entries of `values` do not
    take part in the running sum and the matching entries of `out` are
    left untouched; the same holds for every row with a negative label.
    Callers therefore need to pre-fill `out` with whatever those positions
    should hold.
    """
    cdef:
        Py_ssize_t i, j, N, K
        numeric val
        numeric[:, :] accum
        int64_t lab

    N, K = (<object> values).shape
    # Running sum per (label, column); row `lab` belongs to group `lab`.
    accum = np.zeros_like(values)

    with nogil:
        for i in range(N):
            lab = labels[i]

            if lab < 0:
                continue
            for j in range(K):
                val = values[i, j]

                # This test is on the fused type, not on a runtime value.
                if numeric == float32_t or numeric == float64_t:
                    # `val == val` is False only for NaN
                    if val == val:
                        accum[lab, j] += val
                        out[i, j] = accum[lab, j]
                else:
                    accum[lab, j] += val
                    out[i, j] = accum[lab, j]
202+
203+
204+
@cython.boundscheck(False)
@cython.wraparound(False)
def group_shift_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
                        int ngroups, int periods):
    """
    Build the indexer that shifts rows by `periods` within each group.

    Parameters
    ----------
    out : 1-D int64 array, written in place
        ``out[i]`` receives the position of the row that lies `periods`
        rows before row ``i`` inside the same group (after it when
        `periods` is negative), or -1 when no such row exists.
    labels : 1-D int64 array with the group label of each row
        Rows with a negative label are null keys and always get -1.
    ngroups : int
        Number of groups; every non-negative label must be smaller.
    periods : int
        Number of rows to shift by.  0 yields the identity indexer.

    Notes
    -----
    This function modifies `out` rather than returning an object.
    """
    cdef:
        # `offset` may hold N - 1, so it has to be as wide as N itself
        Py_ssize_t N, i, ii, offset
        int sign
        int64_t lab, idxer, idxer_slot
        int64_t[:] label_seen = np.zeros(ngroups, dtype=np.int64)
        int64_t[:, :] label_indexer

    N, = (<object> labels).shape

    if periods == 0:
        with nogil:
            for i in range(N):
                out[i] = i
        return

    if periods < 0:
        # A backward shift is a forward shift over the reversed rows.
        periods = -periods
        offset = N - 1
        sign = -1
    else:
        offset = 0
        sign = 1

    # For each group, a ring buffer holding the positions of the last
    # `periods` rows seen.
    label_indexer = np.zeros((ngroups, periods), dtype=np.int64)
    with nogil:
        for i in range(N):
            # reverse iterator if shifting backwards
            ii = offset + sign * i
            lab = labels[ii]

            # Skip null keys.  Bounds checks and wraparound are disabled
            # here, so no negative label may be used as an index.
            if lab < 0:
                out[ii] = -1
                continue

            label_seen[lab] += 1

            idxer_slot = label_seen[lab] % periods
            # Read the slot before overwriting it: once the buffer is full
            # it holds the row seen `periods` steps ago.
            idxer = label_indexer[lab, idxer_slot]

            if label_seen[lab] > periods:
                out[ii] = idxer
            else:
                out[ii] = -1

            label_indexer[lab, idxer_slot] = ii
254+
255+
256+
@cython.wraparound(False)
@cython.boundscheck(False)
def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
                         ndarray[uint8_t] mask, object direction,
                         int64_t limit):
    """Indexes how to fill values forwards or backwards within a group

    Parameters
    ----------
    out : array of int64_t values which this method will write its results to
        Missing values will be written to with a value of -1
    labels : array containing unique label for each group, with its ordering
        matching up to the corresponding entries of `out` and `mask`
    mask : array of uint8_t values where a 1 indicates a missing value
    direction : {'ffill', 'bfill'}
        Direction for fill to be applied (forwards or backwards, respectively)
        Any value other than 'bfill' results in a forward fill
    limit : Consecutive values to fill before stopping, or -1 for no limit

    Notes
    -----
    This method modifies the `out` parameter rather than returning an object
    """
    cdef:
        Py_ssize_t i, N
        ndarray[int64_t] sorted_labels
        int64_t idx, curr_fill_idx=-1, filled_vals=0

    N = len(out)

    # Make sure all arrays are the same size
    assert N == len(labels) == len(mask)

    # The sort has to be stable: rows of one group must be visited in their
    # original order, otherwise a value could be filled from a row on the
    # wrong side of it.
    sorted_labels = np.argsort(labels, kind='mergesort').astype(
        np.int64, copy=False)
    if direction == 'bfill':
        # Reversing a stable ordering walks each group back to front.
        sorted_labels = sorted_labels[::-1]

    with nogil:
        for i in range(N):
            idx = sorted_labels[i]
            if mask[idx] == 1:  # is missing
                # Stop filling once we've hit the limit
                if filled_vals >= limit and limit != -1:
                    curr_fill_idx = -1
                filled_vals += 1
            else:  # reset items when not missing
                filled_vals = 0
                curr_fill_idx = idx

            out[idx] = curr_fill_idx

            # If we move to the next group, reset
            # the fill_idx and counter
            if i == N - 1 or labels[idx] != labels[sorted_labels[i+1]]:
                curr_fill_idx = -1
                filled_vals = 0
311+
312+
97313
# generated from template
98314
include "groupby_helper.pxi"

0 commit comments

Comments
 (0)