@@ -94,5 +94,221 @@ cdef inline float64_t kth_smallest_c(float64_t* a,
94
94
return a[k]
95
95
96
96
97
@cython.boundscheck(False)
@cython.wraparound(False)
def group_median_float64(ndarray[float64_t, ndim=2] out,
                         ndarray[int64_t] counts,
                         ndarray[float64_t, ndim=2] values,
                         ndarray[int64_t] labels,
                         Py_ssize_t min_count=-1):
    """
    Only aggregates on axis=0

    Compute, for every column of `values`, the median of each group and
    store it in ``out[group, column]``.

    Parameters
    ----------
    out : 2-D float64 array
        Receives one median per (group, column) pair.
    counts : int64 array
        Overwritten with the counts returned by ``groupsort_indexer``,
        minus their leading entry. Its length defines the number of groups.
    values : 2-D float64 array
        Input data; rows are grouped, columns are aggregated independently.
    labels : int64 array
        Group label of each row, forwarded to ``groupsort_indexer``.
    min_count : Py_ssize_t, default -1
        Not supported here; must be left at -1.

    Notes
    -----
    Results are written into `out` and `counts`; nothing is returned.
    NOTE(review): ``groupsort_indexer`` is defined elsewhere; this code
    relies on its counts starting with the size of the NA group, followed
    by one size per group -- confirm against that helper.
    """
    cdef:
        Py_ssize_t col, grp, nrows, ncols, ngroups, grp_size
        ndarray[int64_t] group_sizes
        ndarray sorted_data
        float64_t* cursor

    assert min_count == -1, " 'min_count' only used in add and prod"

    ngroups = len(counts)
    nrows, ncols = (<object> values).shape

    sort_indexer, group_sizes = groupsort_indexer(labels, ngroups)
    counts[:] = group_sizes[1:]

    # Scratch buffer laid out as (column, row) so that, once the rows have
    # been reordered by `sort_indexer`, each group's values for one column
    # are contiguous and can be handed to `median_linear` by pointer.
    sorted_data = np.empty((ncols, nrows), dtype=np.float64)
    cursor = <float64_t*> sorted_data.data

    take_2d_axis1_float64_float64(values.T, sort_indexer, out=sorted_data)

    with nogil:

        for col in range(ncols):
            # exclude NA group: step over its entries at the start of
            # every column
            cursor += group_sizes[0]
            for grp in range(ngroups):
                grp_size = group_sizes[grp + 1]
                out[grp, col] = median_linear(cursor, grp_size)
                cursor += grp_size
135
+
136
+
137
@cython.boundscheck(False)
@cython.wraparound(False)
def group_cumprod_float64(float64_t[:, :] out,
                          float64_t[:, :] values,
                          int64_t[:] labels,
                          bint is_datetimelike):
    """
    Only transforms on axis=0

    Running product of `values` within each group, column by column.

    Parameters
    ----------
    out : 2-D float64 memoryview
        Receives the cumulative product for every row/column that is
        processed. Entries belonging to rows with a negative label, or to
        NaN inputs, are not written.
    values : 2-D float64 memoryview
        Input data.
    labels : int64 memoryview
        Group label of each row; negative labels are skipped.
    is_datetimelike : bint
        Accepted but not used by this function.

    Notes
    -----
    Results are written into `out`; nothing is returned.
    """
    cdef:
        Py_ssize_t row, col, nrows, ncols
        float64_t current
        float64_t[:, :] running
        int64_t grp

    nrows, ncols = (<object> values).shape
    # One running product per (label, column), starting at the
    # multiplicative identity.
    running = np.ones_like(values)

    with nogil:
        for row in range(nrows):
            grp = labels[row]

            if grp >= 0:
                for col in range(ncols):
                    current = values[row, col]
                    # NaN is the only value that differs from itself; it
                    # neither updates the product nor produces an output.
                    if current != current:
                        continue
                    running[grp, col] *= current
                    out[row, col] = running[grp, col]
166
+
167
+
168
@cython.boundscheck(False)
@cython.wraparound(False)
def group_cumsum(numeric[:, :] out,
                 numeric[:, :] values,
                 int64_t[:] labels,
                 is_datetimelike):
    """
    Only transforms on axis=0

    Running sum of `values` within each group, column by column.

    Parameters
    ----------
    out : 2-D memoryview of the fused ``numeric`` type
        Receives the cumulative sum for every row/column that is
        processed. Entries belonging to rows with a negative label are not
        written; for the floating-point specialisations, entries whose
        input is NaN are not written either.
    values : 2-D memoryview of the fused ``numeric`` type
        Input data.
    labels : int64 memoryview
        Group label of each row; negative labels are skipped.
    is_datetimelike : object
        Accepted but not used by this function.

    Notes
    -----
    Results are written into `out`; nothing is returned.
    """
    cdef:
        Py_ssize_t row, col, nrows, ncols
        numeric current
        numeric[:, :] running
        int64_t grp

    nrows, ncols = (<object> values).shape
    # One running total per (label, column), starting at zero.
    running = np.zeros_like(values)

    with nogil:
        for row in range(nrows):
            grp = labels[row]

            if grp < 0:
                continue

            for col in range(ncols):
                current = values[row, col]

                # Resolved per fused-type specialisation: only the float
                # variants need to step over NaN (the sole value that is
                # not equal to itself).
                if numeric == float32_t or numeric == float64_t:
                    if current != current:
                        continue

                running[grp, col] += current
                out[row, col] = running[grp, col]
202
+
203
+
204
@cython.boundscheck(False)
@cython.wraparound(False)
def group_shift_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
                        int ngroups, int periods):
    """
    Build an indexer that shifts rows by `periods` within each group.

    Parameters
    ----------
    out : int64 array
        Receives, for every row, the position of the row `periods` steps
        earlier (or later, for negative `periods`) in the same group, or
        -1 when no such row exists or the row's label is -1.
    labels : int64 array
        Group label of each row; -1 marks a null key.
    ngroups : int
        Number of groups; sizes the per-group bookkeeping arrays.
    periods : int
        Number of positions to shift. Zero yields the identity indexer.

    Notes
    -----
    Results are written into `out`; nothing is returned.
    """
    cdef:
        Py_ssize_t nrows, step, pos
        int start, direction
        int64_t grp, source, slot
        # number of rows encountered so far for each label
        int64_t[:] seen = np.zeros(ngroups, dtype=np.int64)
        int64_t[:, :] recent

    nrows, = (<object> labels).shape

    if periods == 0:
        # nothing to shift: every row maps to itself
        with nogil:
            for step in range(nrows):
                out[step] = step
        return

    if periods < 0:
        # shifting backwards: walk the rows from last to first
        periods = -periods
        start = nrows - 1
        direction = -1
    else:
        start = 0
        direction = 1

    # For each label, a ring of the last `periods` positions visited.
    recent = np.zeros((ngroups, periods), dtype=np.int64)
    with nogil:
        for step in range(nrows):
            pos = start + direction * step
            grp = labels[pos]

            # Skip null keys
            if grp == -1:
                out[pos] = -1
                continue

            seen[grp] += 1

            slot = seen[grp] % periods
            source = recent[grp, slot]

            # The ring slot only holds a genuine earlier position once
            # more than `periods` rows of this group have been visited.
            out[pos] = source if seen[grp] > periods else -1

            recent[grp, slot] = pos
254
+
255
+
256
@cython.wraparound(False)
@cython.boundscheck(False)
def group_fillna_indexer(ndarray[int64_t] out, ndarray[int64_t] labels,
                         ndarray[uint8_t] mask, object direction,
                         int64_t limit):
    """Indexes how to fill values forwards or backwards within a group

    Parameters
    ----------
    out : array of int64_t values which this method will write its results to
        Missing values will be written to with a value of -1
    labels : array containing unique label for each group, with its ordering
        matching up to the corresponding record in `values`
    mask : array of uint8_t values where a 1 indicates a missing value
    direction : {'ffill', 'bfill'}
        Direction for fill to be applied (forwards or backwards, respectively)
        Any value other than 'bfill' is treated as a forward fill.
    limit : Consecutive values to fill before stopping, or -1 for no limit

    Raises
    ------
    AssertionError
        If `out`, `labels` and `mask` do not all have the same length.

    Notes
    -----
    This method modifies the `out` parameter rather than returning an object
    """
    cdef:
        Py_ssize_t i, N
        ndarray[int64_t] sorted_labels
        int64_t idx, curr_fill_idx=-1, filled_vals=0

    N = len(out)

    # Make sure all arrays are the same size
    assert N == len(labels) == len(mask)

    # The sort must be stable: rows sharing a label have to be visited in
    # their original order (or exactly reversed, for 'bfill'), otherwise a
    # missing value could be filled from a row on the wrong side of it.
    sorted_labels = np.argsort(labels, kind='mergesort').astype(
        np.int64, copy=False)
    if direction == 'bfill':
        sorted_labels = sorted_labels[::-1]

    with nogil:
        for i in range(N):
            idx = sorted_labels[i]
            if mask[idx] == 1:  # is missing
                # Stop filling once we've hit the limit
                if filled_vals >= limit and limit != -1:
                    curr_fill_idx = -1
                filled_vals += 1
            else:  # reset items when not missing
                filled_vals = 0
                curr_fill_idx = idx

            out[idx] = curr_fill_idx

            # If we move to the next group, reset
            # the fill_idx and counter
            if i == N - 1 or labels[idx] != labels[sorted_labels[i + 1]]:
                curr_fill_idx = -1
                filled_vals = 0
311
+
312
+
97
313
# generated from template
98
314
include " groupby_helper.pxi"
0 commit comments