@@ -36,7 +36,8 @@ def get_dispatch(dtypes):
36
36
def group_add_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
37
37
ndarray[int64_t] counts,
38
38
ndarray[{{c_type}}, ndim=2] values,
39
- ndarray[int64_t] labels):
39
+ ndarray[int64_t] labels,
40
+ Py_ssize_t min_count=1):
40
41
"""
41
42
Only aggregates on axis=0
42
43
"""
@@ -88,7 +89,7 @@ def group_add_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
88
89
89
90
for i in range(ncounts):
90
91
for j in range(K):
91
- if nobs[i, j] == 0 :
92
+ if nobs[i, j] < min_count :
92
93
out[i, j] = NAN
93
94
else:
94
95
out[i, j] = sumx[i, j]
@@ -99,7 +100,8 @@ def group_add_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
99
100
def group_prod_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
100
101
ndarray[int64_t] counts,
101
102
ndarray[{{c_type}}, ndim=2] values,
102
- ndarray[int64_t] labels):
103
+ ndarray[int64_t] labels,
104
+ Py_ssize_t min_count=1):
103
105
"""
104
106
Only aggregates on axis=0
105
107
"""
@@ -147,7 +149,7 @@ def group_prod_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
147
149
148
150
for i in range(ncounts):
149
151
for j in range(K):
150
- if nobs[i, j] == 0 :
152
+ if nobs[i, j] < min_count :
151
153
out[i, j] = NAN
152
154
else:
153
155
out[i, j] = prodx[i, j]
@@ -159,12 +161,15 @@ def group_prod_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
159
161
def group_var_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
160
162
ndarray[int64_t] counts,
161
163
ndarray[{{dest_type2}}, ndim=2] values,
162
- ndarray[int64_t] labels):
164
+ ndarray[int64_t] labels,
165
+ Py_ssize_t min_count=-1):
163
166
cdef:
164
167
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
165
168
{{dest_type2}} val, ct, oldmean
166
169
ndarray[{{dest_type2}}, ndim=2] nobs, mean
167
170
171
+ assert min_count == -1, "'min_count' only used in add and prod"
172
+
168
173
if not len(values) == len(labels):
169
174
raise AssertionError("len(index) != len(labels)")
170
175
@@ -208,12 +213,15 @@ def group_var_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
208
213
def group_mean_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
209
214
ndarray[int64_t] counts,
210
215
ndarray[{{dest_type2}}, ndim=2] values,
211
- ndarray[int64_t] labels):
216
+ ndarray[int64_t] labels,
217
+ Py_ssize_t min_count=-1):
212
218
cdef:
213
219
Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
214
220
{{dest_type2}} val, count
215
221
ndarray[{{dest_type2}}, ndim=2] sumx, nobs
216
222
223
+ assert min_count == -1, "'min_count' only used in add and prod"
224
+
217
225
if not len(values) == len(labels):
218
226
raise AssertionError("len(index) != len(labels)")
219
227
@@ -263,7 +271,8 @@ def group_mean_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
263
271
def group_ohlc_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
264
272
ndarray[int64_t] counts,
265
273
ndarray[{{dest_type2}}, ndim=2] values,
266
- ndarray[int64_t] labels):
274
+ ndarray[int64_t] labels,
275
+ Py_ssize_t min_count=-1):
267
276
"""
268
277
Only aggregates on axis=0
269
278
"""
@@ -272,6 +281,8 @@ def group_ohlc_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
272
281
{{dest_type2}} val, count
273
282
Py_ssize_t ngroups = len(counts)
274
283
284
+ assert min_count == -1, "'min_count' only used in add and prod"
285
+
275
286
if len(labels) == 0:
276
287
return
277
288
@@ -332,7 +343,8 @@ def get_dispatch(dtypes):
332
343
def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
333
344
ndarray[int64_t] counts,
334
345
ndarray[{{c_type}}, ndim=2] values,
335
- ndarray[int64_t] labels):
346
+ ndarray[int64_t] labels,
347
+ Py_ssize_t min_count=-1):
336
348
"""
337
349
Only aggregates on axis=0
338
350
"""
@@ -342,6 +354,8 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
342
354
ndarray[{{dest_type2}}, ndim=2] resx
343
355
ndarray[int64_t, ndim=2] nobs
344
356
357
+ assert min_count == -1, "'min_count' only used in add and prod"
358
+
345
359
if not len(values) == len(labels):
346
360
raise AssertionError("len(index) != len(labels)")
347
361
@@ -382,7 +396,8 @@ def group_last_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
382
396
def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
383
397
ndarray[int64_t] counts,
384
398
ndarray[{{c_type}}, ndim=2] values,
385
- ndarray[int64_t] labels, int64_t rank):
399
+ ndarray[int64_t] labels, int64_t rank,
400
+ Py_ssize_t min_count=-1):
386
401
"""
387
402
Only aggregates on axis=0
388
403
"""
@@ -392,6 +407,8 @@ def group_nth_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
392
407
ndarray[{{dest_type2}}, ndim=2] resx
393
408
ndarray[int64_t, ndim=2] nobs
394
409
410
+ assert min_count == -1, "'min_count' only used in add and prod"
411
+
395
412
if not len(values) == len(labels):
396
413
raise AssertionError("len(index) != len(labels)")
397
414
@@ -455,7 +472,8 @@ def get_dispatch(dtypes):
455
472
def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
456
473
ndarray[int64_t] counts,
457
474
ndarray[{{dest_type2}}, ndim=2] values,
458
- ndarray[int64_t] labels):
475
+ ndarray[int64_t] labels,
476
+ Py_ssize_t min_count=-1):
459
477
"""
460
478
Only aggregates on axis=0
461
479
"""
@@ -464,6 +482,8 @@ def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
464
482
{{dest_type2}} val, count
465
483
ndarray[{{dest_type2}}, ndim=2] maxx, nobs
466
484
485
+ assert min_count == -1, "'min_count' only used in add and prod"
486
+
467
487
if not len(values) == len(labels):
468
488
raise AssertionError("len(index) != len(labels)")
469
489
@@ -526,7 +546,8 @@ def group_max_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
526
546
def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
527
547
ndarray[int64_t] counts,
528
548
ndarray[{{dest_type2}}, ndim=2] values,
529
- ndarray[int64_t] labels):
549
+ ndarray[int64_t] labels,
550
+ Py_ssize_t min_count=-1):
530
551
"""
531
552
Only aggregates on axis=0
532
553
"""
@@ -535,6 +556,8 @@ def group_min_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
535
556
{{dest_type2}} val, count
536
557
ndarray[{{dest_type2}}, ndim=2] minx, nobs
537
558
559
+ assert min_count == -1, "'min_count' only used in add and prod"
560
+
538
561
if not len(values) == len(labels):
539
562
raise AssertionError("len(index) != len(labels)")
540
563
@@ -686,7 +709,8 @@ def group_cummax_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
686
709
def group_median_float64(ndarray[float64_t, ndim=2] out,
687
710
ndarray[int64_t] counts,
688
711
ndarray[float64_t, ndim=2] values,
689
- ndarray[int64_t] labels):
712
+ ndarray[int64_t] labels,
713
+ Py_ssize_t min_count=-1):
690
714
"""
691
715
Only aggregates on axis=0
692
716
"""
@@ -695,6 +719,9 @@ def group_median_float64(ndarray[float64_t, ndim=2] out,
695
719
ndarray[int64_t] _counts
696
720
ndarray data
697
721
float64_t* ptr
722
+
723
+ assert min_count == -1, "'min_count' only used in add and prod"
724
+
698
725
ngroups = len(counts)
699
726
N, K = (<object> values).shape
700
727
0 commit comments