@@ -382,6 +382,9 @@ def group_any_all(uint8_t[:] out,
382
382
if values[i] == flag_val:
383
383
out[lab] = flag_val
384
384
385
+ # ----------------------------------------------------------------------
386
+ # group_add, group_prod, group_var, group_mean, group_ohlc
387
+ # ----------------------------------------------------------------------
385
388
386
389
@ cython.wraparound (False )
387
390
@ cython.boundscheck (False )
@@ -433,5 +436,212 @@ def _group_add(floating[:, :] out,
433
436
group_add_float32 = _group_add[' float' ]
434
437
group_add_float64 = _group_add[' double' ]
435
438
439
+
440
@cython.wraparound(False)
@cython.boundscheck(False)
def _group_prod(floating[:, :] out,
                int64_t[:] counts,
                floating[:, :] values,
                const int64_t[:] labels,
                Py_ssize_t min_count=0):
    """
    Compute the per-group product of every column.

    Only aggregates on axis=0.

    Parameters
    ----------
    out : floating[:, :]
        Result buffer, written in place. ``out[g, j]`` receives the product
        of the non-NaN entries of column ``j`` whose row label is ``g``, or
        NaN when fewer than ``min_count`` such entries were seen.
    counts : int64_t[:]
        Incremented in place once per row that carries a non-negative label
        (rows containing NaN included). Its length determines how many
        groups are written to ``out``.
    values : floating[:, :]
        Input data, one row per observation.
    labels : const int64_t[:]
        Group number of each row of ``values``. Rows with a negative label
        are ignored.
    min_count : Py_ssize_t, default 0
        Minimum number of non-NaN observations required to emit a product.

    Raises
    ------
    AssertionError
        If ``values`` and ``labels`` do not have the same length.

    Notes
    -----
    Bounds checking is disabled and labels are not validated against
    ``len(counts)``; callers must pass labels smaller than the number of
    groups.
    """
    cdef:
        Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
        floating val
        ndarray[floating, ndim=2] prodx, nobs

    if len(values) != len(labels):
        raise AssertionError("len(index) != len(labels)")

    nobs = np.zeros_like(out)
    # Start from the multiplicative identity, so a group without any valid
    # observation yields 1.0 when min_count is 0.
    prodx = np.ones_like(out)

    N, K = (<object>values).shape

    with nogil:
        for i in range(N):
            lab = labels[i]
            if lab < 0:
                continue

            counts[lab] += 1
            for j in range(K):
                val = values[i, j]

                # NaN is the only value that compares unequal to itself
                if val == val:
                    nobs[lab, j] += 1
                    prodx[lab, j] *= val

        for i in range(ncounts):
            for j in range(K):
                if nobs[i, j] < min_count:
                    out[i, j] = NAN
                else:
                    out[i, j] = prodx[i, j]


group_prod_float32 = _group_prod['float']
group_prod_float64 = _group_prod['double']
487
+
488
+
489
@cython.wraparound(False)
@cython.boundscheck(False)
@cython.cdivision(True)
def _group_var(floating[:, :] out,
               int64_t[:] counts,
               floating[:, :] values,
               const int64_t[:] labels,
               Py_ssize_t min_count=-1,
               Py_ssize_t ddof=1):
    """
    Compute the per-group variance of every column.

    Only aggregates on axis=0.

    Parameters
    ----------
    out : floating[:, :]
        Result buffer, written in place. ``out[g, j]`` receives the variance
        of the non-NaN entries of column ``j`` whose row label is ``g``, or
        NaN when the group has ``ddof`` or fewer such entries.
    counts : int64_t[:]
        Incremented in place once per row that carries a non-negative label
        (rows containing NaN included). Its length determines how many
        groups are finalized in ``out``.
    values : floating[:, :]
        Input data, one row per observation.
    labels : const int64_t[:]
        Group number of each row of ``values``. Rows with a negative label
        are ignored.
    min_count : Py_ssize_t, default -1
        Accepted for signature compatibility only; must be left at -1.
    ddof : Py_ssize_t, default 1
        Delta degrees of freedom: the accumulated sum of squared deviations
        is divided by ``nobs - ddof``. The default gives the sample variance.

    Raises
    ------
    AssertionError
        If ``min_count`` is not -1, or if ``values`` and ``labels`` do not
        have the same length.

    Notes
    -----
    Bounds checking is disabled and labels are not validated against
    ``len(counts)``; callers must pass labels smaller than the number of
    groups.
    """
    cdef:
        Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
        floating val, ct, oldmean
        ndarray[floating, ndim=2] nobs, mean

    # Explicit raise instead of `assert` so the check cannot be compiled or
    # optimized away.
    if min_count != -1:
        raise AssertionError("'min_count' only used in add and prod")

    if len(values) != len(labels):
        raise AssertionError("len(index) != len(labels)")

    nobs = np.zeros_like(out)
    mean = np.zeros_like(out)

    N, K = (<object>values).shape

    # `out` doubles as the accumulator for the sum of squared deviations.
    out[:, :] = 0.0

    with nogil:
        for i in range(N):
            lab = labels[i]
            if lab < 0:
                continue

            counts[lab] += 1

            for j in range(K):
                val = values[i, j]

                # NaN is the only value that compares unequal to itself
                if val == val:
                    # Welford's single-pass update of the running mean and
                    # of the sum of squared deviations from it.
                    nobs[lab, j] += 1
                    oldmean = mean[lab, j]
                    mean[lab, j] += (val - oldmean) / nobs[lab, j]
                    out[lab, j] += (val - mean[lab, j]) * (val - oldmean)

        for i in range(ncounts):
            for j in range(K):
                ct = nobs[i, j]
                # cdivision is enabled, so the zero/negative denominator
                # must be excluded by hand.
                if ct <= ddof:
                    out[i, j] = NAN
                else:
                    out[i, j] /= (ct - ddof)


group_var_float32 = _group_var['float']
group_var_float64 = _group_var['double']
543
+
544
+
545
@cython.wraparound(False)
@cython.boundscheck(False)
def _group_mean(floating[:, :] out,
                int64_t[:] counts,
                floating[:, :] values,
                const int64_t[:] labels,
                Py_ssize_t min_count=-1):
    """
    Compute the per-group arithmetic mean of every column.

    Only aggregates on axis=0.

    Parameters
    ----------
    out : floating[:, :]
        Result buffer, written in place. ``out[g, j]`` receives the mean of
        the non-NaN entries of column ``j`` whose row label is ``g``, or NaN
        when the group has no such entry.
    counts : int64_t[:]
        Incremented in place once per row that carries a non-negative label
        (rows containing NaN included). Its length determines how many
        groups are written to ``out``.
    values : floating[:, :]
        Input data, one row per observation.
    labels : const int64_t[:]
        Group number of each row of ``values``. Rows with a negative label
        are ignored.
    min_count : Py_ssize_t, default -1
        Accepted for signature compatibility only; must be left at -1.

    Raises
    ------
    AssertionError
        If ``min_count`` is not -1, or if ``values`` and ``labels`` do not
        have the same length.

    Notes
    -----
    Bounds checking is disabled and labels are not validated against
    ``len(counts)``; callers must pass labels smaller than the number of
    groups.
    """
    cdef:
        Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
        floating val, count
        ndarray[floating, ndim=2] sumx, nobs

    # Explicit raise instead of `assert` so the check cannot be compiled or
    # optimized away.
    if min_count != -1:
        raise AssertionError("'min_count' only used in add and prod")

    if len(values) != len(labels):
        raise AssertionError("len(index) != len(labels)")

    nobs = np.zeros_like(out)
    sumx = np.zeros_like(out)

    N, K = (<object>values).shape

    with nogil:
        for i in range(N):
            lab = labels[i]
            if lab < 0:
                continue

            counts[lab] += 1
            for j in range(K):
                val = values[i, j]

                # NaN is the only value that compares unequal to itself
                if val == val:
                    nobs[lab, j] += 1
                    sumx[lab, j] += val

        for i in range(ncounts):
            for j in range(K):
                count = nobs[i, j]
                # Guard the division: an empty group has no mean.
                if count == 0:
                    out[i, j] = NAN
                else:
                    out[i, j] = sumx[i, j] / count


group_mean_float32 = _group_mean['float']
group_mean_float64 = _group_mean['double']
592
+
593
+
594
@cython.wraparound(False)
@cython.boundscheck(False)
def _group_ohlc(floating[:, :] out,
                int64_t[:] counts,
                floating[:, :] values,
                const int64_t[:] labels,
                Py_ssize_t min_count=-1):
    """
    Compute per-group first, maximum, minimum and last values
    (open/high/low/close) of a single column.

    Only aggregates on axis=0.

    Parameters
    ----------
    out : floating[:, :]
        Result buffer with exactly 4 columns, written in place. For group
        ``g``, columns 0-3 receive the first, largest, smallest and last
        non-NaN value whose row label is ``g``. Rows of groups without any
        non-NaN value are left as NaN.
    counts : int64_t[:]
        Incremented in place once per row that carries a non-negative label
        (rows containing NaN included).
    values : floating[:, :]
        Input data; only column 0 is read and more than one column is
        rejected.
    labels : const int64_t[:]
        Group number of each row of ``values``. Rows with a negative label
        are ignored.
    min_count : Py_ssize_t, default -1
        Accepted for signature compatibility only; must be left at -1.

    Raises
    ------
    AssertionError
        If ``min_count`` is not -1.
    ValueError
        If ``out`` does not have 4 columns.
    NotImplementedError
        If ``values`` has more than one column.

    Notes
    -----
    Returns immediately, leaving ``out`` untouched, when ``labels`` is
    empty. Bounds checking is disabled and labels are not validated against
    the size of ``out`` or ``counts``.
    """
    cdef:
        Py_ssize_t i, N, K, lab
        floating val

    # Explicit raise instead of `assert` so the check cannot be compiled or
    # optimized away.
    if min_count != -1:
        raise AssertionError("'min_count' only used in add and prod")

    if len(labels) == 0:
        return

    N, K = (<object>values).shape

    if out.shape[1] != 4:
        raise ValueError('Output array must have 4 columns')

    if K > 1:
        raise NotImplementedError("Argument 'values' must have only "
                                  "one dimension")

    # NaN in the "open" column marks a group that has not yet seen a value.
    out[:] = np.nan

    with nogil:
        for i in range(N):
            lab = labels[i]
            # Skip every negative label, not just -1: with boundscheck and
            # wraparound off, a negative index would touch memory outside
            # the buffers.
            if lab < 0:
                continue

            counts[lab] += 1
            val = values[i, 0]
            # NaN observations count towards `counts` but not towards OHLC
            if val != val:
                continue

            if out[lab, 0] != out[lab, 0]:
                # First valid observation seeds all four columns
                out[lab, 0] = out[lab, 1] = out[lab, 2] = out[lab, 3] = val
            else:
                out[lab, 1] = max(out[lab, 1], val)
                out[lab, 2] = min(out[lab, 2], val)
                out[lab, 3] = val


group_ohlc_float32 = _group_ohlc['float']
group_ohlc_float64 = _group_ohlc['double']
645
+
436
646
# generated from template
437
647
include " groupby_helper.pxi"
0 commit comments