@@ -382,6 +382,10 @@ def group_any_all(uint8_t[:] out,
382
382
if values[i] == flag_val:
383
383
out[lab] = flag_val
384
384
385
+ # ----------------------------------------------------------------------
386
+ # group_add, group_prod, group_var, group_mean, group_ohlc
387
+ # ----------------------------------------------------------------------
388
+
385
389
386
390
@ cython.wraparound (False )
387
391
@ cython.boundscheck (False )
@@ -433,5 +437,213 @@ def _group_add(floating[:, :] out,
433
437
group_add_float32 = _group_add[' float' ]
434
438
group_add_float64 = _group_add[' double' ]
435
439
440
+
441
@cython.wraparound(False)
@cython.boundscheck(False)
def _group_prod(floating[:, :] out,
                int64_t[:] counts,
                floating[:, :] values,
                const int64_t[:] labels,
                Py_ssize_t min_count=0):
    """
    Compute the per-group product of every column (aggregates on axis=0 only).

    Parameters
    ----------
    out : 2-D floating memoryview
        Result buffer, overwritten in place. ``out[g, j]`` receives the
        product of the non-NaN entries of column ``j`` whose label is ``g``,
        or NAN when fewer than ``min_count`` non-NaN entries were seen.
        Presumably has ``len(counts)`` rows -- confirm against the caller.
    counts : 1-D int64 memoryview
        Incremented in place once for every row whose label is non-negative,
        whether or not the row holds NaN values. It is not reset here.
    values : 2-D floating memoryview
        Input data; rows are the observations being grouped.
    labels : 1-D int64 memoryview
        Group index of each row of ``values``. Rows with a negative label
        are skipped entirely.
    min_count : Py_ssize_t, default 0
        Minimum number of non-NaN observations required for a result cell
        to hold a product instead of NAN.

    Raises
    ------
    AssertionError
        If ``values`` and ``labels`` differ in length.
    """
    cdef:
        Py_ssize_t i, j, N, K, lab, ncounts = len(counts)
        floating val
        ndarray[floating, ndim=2] prodx, nobs

    if len(values) != len(labels):
        raise AssertionError(" len(index) != len(labels)")

    # nobs counts the non-NaN observations per (group, column); prodx starts
    # at the multiplicative identity so a cell with no observations stays 1.
    nobs = np.zeros_like(out)
    prodx = np.ones_like(out)

    N, K = (<object>values).shape

    with nogil:
        for i in range(N):
            lab = labels[i]
            if lab < 0:
                continue

            counts[lab] += 1
            for j in range(K):
                val = values[i, j]

                # NaN is the only value that compares unequal to itself
                if val == val:
                    nobs[lab, j] += 1
                    prodx[lab, j] *= val

        for i in range(ncounts):
            for j in range(K):
                if nobs[i, j] < min_count:
                    out[i, j] = NAN
                else:
                    out[i, j] = prodx[i, j]


# Concrete specializations of the fused-type implementation.
group_prod_float32 = _group_prod[' float']
group_prod_float64 = _group_prod[' double']
489
+
490
+
491
@cython.wraparound(False)
@cython.boundscheck(False)
@cython.cdivision(True)
def _group_var(floating[:, :] out,
               int64_t[:] counts,
               floating[:, :] values,
               const int64_t[:] labels,
               Py_ssize_t min_count=-1):
    """
    Compute the per-group variance of every column in a single pass.

    ``out`` is zeroed, then used to accumulate the sum of squared deviations
    from a running per-cell mean; each cell is finally divided by
    ``n - 1``, where ``n`` is the number of non-NaN observations. Cells with
    fewer than two such observations are set to NAN.

    ``counts`` is incremented in place once per row with a non-negative
    label; rows with a negative label are ignored. ``min_count`` must be
    left at -1. Raises AssertionError when ``values`` and ``labels`` differ
    in length.
    """
    cdef:
        Py_ssize_t row, col, nrows, ncols, grp, ngroups = len(counts)
        floating x, n_seen, prev_mean
        ndarray[floating, ndim=2] seen, running_mean

    assert min_count == -1, " 'min_count' only used in add and prod"

    if len(values) != len(labels):
        raise AssertionError(" len(index) != len(labels)")

    seen = np.zeros_like(out)
    running_mean = np.zeros_like(out)

    nrows, ncols = (<object>values).shape

    out[:, :] = 0.0

    with nogil:
        for row in range(nrows):
            grp = labels[row]
            if grp < 0:
                continue

            counts[grp] += 1

            for col in range(ncols):
                x = values[row, col]

                # skip NaN (the only value not equal to itself)
                if x != x:
                    continue

                seen[grp, col] += 1
                prev_mean = running_mean[grp, col]
                # update the mean first: the squared-deviation term below
                # needs both the new and the previous mean
                running_mean[grp, col] += (x - prev_mean) / seen[grp, col]
                out[grp, col] += (x - running_mean[grp, col]) * (x - prev_mean)

        for row in range(ngroups):
            for col in range(ncols):
                n_seen = seen[row, col]
                if n_seen >= 2:
                    out[row, col] /= (n_seen - 1)
                else:
                    out[row, col] = NAN


# Concrete specializations of the fused-type implementation.
group_var_float32 = _group_var[' float']
group_var_float64 = _group_var[' double']
545
+
546
+
547
@cython.wraparound(False)
@cython.boundscheck(False)
def _group_mean(floating[:, :] out,
                int64_t[:] counts,
                floating[:, :] values,
                const int64_t[:] labels,
                Py_ssize_t min_count=-1):
    """
    Compute the per-group arithmetic mean of every column.

    For each (group, column) cell the non-NaN values are summed and divided
    by how many there were; a cell with no non-NaN values is set to NAN.

    ``counts`` is incremented in place once per row with a non-negative
    label; rows with a negative label are ignored. ``min_count`` must be
    left at -1. Raises AssertionError when ``values`` and ``labels`` differ
    in length.
    """
    cdef:
        Py_ssize_t row, col, nrows, ncols, grp, ngroups = len(counts)
        floating x, n_seen
        ndarray[floating, ndim=2] totals, seen

    assert min_count == -1, " 'min_count' only used in add and prod"

    if len(values) != len(labels):
        raise AssertionError(" len(index) != len(labels)")

    seen = np.zeros_like(out)
    totals = np.zeros_like(out)

    nrows, ncols = (<object>values).shape

    with nogil:
        for row in range(nrows):
            grp = labels[row]
            if grp < 0:
                continue

            counts[grp] += 1
            for col in range(ncols):
                x = values[row, col]
                # skip NaN (the only value not equal to itself)
                if x != x:
                    continue
                seen[grp, col] += 1
                totals[grp, col] += x

        for row in range(ngroups):
            for col in range(ncols):
                n_seen = seen[row, col]
                if n_seen != 0:
                    out[row, col] = totals[row, col] / n_seen
                else:
                    out[row, col] = NAN


# Concrete specializations of the fused-type implementation.
group_mean_float32 = _group_mean[' float']
group_mean_float64 = _group_mean[' double']
594
+
595
+
596
@cython.wraparound(False)
@cython.boundscheck(False)
def _group_ohlc(floating[:, :] out,
                int64_t[:] counts,
                floating[:, :] values,
                const int64_t[:] labels,
                Py_ssize_t min_count=-1):
    """
    Compute first / max / min / last of a single column per group
    (aggregates on axis=0 only).

    Parameters
    ----------
    out : 2-D floating memoryview with exactly 4 columns
        Overwritten in place. For group ``g``, columns 0..3 receive the
        first, largest, smallest and last non-NaN value with that label,
        in row order. Groups with no non-NaN value stay NaN.
    counts : 1-D int64 memoryview
        Incremented in place once for every row with a non-negative label,
        including rows whose value is NaN. It is not reset here.
    values : 2-D floating memoryview
        Input data; only column 0 is read and more than one column is
        rejected.
    labels : 1-D int64 memoryview
        Group index of each row of ``values``. Rows with a negative label
        are skipped.
    min_count : Py_ssize_t, default -1
        Unsupported here; must be left at -1.

    Raises
    ------
    ValueError
        If ``out`` does not have 4 columns.
    NotImplementedError
        If ``values`` has more than one column.

    Notes
    -----
    When ``labels`` is empty the function returns immediately, before any
    shape validation and without touching ``out``.
    """
    cdef:
        Py_ssize_t i, N, K, lab
        floating val

    assert min_count == -1, " 'min_count' only used in add and prod"

    if len(labels) == 0:
        return

    N, K = (<object>values).shape

    if out.shape[1] != 4:
        raise ValueError(' Output array must have 4 columns')

    if K > 1:
        raise NotImplementedError(" Argument 'values' must have only "
                                  " one dimension")
    # NaN in column 0 doubles as the "no observation yet" marker below.
    out[:] = np.nan

    with nogil:
        for i in range(N):
            lab = labels[i]
            # Skip every negative label, as the sibling kernels do: bounds
            # checking and wraparound are disabled, so a negative index
            # would read and write outside the buffers.
            if lab < 0:
                continue

            counts[lab] += 1
            val = values[i, 0]
            # NaN is the only value that compares unequal to itself
            if val != val:
                continue

            if out[lab, 0] != out[lab, 0]:
                # first valid observation seeds all four columns
                out[lab, 0] = out[lab, 1] = out[lab, 2] = out[lab, 3] = val
            else:
                out[lab, 1] = max(out[lab, 1], val)
                out[lab, 2] = min(out[lab, 2], val)
                out[lab, 3] = val


# Concrete specializations of the fused-type implementation.
group_ohlc_float32 = _group_ohlc[' float']
group_ohlc_float64 = _group_ohlc[' double']
647
+
436
648
# generated from template
437
649
include " groupby_helper.pxi"
0 commit comments