@@ -673,10 +673,45 @@ def group_mean(floating[:, ::1] out,
673
673
int64_t[::1] counts ,
674
674
ndarray[floating , ndim = 2 ] values,
675
675
const intp_t[::1] labels ,
676
- Py_ssize_t min_count = - 1 ) -> None:
676
+ Py_ssize_t min_count = - 1 ,
677
+ bint is_datetimelike = False ,
678
+ const uint8_t[:, ::1] mask = None ,
679
+ uint8_t[:, ::1] result_mask = None
680
+ ) -> None:
681
+ """
682
+ Compute the mean per label given a label assignment for each value.
683
+ NaN values are ignored; NaT values are also ignored when `is_datetimelike` is True.
684
+
685
+ Parameters
686
+ ----------
687
+ out : np.ndarray[floating]
688
+ Values into which this method will write its results.
689
+ counts : np.ndarray[int64]
690
+ A zeroed array of the same shape as labels,
691
+ populated by group sizes during algorithm.
692
+ values : np.ndarray[floating]
693
+ 2-d array of the values to find the mean of.
694
+ labels : np.ndarray[np.intp]
695
+ Array containing unique label for each group, with its
696
+ ordering matching up to the corresponding record in `values`.
697
+ min_count : Py_ssize_t
698
+ Only used in add and prod. Always -1.
699
+ is_datetimelike : bool
700
+ True if `values` contains datetime-like entries; NaT entries are then skipped and NPY_NAT is written where there are no valid observations.
701
+ mask : ndarray[bool , ndim = 2 ], optional
702
+ Not used.
703
+ result_mask : ndarray[bool , ndim = 2 ], optional
704
+ Not used.
705
+
706
+ Notes
707
+ -----
708
+ This method modifies the `out` parameter rather than returning an object.
709
+ `counts` is modified to hold group sizes.
710
+ """
711
+
677
712
cdef:
678
713
Py_ssize_t i , j , N , K , lab , ncounts = len (counts)
679
- floating val , count , y , t
714
+ floating val , count , y , t , nan_val
680
715
floating[:, ::1] sumx , compensation
681
716
int64_t[:, ::1] nobs
682
717
Py_ssize_t len_values = len (values), len_labels = len (labels)
@@ -686,12 +721,13 @@ def group_mean(floating[:, ::1] out,
686
721
if len_values != len_labels:
687
722
raise ValueError("len(index ) != len(labels )")
688
723
689
- nobs = np.zeros((< object > out).shape, dtype = np.int64)
690
724
# the below is equivalent to `np.zeros_like(out )` but faster
725
+ nobs = np.zeros((< object > out).shape, dtype = np.int64)
691
726
sumx = np.zeros((< object > out).shape, dtype = (< object > out).base.dtype)
692
727
compensation = np.zeros((< object > out).shape, dtype = (< object > out).base.dtype)
693
728
694
729
N , K = (< object > values).shape
730
+ nan_val = NPY_NAT if is_datetimelike else NAN
695
731
696
732
with nogil:
697
733
for i in range(N ):
@@ -703,7 +739,7 @@ def group_mean(floating[:, ::1] out,
703
739
for j in range (K):
704
740
val = values[i, j]
705
741
# not nan
706
- if val == val:
742
+ if val == val and not (is_datetimelike and val == NPY_NAT) :
707
743
nobs[lab, j] += 1
708
744
y = val - compensation[lab, j]
709
745
t = sumx[lab, j] + y
@@ -714,7 +750,7 @@ def group_mean(floating[:, ::1] out,
714
750
for j in range (K):
715
751
count = nobs[i, j]
716
752
if nobs[i, j] == 0 :
717
- out[i, j] = NAN
753
+ out[i, j] = nan_val
718
754
else :
719
755
out[i, j] = sumx[i, j] / count
720
756
0 commit comments