@@ -675,10 +675,45 @@ def group_mean(floating[:, ::1] out,
675
675
int64_t[::1] counts ,
676
676
ndarray[floating , ndim = 2 ] values,
677
677
const intp_t[::1] labels ,
678
- Py_ssize_t min_count = - 1 ) -> None:
678
+ Py_ssize_t min_count = - 1 ,
679
+ bint is_datetimelike = False ,
680
+ const uint8_t[:, ::1] mask = None ,
681
+ uint8_t[:, ::1] result_mask = None
682
+ ) -> None:
683
+ """
684
+ Compute the mean per label given a label assignment for each value.
685
+ NaN values are ignored.
686
+
687
+ Parameters
688
+ ----------
689
+ out : np.ndarray[floating]
690
+ Values into which this method will write its results.
691
+ counts : np.ndarray[int64]
692
+ A zeroed array of the same shape as labels,
693
+ populated by group sizes during algorithm.
694
+ values : np.ndarray[floating]
695
+ 2-d array of the values to find the mean of.
696
+ labels : np.ndarray[np.intp]
697
+ Array containing the unique label for each group, with its
698
+ ordering matching up to the corresponding record in `values`.
699
+ min_count : Py_ssize_t
700
+ Only used in add and prod. Always -1.
701
+ is_datetimelike : bool
702
+ True if `values` contains datetime-like entries.
703
+ mask : ndarray[bool, ndim=2], optional
704
+ Not used.
705
+ result_mask : ndarray[bool, ndim=2], optional
706
+ Not used.
707
+
708
+ Notes
709
+ -----
710
+ This method modifies the `out` parameter rather than returning an object.
711
+ `counts` is modified to hold group sizes.
712
+ """
713
+
679
714
cdef:
680
715
Py_ssize_t i , j , N , K , lab , ncounts = len (counts)
681
- floating val , count , y , t
716
+ floating val , count , y , t , nan_val
682
717
floating[:, ::1] sumx , compensation
683
718
int64_t[:, ::1] nobs
684
719
Py_ssize_t len_values = len (values), len_labels = len (labels)
@@ -688,12 +723,13 @@ def group_mean(floating[:, ::1] out,
688
723
if len_values != len_labels:
689
724
raise ValueError("len(index ) != len(labels )")
690
725
691
- nobs = np.zeros((< object > out).shape, dtype = np.int64)
692
726
# the below is equivalent to `np.zeros_like(out )` but faster
727
+ nobs = np.zeros((< object > out).shape, dtype = np.int64)
693
728
sumx = np.zeros((< object > out).shape, dtype = (< object > out).base.dtype)
694
729
compensation = np.zeros((< object > out).shape, dtype = (< object > out).base.dtype)
695
730
696
731
N , K = (< object > values).shape
732
+ nan_val = NPY_NAT if is_datetimelike else NAN
697
733
698
734
with nogil:
699
735
for i in range(N ):
@@ -705,7 +741,7 @@ def group_mean(floating[:, ::1] out,
705
741
for j in range (K):
706
742
val = values[i, j]
707
743
# not nan
708
- if val == val:
744
+ if val == val and not (is_datetimelike and val == NPY_NAT) :
709
745
nobs[lab, j] += 1
710
746
y = val - compensation[lab, j]
711
747
t = sumx[lab, j] + y
@@ -716,7 +752,7 @@ def group_mean(floating[:, ::1] out,
716
752
for j in range (K):
717
753
count = nobs[i, j]
718
754
if nobs[i, j] == 0 :
719
- out[i, j] = NAN
755
+ out[i, j] = nan_val
720
756
else :
721
757
out[i, j] = sumx[i, j] / count
722
758
0 commit comments