@@ -669,10 +669,45 @@ def group_mean(floating[:, ::1] out,
669
669
int64_t[::1] counts ,
670
670
ndarray[floating , ndim = 2 ] values,
671
671
const intp_t[::1] labels ,
672
- Py_ssize_t min_count = - 1 ) -> None:
672
+ Py_ssize_t min_count = - 1 ,
673
+ bint is_datetimelike = False ,
674
+ const uint8_t[:, ::1] mask = None ,
675
+ uint8_t[:, ::1] result_mask = None
676
+ ) -> None:
677
+ """
678
+ Compute the mean per label given a label assignment for each value.
679
+ NaN values are ignored.
680
+
681
+ Parameters
682
+ ----------
683
+ out : np.ndarray[floating]
684
+ Values into which this method will write its results.
685
+ counts : np.ndarray[int64]
686
+ A zeroed array of the same shape as labels,
687
+ populated by group sizes during algorithm.
688
+ values : np.ndarray[floating]
689
+ 2-d array of the values to find the mean of.
690
+ labels : np.ndarray[np.intp]
691
+ Array containing a unique label for each group, with its
692
+ ordering matching up to the corresponding record in `values`.
693
+ min_count : Py_ssize_t
694
+ Only used in add and prod. Always -1.
695
+ is_datetimelike : bool
696
+ True if `values` contains datetime-like entries.
697
+ mask : ndarray[bool, ndim=2], optional
698
+ Not used.
699
+ result_mask : ndarray[bool, ndim=2], optional
700
+ Not used.
701
+
702
+ Notes
703
+ -----
704
+ This method modifies the `out` parameter rather than returning an object.
705
+ `counts` is modified to hold group sizes.
706
+ """
707
+
673
708
cdef:
674
709
Py_ssize_t i , j , N , K , lab , ncounts = len (counts)
675
- floating val , count , y , t
710
+ floating val , count , y , t , nan_val
676
711
floating[:, ::1] sumx , compensation
677
712
int64_t[:, ::1] nobs
678
713
Py_ssize_t len_values = len (values), len_labels = len (labels)
@@ -682,12 +717,13 @@ def group_mean(floating[:, ::1] out,
682
717
if len_values != len_labels:
683
718
raise ValueError("len(index ) != len(labels )")
684
719
685
- nobs = np.zeros((< object > out).shape, dtype = np.int64)
686
720
# the below is equivalent to `np.zeros_like(out)` but faster
721
+ nobs = np.zeros((< object > out).shape, dtype = np.int64)
687
722
sumx = np.zeros((< object > out).shape, dtype = (< object > out).base.dtype)
688
723
compensation = np.zeros((< object > out).shape, dtype = (< object > out).base.dtype)
689
724
690
725
N , K = (< object > values).shape
726
+ nan_val = NPY_NAT if is_datetimelike else NAN
691
727
692
728
with nogil:
693
729
for i in range(N ):
@@ -699,7 +735,7 @@ def group_mean(floating[:, ::1] out,
699
735
for j in range (K):
700
736
val = values[i, j]
701
737
# not nan
702
- if val == val:
738
+ if val == val and not (is_datetimelike and val == NPY_NAT) :
703
739
nobs[lab, j] += 1
704
740
y = val - compensation[lab, j]
705
741
t = sumx[lab, j] + y
@@ -710,7 +746,7 @@ def group_mean(floating[:, ::1] out,
710
746
for j in range (K):
711
747
count = nobs[i, j]
712
748
if nobs[i, j] == 0 :
713
- out[i, j] = NAN
749
+ out[i, j] = nan_val
714
750
else :
715
751
out[i, j] = sumx[i, j] / count
716
752
0 commit comments