Skip to content

Commit 7ff8e48

Browse files
committed
Cythonized GroupBy all implementation
1 parent 735a9e0 commit 7ff8e48

File tree

1 file changed

+52
-2
lines changed

1 file changed

+52
-2
lines changed

pandas/_libs/groupby.pyx

+52-2
Original file line number | Diff line number | Diff line change
@@ -350,12 +350,62 @@ def group_any(ndarray[int64_t] out,
350350
with nogil:
351351
for i in range(N):
352352
lab = labels[i]
353-
if lab < 0:
353+
if lab < 0 or (skipna and isna_mask[i]):
354354
continue
355355

356-
if bool_mask[i] and not (skipna and isna_mask[i]):
356+
if bool_mask[i]:
357357
out[lab] = 1
358358

359359

360+
@cython.boundscheck(False)
@cython.wraparound(False)
def group_all(ndarray[int64_t] out,
              ndarray values,
              ndarray[int64_t] labels,
              bint skipna):
    """Aggregate boolean values to show if all group elements are truthful

    Parameters
    ----------
    out : array of int64_t values which this method will write its results to
    values : array of values to be truth-tested
    labels : array containing unique label for each group, with its ordering
        matching up to the corresponding record in `values`
    skipna : boolean
        Flag to ignore nan values during truth testing

    Notes
    -----
    This method modifies the `out` parameter rather than returning an object.
    The values written will either be 0 or 1 (False or True, respectively).
    Records whose label is negative are ignored. A group that receives no
    record (or only ignored ones) is left at 1, mirroring ``all([])``.
    """
    cdef:
        Py_ssize_t i, N=len(labels)
        int64_t lab
        ndarray[int64_t] bool_mask
        ndarray[uint8_t] isna_mask

    # Both masks are materialised up front, while the GIL is still held, so
    # that the loop below only touches typed buffers.
    if values.dtype == 'object':
        bool_mask = np.array([bool(x) for x in values]).astype(np.int64)
        isna_mask = missing.isnaobj(values).astype(np.uint8)
    else:
        # ``np.bool_`` rather than ``np.bool``: the latter was a deprecated
        # alias of the builtin and is no longer available in current NumPy.
        bool_mask = values.astype(np.bool_).astype(np.int64)
        isna_mask = np.isnan(values).astype(np.uint8)

    # Because the 'all' value of an empty iterable in Python is True we can
    # start with an array full of ones and set to zero when a False value is
    # encountered
    out.fill(1)

    with nogil:
        for i in range(N):
            lab = labels[i]
            # Skip records that belong to no group, and missing values when
            # the caller asked for them to be ignored.
            if lab < 0 or (skipna and isna_mask[i]):
                continue

            if not bool_mask[i]:
                out[lab] = 0
408+
409+
360410
# generated from template
361411
include "groupby_helper.pxi"

0 commit comments

Comments
 (0)