Skip to content

Commit c99f78a

Browse files
committed
ENH: cython count_level function, cleanup and tests
1 parent 23f68e3 commit c99f78a

File tree

4 files changed

+44
-26
lines changed

4 files changed

+44
-26
lines changed

pandas/core/index.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1217,6 +1217,8 @@ def sortlevel(self, level=0, ascending=True):
12171217
-------
12181218
sorted_index : MultiIndex
12191219
"""
1220+
# TODO: check if lexsorted when level=0
1221+
12201222
labels = list(self.labels)
12211223
level = self._get_level_number(level)
12221224
primary = labels.pop(level)

pandas/core/series.py

Lines changed: 7 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -573,36 +573,17 @@ def count(self, level=None):
573573

574574
def _count_level(self, level):
575575
# TODO: GENERALIZE CODE OVERLAP WITH DATAFRAME
576-
# TODO: deal with sortedness??
577-
obj = self.sortlevel(level)
578-
mask = notnull(obj.values)
579-
580-
level_index = obj.index.levels[level]
576+
mask = notnull(self.values)
577+
level_index = self.index.levels[level]
581578

582579
if len(self) == 0:
583580
return Series(0, index=level_index)
584581

585-
n = len(level_index)
586-
locs = obj.index.labels[level].searchsorted(np.arange(n))
587-
588-
# WORKAROUND: reduceat fusses about the endpoints. should file ticket?
589-
start = locs.searchsorted(0, side='right') - 1
590-
end = locs.searchsorted(len(mask), side='left')
591-
592-
result = np.zeros((n), dtype=int)
593-
out = result[start:end]
594-
np.add.reduceat(mask, locs[start:end], out=out)
595-
596-
# WORKAROUND: to see why, try this
597-
# arr = np.ones((10, 4), dtype=bool)
598-
# np.add.reduceat(arr, [0, 3, 3, 7, 9], axis=0)
599-
600-
# this stinks
601-
if len(locs) > 1:
602-
workaround_mask = locs[:-1] == locs[1:]
603-
result[:-1][workaround_mask] = 0
604-
605-
return Series(result, index=level_index)
582+
# call cython function
583+
max_bin = len(level_index)
584+
counts = lib.count_level_1d(mask.view(np.uint8),
585+
self.index.labels[level], max_bin)
586+
return Series(counts, index=level_index)
606587

607588
def value_counts(self):
608589
"""

pandas/src/groupby.pyx

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,23 @@ def _bucket_locs(index, buckets, inclusive=False):
453453

454454
return locs
455455

456+
def count_level_1d(ndarray[uint8_t, cast=True] mask,
                   ndarray[int32_t] labels, Py_ssize_t max_bin):
    """
    Count, per bin, the positions whose mask entry is set.

    Parameters
    ----------
    mask : ndarray[uint8_t]
        Nonzero where the observation at that position should be counted
        (boolean arrays are accepted through ``cast=True``).
    labels : ndarray[int32_t]
        Bin number for each position of `mask`; read at every index
        ``0 <= i < len(mask)``.
    max_bin : Py_ssize_t
        Number of bins, i.e. the length of the returned array.

    Returns
    -------
    counts : ndarray[int64_t]
        ``counts[b]`` is the number of positions ``i`` with ``mask[i]`` set
        and ``labels[i] == b``.  Positions with a negative label are not
        counted.
    """
    cdef:
        Py_ssize_t i, n, lab
        ndarray[int64_t] counts

    counts = np.zeros(max_bin, dtype='i8')

    n = len(mask)

    for i from 0 <= i < n:
        lab = labels[i]
        # A negative label has no bin of its own.  Used directly as an
        # index it would wrap around and be credited to a bin at the end
        # of `counts`, so skip it instead.
        # NOTE(review): presumably -1 marks a missing key in the callers'
        # label arrays -- confirm.
        if mask[i] and lab >= 0:
            counts[lab] += 1

    return counts
471+
472+
456473
'''
457474
458475
def ts_upsample_mean(ndarray[object] indices,

pandas/tests/test_multilevel.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,24 @@ def _check_counts(frame, axis=0):
281281
df = tm.makeTimeDataFrame()
282282
self.assertRaises(Exception, df.count, level=0)
283283

284+
def test_count_level_series(self):
    # Series.count(level=k) must agree with groupby(level=k).count() once
    # the groupby result is reindexed onto the full level and the keys
    # that have no rows ('bar' on level 0, 'four' on level 1) are filled
    # with 0.
    level_values = [['foo', 'bar', 'baz'],
                    ['one', 'two', 'three', 'four']]
    level_labels = [[0, 0, 0, 2, 2],
                    [2, 0, 1, 1, 2]]
    index = MultiIndex(levels=level_values, labels=level_labels)

    s = Series(np.random.randn(len(index)), index=index)

    # Same comparison for each level of the index.
    for level in (0, 1):
        result = s.count(level=level)
        expected = s.groupby(level=level).count()
        assert_series_equal(result.astype('f8'),
                            expected.reindex(result.index).fillna(0))
301+
284302
def test_count_level_corner(self):
285303
s = self.frame['A'][:0]
286304
result = s.count(level=0)

0 commit comments

Comments
 (0)