ENH: test coverage, made pivot_table work with no rows passed and margins=True

wesm · wesm · commit 08a5523dae7f · 2012-01-18T23:11:24.000-05:00
diff --git a/doc/source/io.rst b/doc/source/io.rst
@@ -375,15 +375,16 @@ In a current or later Python session, you can retrieve stored objects:
 
    store['df']
 
-Storing in Table format
-~~~~~~~~~~~~~~~~~~~~~~~
-
-Querying objects stored in Table format
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
 .. ipython:: python
    :suppress:
 
    store.close()
    import os
    os.remove('store.h5')
+
+
+.. Storing in Table format
+.. ~~~~~~~~~~~~~~~~~~~~~~~
+
+.. Querying objects stored in Table format
+.. ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4000,7 +4000,8 @@ def _lexsort_indexer(keys):
         shape.append(len(rizer.uniques))
 
     group_index = get_group_index(labels, shape)
-    comp_ids, _, max_group = _compress_group_index(group_index)
+    comp_ids, obs_ids = _compress_group_index(group_index)
+    max_group = len(obs_ids)
     indexer, _ = lib.groupsort_indexer(comp_ids.astype('i4'), max_group)
     return indexer
 
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -223,14 +223,13 @@ def __iter__(self):
     def _multi_iter(self):
         data = self.obj
         group_index = self._group_index
-        comp_ids, _, ngroups = _compress_group_index(group_index)
+        comp_ids, obs_ids = _compress_group_index(group_index)
+        ngroups = len(obs_ids)
         label_list = [ping.labels for ping in self.groupings]
         level_list = [ping.group_index for ping in self.groupings]
         mapper = _KeyMapper(comp_ids, ngroups, label_list, level_list)
 
         for label, group in self._generate_groups(data, comp_ids, ngroups):
-            if group is None:
-                continue
             key = mapper.get_key(label)
             yield key, group
 
@@ -335,7 +334,8 @@ def _cython_agg_general(self, how):
         # aggregate all the columns at once?)
 
         group_index = self._group_index
-        comp_ids, obs_group_ids, max_group = _compress_group_index(group_index)
+        comp_ids, obs_group_ids = _compress_group_index(group_index)
+        max_group = len(obs_group_ids)
 
         output = {}
         for name, obj in self._iterate_slices():
@@ -355,6 +355,32 @@ def _cython_agg_general(self, how):
 
         return self._wrap_aggregated_output(output, mask, obs_group_ids)
 
+    def _python_agg_general(self, func, *args, **kwargs):
+        agg_func = lambda x: func(x, *args, **kwargs)
+
+        group_index = self._group_index
+        comp_ids, obs_group_ids = _compress_group_index(group_index)
+        max_group = len(obs_group_ids)
+
+        # iterate over "columns" minus exclusions to populate output dict
+        output = {}
+        for name, obj in self._iterate_slices():
+            try:
+                result, counts = self._aggregate_series(obj, agg_func,
+                                                        comp_ids, max_group)
+                output[name] = result
+            except TypeError:
+                continue
+
+        if len(output) == 0:
+            return self._python_apply_general(func, *args, **kwargs)
+
+        mask = counts.ravel() > 0
+        for name, result in output.iteritems():
+            output[name] = result[mask]
+
+        return self._wrap_aggregated_output(output, mask, obs_group_ids)
+
     @property
     def _group_index(self):
         result = get_group_index([ping.labels for ping in self.groupings],
@@ -380,31 +406,6 @@ def _get_group_levels(self, mask, obs_ids):
 
         return name_list
 
-    def _python_agg_general(self, func, *args, **kwargs):
-        agg_func = lambda x: func(x, *args, **kwargs)
-
-        group_index = self._group_index
-        comp_ids, obs_group_ids, max_group = _compress_group_index(group_index)
-
-        # iterate through "columns" ex exclusions to populate output dict
-        output = {}
-        for name, obj in self._iterate_slices():
-            try:
-                result, counts = self._aggregate_series(obj, agg_func,
-                                                        comp_ids, max_group)
-                output[name] = result
-            except TypeError:
-                continue
-
-        if len(output) == 0:
-            return self._python_apply_general(func, *args, **kwargs)
-
-        mask = counts.ravel() > 0
-        for name, result in output.iteritems():
-            output[name] = result[mask]
-
-        return self._wrap_aggregated_output(output, mask, obs_group_ids)
-
     def _aggregate_series(self, obj, func, group_index, ngroups):
         try:
             return self._aggregate_series_fast(obj, func, group_index, ngroups)
@@ -431,8 +432,6 @@ def _aggregate_series_pure_python(self, obj, func, group_index, ngroups):
         result = None
 
         for label, group in self._generate_groups(obj, group_index, ngroups):
-            if group is None:
-                continue
             res = func(group)
             if result is None:
                 try:
@@ -597,7 +596,6 @@ def __iter__(self):
         return iter(self.indices)
 
     _labels = None
-    _ids = None
     _counts = None
     _group_index = None
 
@@ -615,13 +613,6 @@ def labels(self):
             self._make_labels()
         return self._labels
 
-    @property
-    def ids(self):
-        if self._ids is None:
-            index = self.group_index
-            self._ids = dict(zip(range(len(index)), index))
-        return self._ids
-
     @property
     def counts(self):
         if self._counts is None:
@@ -1297,10 +1288,11 @@ def _get_slice(slob):
                                        ngroups)
 
     for i, (start, end) in enumerate(zip(starts, ends)):
-        if start == end:
-            yield i, None
-        else:
-            yield i, _get_slice(slice(start, end))
+        # Group ids are compressed to only those observed in the data, so
+        # every group has at least one row and an empty slice here would
+        # indicate a bug
+        assert(start < end)
+        yield i, _get_slice(slice(start, end))
 
 def get_group_index(label_list, shape):
     if len(label_list) == 1:
@@ -1390,7 +1382,6 @@ def _compress_group_index(group_index, sort=True):
 
     group_index = _ensure_int64(group_index)
     comp_ids = table.get_labels_groupby(group_index, uniques)
-    max_group = len(uniques)
 
     # these are the ones we observed
     obs_group_ids = np.array(uniques, dtype='i8')
@@ -1406,7 +1397,7 @@ def _compress_group_index(group_index, sort=True):
 
         obs_group_ids = obs_group_ids.take(sorter)
 
-    return comp_ids, obs_group_ids, max_group
+    return comp_ids, obs_group_ids
 
 def _groupby_indices(values):
     if values.dtype != np.object_:
diff --git a/pandas/tests/test_daterange.py b/pandas/tests/test_daterange.py
@@ -107,6 +107,11 @@ def test_getitem(self):
         # 32-bit vs. 64-bit platforms
         self.assertEquals(self.rng[4], self.rng[np.int_(4)])
 
+    def test_getitem_matplotlib_hackaround(self):
+        values = self.rng[:, None]
+        expected = self.rng.values[:, None]
+        self.assert_(np.array_equal(values, expected))
+
     def test_shift(self):
         shifted = self.rng.shift(5)
         self.assertEquals(shifted[0], self.rng[5])
diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
@@ -1131,6 +1131,7 @@ def _check_op(arr, op):
 
     def test_series_frame_radd_bug(self):
         from pandas.util.testing import rands
+        import operator
 
         # GH 353
         vals = Series([rands(5) for _ in xrange(10)])
@@ -1143,6 +1144,9 @@ def test_series_frame_radd_bug(self):
         expected = DataFrame({'vals' : vals.map(lambda x: 'foo_' + x)})
         tm.assert_frame_equal(result, expected)
 
+        # really raise this time
+        self.assertRaises(TypeError, operator.add, datetime.now(), self.ts)
+
     def test_operators_frame(self):
         # rpow does not work with DataFrame
         df = DataFrame({'A' : self.ts})
diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py
@@ -108,23 +108,6 @@ def pivot_table(data, values=None, rows=None, cols=None, aggfunc='mean',
 DataFrame.pivot_table = pivot_table
 
 def _add_margins(table, data, values, rows=None, cols=None, aggfunc=np.mean):
-    if len(cols) > 0:
-        col_margin = data[rows + values].groupby(rows).agg(aggfunc)
-
-        # need to "interleave" the margins
-        table_pieces = []
-        margin_keys = []
-        for key, piece in table.groupby(level=0, axis=1):
-            all_key = (key, 'All') + ('',) * (len(cols) - 1)
-            piece[all_key] = col_margin[key]
-            table_pieces.append(piece)
-            margin_keys.append(all_key)
-
-        result = concat(table_pieces, axis=1)
-    else:
-        result = table
-        margin_keys = table.columns
-
     grand_margin = {}
     for k, v in data[values].iteritems():
         try:
@@ -135,6 +118,40 @@ def _add_margins(table, data, values, rows=None, cols=None, aggfunc=np.mean):
         except TypeError:
             pass
 
+    if len(cols) > 0:
+        # need to "interleave" the margins
+        table_pieces = []
+        margin_keys = []
+
+
+        def _all_key(key):
+            return (key, 'All') + ('',) * (len(cols) - 1)
+
+        if len(rows) > 0:
+            margin = data[rows + values].groupby(rows).agg(aggfunc)
+            cat_axis = 1
+            for key, piece in table.groupby(level=0, axis=cat_axis):
+                all_key = _all_key(key)
+                piece[all_key] = margin[key]
+                table_pieces.append(piece)
+                margin_keys.append(all_key)
+        else:
+            margin = grand_margin
+            cat_axis = 0
+            for key, piece in table.groupby(level=0, axis=cat_axis):
+                all_key = _all_key(key)
+                table_pieces.append(piece)
+                table_pieces.append(Series(margin[key], index=[all_key]))
+                margin_keys.append(all_key)
+
+        result = concat(table_pieces, axis=cat_axis)
+
+        if len(rows) == 0:
+            return result
+    else:
+        result = table
+        margin_keys = table.columns
+
     if len(cols) > 0:
         row_margin = data[cols + values].groupby(cols).agg(aggfunc)
         row_margin = row_margin.stack()
diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py
@@ -116,14 +116,17 @@ def _check_output(res, col, rows=['A', 'B'], cols=['C']):
             gmarg = table[valcol]['All', '']
             self.assertEqual(gmarg, self.data[valcol].mean())
 
-        # doesn't quite work yet
-
-        # # no rows
-        # table = self.data.pivot_table(cols=['A', 'B'], margins=True,
-        #                               aggfunc=np.mean)
-        # for valcol in table.columns:
-        #     gmarg = table[valcol]['All', '']
-        #     self.assertEqual(gmarg, self.data[valcol].mean())
+        # this is OK
+        table = self.data.pivot_table(rows=['AA', 'BB'], margins=True,
+                                      aggfunc='mean')
+
+        # no rows
+        rtable = self.data.pivot_table(cols=['AA', 'BB'], margins=True,
+                                      aggfunc=np.mean)
+        self.assert_(isinstance(rtable, Series))
+        for item in ['DD', 'EE', 'FF']:
+            gmarg = table[item]['All', '']
+            self.assertEqual(gmarg, self.data[item].mean())
 
 
 class TestCrosstab(unittest.TestCase):