Skip to content

Commit 2e61d97

Browse files
committed
BUG: make groupby play nice with sparse objects, modify SparseSeries.take to return SparseSeries, dictification tests, GH #666
1 parent 5472443 commit 2e61d97

File tree

7 files changed

+78
-11
lines changed

7 files changed

+78
-11
lines changed

pandas/core/common.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -800,7 +800,7 @@ def load(path):
800800
801801
Parameters
802802
----------
803-
p path : string
803+
path : string
804804
File path
805805
806806
Returns

pandas/core/groupby.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,9 @@ def __getattr__(self, attr):
161161
raise AttributeError("'%s' object has no attribute '%s'" %
162162
(type(self).__name__, attr))
163163

164+
def __getitem__(self, key):
    """
    Placeholder for selecting part of the grouped object by key

    This implementation always raises.
    NOTE(review): presumably concrete subclasses override it -- confirm.

    Raises
    ------
    NotImplementedError
    """
    # Name the concrete type so the failure is diagnosable from the
    # traceback alone.
    raise NotImplementedError("'%s' object does not support item selection"
                              % type(self).__name__)
166+
164167
def _make_wrapper(self, name):
165168
f = getattr(self.obj, name)
166169
if not isinstance(f, types.MethodType):
@@ -294,7 +297,13 @@ def mean(self):
294297
295298
For multiple groupings, the result index will be a MultiIndex
296299
"""
297-
return self._cython_agg_general('mean')
300+
try:
301+
return self._cython_agg_general('mean')
302+
except GroupByError:
303+
raise
304+
except Exception:
305+
f = lambda x: x.mean(axis=self.axis)
306+
return self._python_agg_general(f)
298307

299308
def std(self):
300309
"""
@@ -1256,7 +1265,6 @@ def _aggregate_generic(self, func, *args, **kwargs):
12561265

12571266
return result
12581267

1259-
12601268
class NDArrayGroupBy(GroupBy):
12611269
pass
12621270

pandas/sparse/array.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -263,14 +263,15 @@ def _get_val_at(self, loc):
263263
else:
264264
return _gin.get_value_at(self, sp_loc)
265265

266-
def take(self, indices):
266+
def take(self, indices, axis=0):
267267
"""
268268
Sparse-compatible version of ndarray.take
269269
270270
Returns
271271
-------
272272
taken : ndarray
273273
"""
274+
assert(axis == 0)
274275
indices = np.asarray(indices, dtype=int)
275276

276277
n = len(self)

pandas/sparse/frame.py

+24
Original file line numberDiff line numberDiff line change
@@ -543,6 +543,30 @@ def _rename_columns_inplace(self, mapper):
543543
self.columns = new_columns
544544
self._series = new_series
545545

546+
def take(self, indices, axis=0):
    """
    Analogous to ndarray.take, return SparseDataFrame corresponding to
    requested indices along an axis

    Parameters
    ----------
    indices : list / array of ints
    axis : {0, 1}
        0 takes positions along the index, 1 takes positions along the
        columns. The negative equivalents -2 and -1 are accepted too.

    Returns
    -------
    taken : SparseDataFrame

    Raises
    ------
    ValueError
        If axis does not refer to one of the two axes
    """
    if axis not in (0, 1, -1, -2):
        raise ValueError('axis must be 0 or 1, got %r' % (axis,))
    # Normalize negative axes (-2 -> 0, -1 -> 1) so the label bookkeeping
    # below always agrees with the axis handed to values.take.
    axis = axis % 2

    new_values = self.values.take(indices, axis=axis)
    if axis == 0:
        new_columns = self.columns
        new_index = self.index.take(indices)
    else:
        new_columns = self.columns.take(indices)
        new_index = self.index
    return self._constructor(new_values, index=new_index,
                             columns=new_columns)
569+
546570
def add_prefix(self, prefix):
547571
f = (('%s' % prefix) + '%s').__mod__
548572
return self.rename(columns=f)

pandas/sparse/series.py

+12
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,18 @@ def sparse_reindex(self, new_index):
409409
sparse_index=new_index,
410410
fill_value=self.fill_value)
411411

412+
def take(self, indices, axis=0):
    """
    Sparse-compatible version of ndarray.take

    Parameters
    ----------
    indices : list / array of ints
    axis : int, default 0
        Forwarded to SparseArray.take rather than silently ignored

    Returns
    -------
    taken : SparseSeries
    """
    new_values = SparseArray.take(self, indices, axis=axis)
    new_index = self.index.take(indices)
    # Re-wrap through the constructor so callers get a series carrying the
    # taken index labels instead of a bare array.
    return self._constructor(new_values, index=new_index)
423+
412424
def cumsum(self, axis=0, dtype=None, out=None):
413425
"""
414426
Cumulative sum of values. Preserves locations of NaN values

pandas/sparse/tests/test_sparse.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,8 @@ def _compare_with_dense(sp):
352352
def _compare(idx):
    # Take the same positions from the dense and the sparse series: the
    # sparse result must still be a SparseSeries and hold equal values.
    dense_result = dense.take(idx).values
    sparse_result = sp.take(idx)
    # assertTrue instead of the deprecated assert_ alias.
    self.assertTrue(isinstance(sparse_result, SparseSeries))
    assert_almost_equal(dense_result, sparse_result.values)
356357

357358
_compare([1., 2., 3., 4., 5., 0.])
358359
_compare([7, 2, 9, 0, 4])

pandas/tests/test_groupby.py

+27-6
Original file line numberDiff line numberDiff line change
@@ -1285,14 +1285,35 @@ def test_groupby_list_infer_array_like(self):
12851285
result = df.groupby(['foo', 'bar']).mean()
12861286
expected = df.groupby([df['foo'], df['bar']]).mean()[['val']]
12871287

1288-
1289-
class TestPanelGroupBy(unittest.TestCase):
1290-
1291-
def setUp(self):
1288+
def test_dictify(self):
    """
    Smoke test: each flavour of groupby can be iterated into a dict

    Nothing is asserted about the contents; the test passes as long as no
    call raises.
    """
    # Thunks keep construction lazy, so each groupby is built and consumed
    # one at a time, in order.
    make_groupings = (
        lambda: self.df.groupby('A'),
        lambda: self.df.groupby(['A', 'B']),
        lambda: self.df['C'].groupby(self.df['A']),
        lambda: self.df['C'].groupby([self.df['A'], self.df['B']]),
        lambda: self.df.groupby('A')['C'],
        lambda: self.df.groupby(['A', 'B'])['C'],
    )
    for make_grouping in make_groupings:
        dict(iter(make_grouping()))
1295+
1296+
def test_sparse_friendly(self):
    """
    Smoke test: groupby aggregation and iteration run on sparse objects

    Nothing is asserted about the results; the test passes as long as no
    call raises.
    """
    sdf = self.df[['C', 'D']].to_sparse()

    def _check_work(gp):
        gp.mean()
        gp.agg(np.mean)
        dict(iter(gp))

    # it works!
    _check_work(sdf.groupby(lambda x: x // 2))
    _check_work(sdf['C'].groupby(lambda x: x // 2))
    _check_work(sdf.groupby(self.df['A']))

    # TODO: do this someday -- build a panel with tm.makePanel() and
    # tm.add_nans(panel), then:
    # _check_work(panel.groupby(lambda x: x.month, axis=1))
1313+
1314+
def test_panel_groupby(self):
12921315
self.panel = tm.makePanel()
12931316
tm.add_nans(self.panel)
1294-
1295-
def test_groupby(self):
12961317
grouped = self.panel.groupby({'ItemA' : 0, 'ItemB' : 0, 'ItemC' : 1},
12971318
axis='items')
12981319
agged = grouped.agg(np.mean)

0 commit comments

Comments
 (0)