Skip to content

Commit f58749e

Browse files
committed
ENH: intercept numpy.sum in groupby, plotting/console improvements
1 parent 330c088 commit f58749e

File tree

5 files changed

+63
-17
lines changed

5 files changed

+63
-17
lines changed

pandas/core/format.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -778,7 +778,7 @@ def reset(self):
778778
def _put_lines(buf, lines):
779779
if any(isinstance(x, unicode) for x in lines):
780780
lines = [unicode(x) for x in lines]
781-
print >> buf, '\n'.join(lines)
781+
buf.write('\n'.join(lines))
782782

783783

784784
if __name__ == '__main__':

pandas/core/frame.py

+14-11
Original file line number | Diff line number | Diff line change
@@ -454,7 +454,9 @@ def _need_info_repr_(self):
454454
else:
455455
return True
456456
else:
457-
if len(self.index) > max_rows:
457+
# save us
458+
if (len(self.index) > max_rows or
459+
len(self.columns) > terminal_width // 2):
458460
return True
459461
else:
460462
buf = StringIO()
@@ -1134,7 +1136,8 @@ def info(self, verbose=True, buf=None):
11341136

11351137
cols = self.columns
11361138

1137-
if verbose:
1139+
# hack
1140+
if verbose and len(self.columns) < 100:
11381141
lines.append('Data columns:')
11391142
space = max([len(_stringify(k)) for k in self.columns]) + 4
11401143
counts = self.count()
@@ -1145,11 +1148,7 @@ def info(self, verbose=True, buf=None):
11451148
lines.append(_put_str(col, space) +
11461149
'%d non-null values' % count)
11471150
else:
1148-
if len(cols) <= 2:
1149-
lines.append('Columns: %s' % repr(cols))
1150-
else:
1151-
lines.append('Columns: %s to %s' % (_stringify(cols[0]),
1152-
_stringify(cols[-1])))
1151+
lines.append(self.columns.summary(name='Columns'))
11531152

11541153
counts = self.get_dtype_counts()
11551154
dtypes = ['%s(%d)' % k for k in sorted(counts.iteritems())]
@@ -3960,8 +3959,12 @@ def plot(self, subplots=False, sharex=True, sharey=False, use_index=True,
39603959
if xlim is not None:
39613960
ax.set_xlim(xlim)
39623961

3963-
if title and not subplots:
3964-
ax.set_title(title)
3962+
if title:
3963+
if subplots:
3964+
fig.suptitle(title)
3965+
else:
3966+
ax.set_title(title)
3967+
39653968

39663969
plt.draw_if_interactive()
39673970
if subplots:
@@ -4011,8 +4014,8 @@ def _bar_plot(self, axes, subplots=False, use_index=True, grid=True,
40114014

40124015
if legend and not subplots:
40134016
fig = ax.get_figure()
4014-
fig.legend([r[0] for r in rects], labels, loc='upper center',
4015-
fancybox=True, ncol=6)
4017+
fig.legend([r[0] for r in rects], labels, loc='lower center',
4018+
fancybox=True, ncol=6, borderaxespad=20)
40164019
#mode='expand')
40174020

40184021
import matplotlib.pyplot as plt

pandas/core/groupby.py

+20-1
Original file line number | Diff line number | Diff line change
@@ -899,6 +899,10 @@ def aggregate(self, func_or_funcs, *args, **kwargs):
899899
if hasattr(func_or_funcs,'__iter__'):
900900
ret = self._aggregate_multiple_funcs(func_or_funcs)
901901
else:
902+
cyfunc = _intercept_cython(func_or_funcs)
903+
if cyfunc and not args and not kwargs:
904+
return getattr(self, cyfunc)()
905+
902906
if len(self.grouper.groupings) > 1:
903907
return self._python_agg_general(func_or_funcs, *args, **kwargs)
904908

@@ -1158,6 +1162,10 @@ def aggregate(self, arg, *args, **kwargs):
11581162
elif isinstance(arg, list):
11591163
return self._aggregate_multiple_funcs(arg)
11601164
else:
1165+
cyfunc = _intercept_cython(arg)
1166+
if cyfunc and not args and not kwargs:
1167+
return getattr(self, cyfunc)()
1168+
11611169
if len(self.grouper.groupings) > 1:
11621170
return self._python_agg_general(arg, *args, **kwargs)
11631171
else:
@@ -1194,7 +1202,7 @@ def _aggregate_multiple_funcs(self, arg):
11941202
grouper=self.grouper)
11951203
results.append(colg.agg(arg))
11961204
keys.append(col)
1197-
except TypeError:
1205+
except (TypeError, GroupByError):
11981206
pass
11991207

12001208
result = concat(results, keys=keys, axis=1)
@@ -1634,9 +1642,20 @@ def _reorder_by_uniques(uniques, labels):
16341642
__builtin__.sum : np.sum
16351643
}
16361644

1645+
_cython_table = {
1646+
__builtin__.sum : 'sum',
1647+
np.sum : 'sum',
1648+
np.mean : 'mean',
1649+
np.std : 'std',
1650+
np.var : 'var'
1651+
}
1652+
16371653
def _intercept_function(func):
16381654
return _func_table.get(func, func)
16391655

1656+
def _intercept_cython(func):
1657+
return _cython_table.get(func)
1658+
16401659
def _groupby_indices(values):
16411660
if values.dtype != np.object_:
16421661
values = values.astype('O')

pandas/core/index.py

+3-2
Original file line number | Diff line number | Diff line change
@@ -113,13 +113,14 @@ def _has_complex_internals(self):
113113
# to disable groupby tricks in MultiIndex
114114
return False
115115

116-
def summary(self):
116+
def summary(self, name=None):
117117
if len(self) > 0:
118118
index_summary = ', %s to %s' % (str(self[0]), str(self[-1]))
119119
else:
120120
index_summary = ''
121121

122-
name = type(self).__name__
122+
if name is None:
123+
name = type(self).__name__
123124
return '%s: %s entries%s' % (name, len(self), index_summary)
124125

125126
def __str__(self):

pandas/tests/test_groupby.py

+25-2
Original file line number | Diff line number | Diff line change
@@ -202,7 +202,8 @@ def test_agg_apply_corner(self):
202202
grouped = self.tsframe.groupby(self.tsframe['A'] * np.nan)
203203
assert_frame_equal(grouped.sum(),
204204
DataFrame(columns=self.tsframe.columns))
205-
assert_frame_equal(grouped.agg(np.sum), DataFrame({}))
205+
assert_frame_equal(grouped.agg(np.sum),
206+
DataFrame(columns=self.tsframe.columns))
206207
assert_frame_equal(grouped.apply(np.sum), DataFrame({}))
207208

208209
def test_agg_grouping_is_list_tuple(self):
@@ -863,7 +864,8 @@ def test_omit_nuisance(self):
863864

864865
# won't work with axis = 1
865866
grouped = df.groupby({'A' : 0, 'C' : 0, 'D' : 1, 'E' : 1}, axis=1)
866-
result = self.assertRaises(TypeError, grouped.agg, np.sum)
867+
result = self.assertRaises(TypeError, grouped.agg,
868+
lambda x: x.sum(1, numeric_only=False))
867869

868870
def test_omit_nuisance_python_multiple(self):
869871
grouped = self.three_group.groupby(['A', 'B'])
@@ -1552,6 +1554,27 @@ def test_column_select_via_attr(self):
15521554
expected = self.df.groupby('A').agg(np.mean)
15531555
assert_frame_equal(result, expected)
15541556

1557+
def test_rank_apply(self):
1558+
lev1 = np.array([rands(10) for _ in xrange(1000)], dtype=object)
1559+
lev2 = np.array([rands(10) for _ in xrange(130)], dtype=object)
1560+
lab1 = np.random.randint(0, 1000, size=10000)
1561+
lab2 = np.random.randint(0, 130, size=10000)
1562+
1563+
df = DataFrame({'value' : np.random.randn(10000),
1564+
'key1' : lev1.take(lab1),
1565+
'key2' : lev2.take(lab2)})
1566+
1567+
result = df.groupby(['key1', 'key2']).value.rank()
1568+
1569+
expected = []
1570+
for key, piece in df.groupby(['key1', 'key2']):
1571+
expected.append(piece.value.rank())
1572+
expected = concat(expected, axis=0)
1573+
expected = expected.reindex(result.index)
1574+
1575+
assert_series_equal(result, expected)
1576+
1577+
15551578
def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):
15561579
tups = map(tuple, df[keys].values)
15571580
tups = com._asarray_tuplesafe(tups)

0 commit comments

Comments
 (0)