Skip to content

Commit 91e9d81

Browse files
committed
API: update nth to use the _set_selection_from_grouper
makes first == nth(0) and last == nth(-1)
1 parent 91a8319 commit 91e9d81

File tree

5 files changed

+72
-28
lines changed

5 files changed

+72
-28
lines changed

doc/source/groupby.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,7 @@ index are the group names and whose values are the sizes of each group.
397397
named *columns*.
398398

399399
Aggregating functions are ones that reduce the dimension of the returned objects,
400-
for example: ``mean, sum, size, count, std, var, describe, first, last, min, max``. This is
400+
for example: ``mean, sum, size, count, std, var, describe, first, last, nth, min, max``. This is
401401
what happens when you do for example ``DataFrame.sum()`` and get back a ``Series``.
402402

403403
.. _groupby.aggregate.multifunc:
@@ -613,7 +613,7 @@ For dataframes with multiple columns, filters should explicitly specify a column
613613
a reduced shape of the original (and potentially eliminating groups), but with the index unchanged.
614614
Passing ``as_index=False`` will not affect these transformation methods.
615615

616-
For example: ``head, tail nth``.
616+
For example: ``head, tail``.
617617

618618
.. ipython:: python
619619

doc/source/release.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ API Changes
190190
validation warnings in :func:`read_csv`/:func:`read_table` (:issue:`6607`)
191191
- Raise a ``TypeError`` when ``DataFrame`` is passed an iterator as the
192192
``data`` argument (:issue:`5357`)
193-
- groupby will now not return the grouped column for non-cython functions (:issue:`5610`, :issue:`5614`),
193+
- groupby will now not return the grouped column for non-cython functions (:issue:`5610`, :issue:`5614`, :issue:`6732`),
194194
as it's already the index
195195
- ``DataFrame.plot`` and ``Series.plot`` now supports area plot with specifying ``kind='area'`` (:issue:`6656`)
196196
- Line plot can be stacked by ``stacked=True``. (:issue:`6656`)

doc/source/v0.14.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ API changes
124124

125125
g.nth(0, dropna='any') # similar to old behaviour
126126

127-
groupby will now not return the grouped column for non-cython functions (:issue:`5610`, :issue:`5614`),
127+
groupby will now not return the grouped column for non-cython functions (:issue:`5610`, :issue:`5614`, :issue:`6732`),
128128
as it's already the index
129129

130130
.. ipython:: python

pandas/core/groupby.py

+23-3
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ class SpecificationError(GroupByError):
9999
def _groupby_function(name, alias, npfunc, numeric_only=True,
100100
_convert=False):
101101
def f(self):
102+
self._set_selection_from_grouper()
102103
try:
103104
return self._cython_agg_general(alias, numeric_only=numeric_only)
104105
except AssertionError as e:
@@ -356,6 +357,7 @@ class GroupBy(PandasObject):
356357
_apply_whitelist = _common_apply_whitelist
357358
_internal_names = ['_cache']
358359
_internal_names_set = set(_internal_names)
360+
_group_selection = None
359361

360362
def __init__(self, obj, keys=None, axis=0, level=None,
361363
grouper=None, exclusions=None, selection=None, as_index=True,
@@ -454,18 +456,20 @@ def _selection_list(self):
454456
def _selected_obj(self):
455457

456458
if self._selection is None or isinstance(self.obj, Series):
459+
if self._group_selection is not None:
460+
return self.obj[self._group_selection]
457461
return self.obj
458462
else:
459463
return self.obj[self._selection]
460464

461465
def _set_selection_from_grouper(self):
462466
""" we may need create a selection if we have non-level groupers """
463467
grp = self.grouper
464-
if self._selection is None and self.as_index and getattr(grp,'groupings',None) is not None:
468+
if self.as_index and getattr(grp,'groupings',None) is not None:
465469
ax = self.obj._info_axis
466470
groupers = [ g.name for g in grp.groupings if g.level is None and g.name is not None and g.name in ax ]
467471
if len(groupers):
468-
self._selection = (ax-Index(groupers)).tolist()
472+
self._group_selection = (ax-Index(groupers)).tolist()
469473

470474
def _local_dir(self):
471475
return sorted(set(self.obj._local_dir() + list(self._apply_whitelist)))
@@ -776,6 +780,7 @@ def nth(self, n, dropna=None):
776780
777781
"""
778782

783+
self._set_selection_from_grouper()
779784
if not dropna: # good choice
780785
m = self.grouper._max_groupsize
781786
if n >= m or n < -m:
@@ -787,7 +792,21 @@ def nth(self, n, dropna=None):
787792
else:
788793
rng[- n - 1] = True
789794
is_nth = self._cumcount_array(rng, ascending=False)
790-
return self._selected_obj[is_nth]
795+
796+
result = self._selected_obj[is_nth]
797+
798+
# the result index
799+
if self.as_index:
800+
ax = self.obj._info_axis
801+
names = self.grouper.names
802+
if all([ n in ax for n in names ]):
803+
result.index = Index(self.obj[names][is_nth].values.ravel()).set_names(names)
804+
elif self._group_selection is not None:
805+
result.index = self.obj._get_axis(self.axis)[is_nth]
806+
807+
result = result.sort_index()
808+
809+
return result
791810

792811
if (isinstance(self._selected_obj, DataFrame)
793812
and dropna not in ['any', 'all']):
@@ -853,6 +872,7 @@ def cumcount(self, **kwargs):
853872
dtype: int64
854873
855874
"""
875+
self._set_selection_from_grouper()
856876
ascending = kwargs.pop('ascending', True)
857877

858878
index = self._selected_obj.index

pandas/tests/test_groupby.py

+45-21
Original file line numberDiff line numberDiff line change
@@ -166,18 +166,27 @@ def test_first_last_nth(self):
166166
# tests for first / last / nth
167167
grouped = self.df.groupby('A')
168168
first = grouped.first()
169-
expected = self.df.ix[[1, 0], ['B', 'C', 'D']]
170-
expected.index = ['bar', 'foo']
171-
assert_frame_equal(first, expected, check_names=False)
169+
expected = self.df.ix[[1, 0], ['B','C','D']]
170+
expected.index = Index(['bar', 'foo'],name='A')
171+
expected = expected.sort_index()
172+
assert_frame_equal(first, expected)
173+
174+
nth = grouped.nth(0)
175+
assert_frame_equal(nth, expected)
172176

173177
last = grouped.last()
174-
expected = self.df.ix[[5, 7], ['B', 'C', 'D']]
175-
expected.index = ['bar', 'foo']
176-
assert_frame_equal(last, expected, check_names=False)
178+
expected = self.df.ix[[5, 7], ['B','C','D']]
179+
expected.index = Index(['bar', 'foo'],name='A')
180+
assert_frame_equal(last, expected)
181+
182+
nth = grouped.nth(-1)
183+
assert_frame_equal(nth, expected)
177184

178185
nth = grouped.nth(1)
179-
expected = self.df.iloc[[2, 3]]
180-
assert_frame_equal(nth, expected, check_names=False)
186+
expected = self.df.ix[[2, 3],['B','C','D']].copy()
187+
expected.index = Index(['foo', 'bar'],name='A')
188+
expected = expected.sort_index()
189+
assert_frame_equal(nth, expected)
181190

182191
# it works!
183192
grouped['B'].first()
@@ -189,6 +198,17 @@ def test_first_last_nth(self):
189198
self.assert_(com.isnull(grouped['B'].last()['foo']))
190199
self.assert_(com.isnull(grouped['B'].nth(0)[0])) # not sure what this is testing
191200

201+
# v0.14.0 whatsnew
202+
df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
203+
g = df.groupby('A')
204+
result = g.first()
205+
expected = df.iloc[[1,2]].set_index('A')
206+
assert_frame_equal(result, expected)
207+
208+
expected = df.iloc[[1,2]].set_index('A')
209+
result = g.nth(0,dropna='any')
210+
assert_frame_equal(result, expected)
211+
192212
def test_first_last_nth_dtypes(self):
193213

194214
df = self.df_mixed_floats.copy()
@@ -199,17 +219,21 @@ def test_first_last_nth_dtypes(self):
199219
grouped = df.groupby('A')
200220
first = grouped.first()
201221
expected = df.ix[[1, 0], ['B', 'C', 'D', 'E', 'F']]
202-
expected.index = ['bar', 'foo']
203-
assert_frame_equal(first, expected, check_names=False)
222+
expected.index = Index(['bar', 'foo'], name='A')
223+
expected = expected.sort_index()
224+
assert_frame_equal(first, expected)
204225

205226
last = grouped.last()
206227
expected = df.ix[[5, 7], ['B', 'C', 'D', 'E', 'F']]
207-
expected.index = ['bar', 'foo']
208-
assert_frame_equal(last, expected, check_names=False)
228+
expected.index = Index(['bar', 'foo'], name='A')
229+
expected = expected.sort_index()
230+
assert_frame_equal(last, expected)
209231

210232
nth = grouped.nth(1)
211-
expected = df.iloc[[2, 3]]
212-
assert_frame_equal(nth, expected, check_names=False)
233+
expected = df.ix[[3, 2],['B', 'C', 'D', 'E', 'F']]
234+
expected.index = Index(['bar', 'foo'], name='A')
235+
expected = expected.sort_index()
236+
assert_frame_equal(nth, expected)
213237

214238
# GH 2763, first/last shifting dtypes
215239
idx = lrange(10)
@@ -223,15 +247,15 @@ def test_nth(self):
223247
df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
224248
g = df.groupby('A')
225249

226-
assert_frame_equal(g.nth(0), df.iloc[[0, 2]])
227-
assert_frame_equal(g.nth(1), df.iloc[[1]])
228-
assert_frame_equal(g.nth(2), df.loc[[]])
229-
assert_frame_equal(g.nth(-1), df.iloc[[1, 2]])
230-
assert_frame_equal(g.nth(-2), df.iloc[[0]])
231-
assert_frame_equal(g.nth(-3), df.loc[[]])
250+
assert_frame_equal(g.nth(0), df.iloc[[0, 2]].set_index('A'))
251+
assert_frame_equal(g.nth(1), df.iloc[[1]].set_index('A'))
252+
assert_frame_equal(g.nth(2), df.loc[[],['B']])
253+
assert_frame_equal(g.nth(-1), df.iloc[[1, 2]].set_index('A'))
254+
assert_frame_equal(g.nth(-2), df.iloc[[0]].set_index('A'))
255+
assert_frame_equal(g.nth(-3), df.loc[[],['B']])
232256
assert_series_equal(g.B.nth(0), df.B.iloc[[0, 2]])
233257
assert_series_equal(g.B.nth(1), df.B.iloc[[1]])
234-
assert_frame_equal(g[['B']].nth(0), df.ix[[0, 2], ['B']])
258+
assert_frame_equal(g[['B']].nth(0), df.ix[[0, 2], ['A', 'B']].set_index('A'))
235259

236260
exp = df.set_index('A')
237261
assert_frame_equal(g.nth(0, dropna='any'), exp.iloc[[1, 2]])

0 commit comments

Comments
 (0)