@@ -467,7 +467,7 @@ def _selected_obj(self):
467
467
def _set_selection_from_grouper (self ):
468
468
""" we may need to create a selection if we have non-level groupers """
469
469
grp = self .grouper
470
- if self .as_index and getattr (grp ,'groupings' ,None ) is not None :
470
+ if self .as_index and getattr (grp ,'groupings' ,None ) is not None and self . obj . ndim > 1 :
471
471
ax = self .obj ._info_axis
472
472
groupers = [ g .name for g in grp .groupings if g .level is None and g .name is not None and g .name in ax ]
473
473
if len (groupers ):
@@ -759,7 +759,7 @@ def nth(self, n, dropna=None):
759
759
760
760
Examples
761
761
--------
762
- >>> DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
762
+ >>> df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
763
763
>>> g = df.groupby('A')
764
764
>>> g.nth(0)
765
765
A B
@@ -804,7 +804,10 @@ def nth(self, n, dropna=None):
804
804
if self .as_index :
805
805
ax = self .obj ._info_axis
806
806
names = self .grouper .names
807
- if all ([ n in ax for n in names ]):
807
+ if self .obj .ndim == 1 :
808
+ # this is a pass-thru
809
+ pass
810
+ elif all ([ n in ax for n in names ]):
808
811
result .index = Index (self .obj [names ][is_nth ].values .ravel ()).set_names (names )
809
812
elif self ._group_selection is not None :
810
813
result .index = self .obj ._get_axis (self .axis )[is_nth ]
@@ -821,17 +824,29 @@ def nth(self, n, dropna=None):
821
824
"(was passed %s)." % (dropna ),)
822
825
823
826
# old behaviour, but with all and any support for DataFrames.
824
-
827
+ # modified in GH 7559 to have better perf
825
828
max_len = n if n >= 0 else - 1 - n
829
+ dropped = self .obj .dropna (how = dropna , axis = self .axis )
826
830
827
- def picker (x ):
828
- x = x .dropna (how = dropna ) # Note: how is ignored if Series
829
- if len (x ) <= max_len :
830
- return np .nan
831
- else :
832
- return x .iloc [n ]
831
+ # get a new grouper for our dropped obj
832
+ grouper , exclusions , obj = _get_grouper (dropped , key = self .keys , axis = self .axis ,
833
+ level = self .level , sort = self .sort )
834
+
835
+ sizes = obj .groupby (grouper ).size ()
836
+ result = obj .groupby (grouper ).nth (n )
837
+ mask = (sizes < max_len ).values
838
+
839
+ # set the results which don't meet the criteria
840
+ if len (result ) and mask .any ():
841
+ result .loc [mask ] = np .nan
833
842
834
- return self .agg (picker )
843
+ # reset/reindex to the original groups
844
+ if len (self .obj ) == len (dropped ):
845
+ result .index = self .grouper .result_index
846
+ else :
847
+ result = result .reindex (self .grouper .result_index )
848
+
849
+ return result
835
850
836
851
def cumcount (self , ** kwargs ):
837
852
"""
@@ -942,21 +957,33 @@ def tail(self, n=5):
942
957
def _cumcount_array (self , arr = None , ** kwargs ):
943
958
"""
944
959
arr is where cumcount gets its values from
960
+
961
+ note: this is currently implementing sort=False (though the default is sort=True)
962
+ for groupby in general
945
963
"""
946
964
ascending = kwargs .pop ('ascending' , True )
947
965
948
966
if arr is None :
949
967
arr = np .arange (self .grouper ._max_groupsize , dtype = 'int64' )
950
968
951
969
len_index = len (self ._selected_obj .index )
952
- cumcounts = np .empty (len_index , dtype = arr .dtype )
970
+ cumcounts = np .zeros (len_index , dtype = arr .dtype )
971
+ if not len_index :
972
+ return cumcounts
973
+
974
+ indices , values = [], []
975
+ for v in self .indices .values ():
976
+ indices .append (v )
977
+
978
+ if ascending :
979
+ values .append (arr [:len (v )])
980
+ else :
981
+ values .append (arr [len (v )- 1 ::- 1 ])
982
+
983
+ indices = np .concatenate (indices )
984
+ values = np .concatenate (values )
985
+ cumcounts [indices ] = values
953
986
954
- if ascending :
955
- for v in self .indices .values ():
956
- cumcounts [v ] = arr [:len (v )]
957
- else :
958
- for v in self .indices .values ():
959
- cumcounts [v ] = arr [len (v )- 1 ::- 1 ]
960
987
return cumcounts
961
988
962
989
def _index_with_as_index (self , b ):
@@ -1270,6 +1297,7 @@ def group_info(self):
1270
1297
comp_ids = com ._ensure_int64 (comp_ids )
1271
1298
return comp_ids , obs_group_ids , ngroups
1272
1299
1300
+
1273
1301
def _get_compressed_labels (self ):
1274
1302
all_labels = [ping .labels for ping in self .groupings ]
1275
1303
if self ._overflow_possible :
@@ -1892,7 +1920,6 @@ def groups(self):
1892
1920
self ._groups = self .index .groupby (self .grouper )
1893
1921
return self ._groups
1894
1922
1895
-
1896
1923
def _get_grouper (obj , key = None , axis = 0 , level = None , sort = True ):
1897
1924
"""
1898
1925
create and return a BaseGrouper, which is an internal
@@ -2141,7 +2168,10 @@ def _wrap_aggregated_output(self, output, names=None):
2141
2168
if names is not None :
2142
2169
return DataFrame (output , index = index , columns = names )
2143
2170
else :
2144
- return Series (output , index = index , name = self .name )
2171
+ name = self .name
2172
+ if name is None :
2173
+ name = self ._selected_obj .name
2174
+ return Series (output , index = index , name = name )
2145
2175
2146
2176
def _wrap_applied_output (self , keys , values , not_indexed_same = False ):
2147
2177
if len (keys ) == 0 :
0 commit comments