@@ -445,6 +445,23 @@ def _selection_list(self):
445
445
return [self ._selection ]
446
446
return self ._selection
447
447
448
+ @cache_readonly
449
+ def _selected_obj (self ):
450
+
451
+ if self ._selection is None or isinstance (self .obj , Series ):
452
+ return self .obj
453
+ else :
454
+ return self .obj [self ._selection ]
455
+
456
+ def _set_selection_from_grouper (self ):
457
+ """ we may need create a selection if we have non-level groupers """
458
+ grp = self .grouper
459
+ if self ._selection is None and self .as_index and getattr (grp ,'groupings' ,None ) is not None :
460
+ ax = self .obj ._info_axis
461
+ groupers = [ g .name for g in grp .groupings if g .level is None and g .name is not None and g .name in ax ]
462
+ if len (groupers ):
463
+ self ._selection = (ax - Index (groupers )).tolist ()
464
+
448
465
def _local_dir (self ):
449
466
return sorted (set (self .obj ._local_dir () + list (self ._apply_whitelist )))
450
467
@@ -453,7 +470,6 @@ def __getattr__(self, attr):
453
470
return object .__getattribute__ (self , attr )
454
471
if attr in self .obj :
455
472
return self [attr ]
456
-
457
473
if hasattr (self .obj , attr ):
458
474
return self ._make_wrapper (attr )
459
475
@@ -472,6 +488,10 @@ def _make_wrapper(self, name):
472
488
type (self ).__name__ ))
473
489
raise AttributeError (msg )
474
490
491
+ # need to setup the selection
492
+ # as are not passed directly but in the grouper
493
+ self ._set_selection_from_grouper ()
494
+
475
495
f = getattr (self ._selected_obj , name )
476
496
if not isinstance (f , types .MethodType ):
477
497
return self .apply (lambda self : getattr (self , name ))
@@ -503,7 +523,19 @@ def curried(x):
503
523
try :
504
524
return self .apply (curried_with_axis )
505
525
except Exception :
506
- return self .apply (curried )
526
+ try :
527
+ return self .apply (curried )
528
+ except Exception :
529
+
530
+ # related to : GH3688
531
+ # try item-by-item
532
+ # this can be called recursively, so need to raise ValueError if
533
+ # we don't have this method to indicated to aggregate to
534
+ # mark this column as an error
535
+ try :
536
+ return self ._aggregate_item_by_item (name , * args , ** kwargs )
537
+ except (AttributeError ):
538
+ raise ValueError
507
539
508
540
return wrapper
509
541
@@ -624,6 +656,7 @@ def mean(self):
624
656
except GroupByError :
625
657
raise
626
658
except Exception : # pragma: no cover
659
+ self ._set_selection_from_grouper ()
627
660
f = lambda x : x .mean (axis = self .axis )
628
661
return self ._python_agg_general (f )
629
662
@@ -639,6 +672,7 @@ def median(self):
639
672
raise
640
673
except Exception : # pragma: no cover
641
674
675
+ self ._set_selection_from_grouper ()
642
676
def f (x ):
643
677
if isinstance (x , np .ndarray ):
644
678
x = Series (x )
@@ -655,6 +689,7 @@ def std(self, ddof=1):
655
689
if ddof == 1 :
656
690
return self ._cython_agg_general ('std' )
657
691
else :
692
+ self ._set_selection_from_grouper ()
658
693
f = lambda x : x .std (ddof = ddof )
659
694
return self ._python_agg_general (f )
660
695
@@ -667,15 +702,26 @@ def var(self, ddof=1):
667
702
if ddof == 1 :
668
703
return self ._cython_agg_general ('var' )
669
704
else :
705
+ self ._set_selection_from_grouper ()
670
706
f = lambda x : x .var (ddof = ddof )
671
707
return self ._python_agg_general (f )
672
708
673
709
def size(self):
    """
    Compute group sizes

    The computation is delegated to ``self.grouper.size()`` and its
    result is returned unchanged.
    """
    group_sizes = self.grouper.size()
    return group_sizes
678
715
716
def count(self, axis=0):
    """
    Number of non-null items in each group.

    Parameters
    ----------
    axis : axis number, default 0
        the grouping axis

    Returns
    -------
    The per-group totals of non-null entries, cast to ``int64``.
    """
    def _non_null_total(values):
        # a non-null mask summed along ``axis`` gives the non-null count
        return notnull(values).sum(axis=axis)

    # the selection may only be implied by the grouper; set it up first
    self._set_selection_from_grouper()
    aggregated = self._python_agg_general(_non_null_total)
    return aggregated.astype('int64')
724
+
679
725
sum = _groupby_function ('sum' , 'add' , np .sum )
680
726
prod = _groupby_function ('prod' , 'prod' , np .prod )
681
727
min = _groupby_function ('min' , 'min' , np .min , numeric_only = False )
@@ -685,14 +731,14 @@ def size(self):
685
731
last = _groupby_function ('last' , 'last' , _last_compat , numeric_only = False ,
686
732
_convert = True )
687
733
734
+
688
735
def ohlc(self):
    """
    Compute open, high, low and close values of each group, excluding
    missing values

    For multiple groupings, the result index will be a MultiIndex

    The 'ohlc' aggregation is dispatched through
    ``self._apply_to_column_groupbys`` rather than called directly on
    ``self``.
    """
    return self._apply_to_column_groupbys(
        lambda x: x._cython_agg_general('ohlc'))
696
742
697
743
def nth (self , n , dropna = None ):
698
744
"""
@@ -888,13 +934,6 @@ def _cumcount_array(self, arr=None, **kwargs):
888
934
cumcounts [v ] = arr [len (v )- 1 ::- 1 ]
889
935
return cumcounts
890
936
891
- @cache_readonly
892
- def _selected_obj (self ):
893
- if self ._selection is None or isinstance (self .obj , Series ):
894
- return self .obj
895
- else :
896
- return self .obj [self ._selection ]
897
-
898
937
def _index_with_as_index (self , b ):
899
938
"""
900
939
Take boolean mask of index to be returned from apply, if as_index=True
@@ -990,12 +1029,23 @@ def _concat_objects(self, keys, values, not_indexed_same=False):
990
1029
result = result .reindex (ax )
991
1030
else :
992
1031
result = result .reindex_axis (ax , axis = self .axis )
993
- elif self .group_keys and self .as_index :
994
- group_keys = keys
995
- group_levels = self .grouper .levels
996
- group_names = self .grouper .names
997
- result = concat (values , axis = self .axis , keys = group_keys ,
998
- levels = group_levels , names = group_names )
1032
+
1033
+ elif self .group_keys :
1034
+
1035
+ if self .as_index :
1036
+
1037
+ # possible MI return case
1038
+ group_keys = keys
1039
+ group_levels = self .grouper .levels
1040
+ group_names = self .grouper .names
1041
+ result = concat (values , axis = self .axis , keys = group_keys ,
1042
+ levels = group_levels , names = group_names )
1043
+ else :
1044
+
1045
+ # GH5610, returns a MI, with the first level being a
1046
+ # range index
1047
+ keys = list (range (len (values )))
1048
+ result = concat (values , axis = self .axis , keys = keys )
999
1049
else :
1000
1050
result = concat (values , axis = self .axis )
1001
1051
@@ -2187,6 +2237,9 @@ def true_and_notnull(x, *args, **kwargs):
2187
2237
filtered = self ._apply_filter (indices , dropna )
2188
2238
return filtered
2189
2239
2240
+ def _apply_to_column_groupbys (self , func ):
2241
+ """ return a pass thru """
2242
+ return func (self )
2190
2243
2191
2244
class NDFrameGroupBy (GroupBy ):
2192
2245
@@ -2486,6 +2539,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
2486
2539
elif hasattr (self .grouper , 'groupings' ):
2487
2540
if len (self .grouper .groupings ) > 1 :
2488
2541
key_index = MultiIndex .from_tuples (keys , names = key_names )
2542
+
2489
2543
else :
2490
2544
ping = self .grouper .groupings [0 ]
2491
2545
if len (keys ) == ping .ngroups :
@@ -2498,8 +2552,13 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
2498
2552
# reorder the values
2499
2553
values = [values [i ] for i in indexer ]
2500
2554
else :
2555
+
2501
2556
key_index = Index (keys , name = key_names [0 ])
2502
2557
2558
+ # don't use the key indexer
2559
+ if not self .as_index :
2560
+ key_index = None
2561
+
2503
2562
# make Nones an empty object
2504
2563
if com ._count_not_none (* values ) != len (values ):
2505
2564
v = next (v for v in values if v is not None )
@@ -2569,7 +2628,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
2569
2628
2570
2629
# normally use vstack as its faster than concat
2571
2630
# and if we have mi-columns
2572
- if not _np_version_under1p7 or isinstance (v .index ,MultiIndex ):
2631
+ if not _np_version_under1p7 or isinstance (v .index ,MultiIndex ) or key_index is None :
2573
2632
stacked_values = np .vstack ([np .asarray (x ) for x in values ])
2574
2633
result = DataFrame (stacked_values ,index = key_index ,columns = index )
2575
2634
else :
@@ -2889,16 +2948,6 @@ def _apply_to_column_groupbys(self, func):
2889
2948
in self ._iterate_column_groupbys ()),
2890
2949
keys = self ._selected_obj .columns , axis = 1 )
2891
2950
2892
- def ohlc (self ):
2893
- """
2894
- Compute sum of values, excluding missing values
2895
-
2896
- For multiple groupings, the result index will be a MultiIndex
2897
- """
2898
- return self ._apply_to_column_groupbys (
2899
- lambda x : x ._cython_agg_general ('ohlc' ))
2900
-
2901
-
2902
2951
from pandas .tools .plotting import boxplot_frame_groupby
2903
2952
DataFrameGroupBy .boxplot = boxplot_frame_groupby
2904
2953
0 commit comments