@@ -208,6 +208,8 @@ class GroupBy(PandasObject):
208
208
Number of groups
209
209
"""
210
210
_apply_whitelist = _common_apply_whitelist
211
+ _internal_names = ['_cache' ]
212
+ _internal_names_set = set (_internal_names )
211
213
212
214
def __init__ (self , obj , keys = None , axis = 0 , level = None ,
213
215
grouper = None , exclusions = None , selection = None , as_index = True ,
@@ -288,10 +290,12 @@ def _local_dir(self):
288
290
return sorted (set (self .obj ._local_dir () + list (self ._apply_whitelist )))
289
291
290
292
def __getattr__ (self , attr ):
293
+ if attr in self ._internal_names_set :
294
+ return object .__getattribute__ (self , attr )
291
295
if attr in self .obj :
292
296
return self [attr ]
293
297
294
- if hasattr (self .obj , attr ) and attr != '_cache' :
298
+ if hasattr (self .obj , attr ):
295
299
return self ._make_wrapper (attr )
296
300
297
301
raise AttributeError ("%r object has no attribute %r" %
@@ -302,18 +306,18 @@ def __getitem__(self, key):
302
306
303
307
def _make_wrapper (self , name ):
304
308
if name not in self ._apply_whitelist :
305
- is_callable = callable (getattr (self .obj , name , None ))
309
+ is_callable = callable (getattr (self ._selected_obj , name , None ))
306
310
kind = ' callable ' if is_callable else ' '
307
311
msg = ("Cannot access{0}attribute {1!r} of {2!r} objects, try "
308
312
"using the 'apply' method" .format (kind , name ,
309
313
type (self ).__name__ ))
310
314
raise AttributeError (msg )
311
315
312
- f = getattr (self .obj , name )
316
+ f = getattr (self ._selected_obj , name )
313
317
if not isinstance (f , types .MethodType ):
314
318
return self .apply (lambda self : getattr (self , name ))
315
319
316
- f = getattr (type (self .obj ), name )
320
+ f = getattr (type (self ._selected_obj ), name )
317
321
318
322
def wrapper (* args , ** kwargs ):
319
323
# a little trickery for aggregation functions that need an axis
@@ -362,7 +366,7 @@ def get_group(self, name, obj=None):
362
366
group : type of obj
363
367
"""
364
368
if obj is None :
365
- obj = self .obj
369
+ obj = self ._selected_obj
366
370
367
371
inds = self ._get_index (name )
368
372
return obj .take (inds , axis = self .axis , convert = False )
@@ -424,7 +428,8 @@ def f(g):
424
428
return self ._python_apply_general (f )
425
429
426
430
def _python_apply_general (self , f ):
427
- keys , values , mutated = self .grouper .apply (f , self .obj , self .axis )
431
+ keys , values , mutated = self .grouper .apply (f , self ._selected_obj ,
432
+ self .axis )
428
433
429
434
return self ._wrap_applied_output (keys , values ,
430
435
not_indexed_same = mutated )
@@ -437,7 +442,7 @@ def agg(self, func, *args, **kwargs):
437
442
return self .aggregate (func , * args , ** kwargs )
438
443
439
444
def _iterate_slices (self ):
440
- yield self .name , self .obj
445
+ yield self .name , self ._selected_obj
441
446
442
447
def transform (self , func , * args , ** kwargs ):
443
448
raise NotImplementedError
@@ -573,7 +578,7 @@ def nth(self, n, dropna=None):
573
578
return self ._selected_obj [is_nth ]
574
579
575
580
if (isinstance (self ._selected_obj , DataFrame )
576
- and dropna not in ['any' , 'all' ]):
581
+ and dropna not in ['any' , 'all' ]):
577
582
# Note: when agg-ing picker doesn't raise this, just returns NaN
578
583
raise ValueError ("For a DataFrame groupby, dropna must be "
579
584
"either None, 'any' or 'all', "
@@ -582,6 +587,7 @@ def nth(self, n, dropna=None):
582
587
# old behaviour, but with all and any support for DataFrames.
583
588
584
589
max_len = n if n >= 0 else - 1 - n
590
+
585
591
def picker (x ):
586
592
x = x .dropna (how = dropna ) # Note: how is ignored if Series
587
593
if len (x ) <= max_len :
@@ -591,7 +597,6 @@ def picker(x):
591
597
592
598
return self .agg (picker )
593
599
594
-
595
600
def cumcount (self , ** kwargs ):
596
601
"""
597
602
Number each item in each group from 0 to the length of that group - 1.
@@ -638,7 +643,7 @@ def cumcount(self, **kwargs):
638
643
"""
639
644
ascending = kwargs .pop ('ascending' , True )
640
645
641
- index = self .obj .index
646
+ index = self ._selected_obj .index
642
647
cumcounts = self ._cumcount_array (ascending = ascending )
643
648
return Series (cumcounts , index )
644
649
@@ -706,8 +711,9 @@ def _cumcount_array(self, arr=None, **kwargs):
706
711
if arr is None :
707
712
arr = np .arange (self .grouper ._max_groupsize , dtype = 'int64' )
708
713
709
- len_index = len (self .obj .index )
714
+ len_index = len (self ._selected_obj .index )
710
715
cumcounts = np .empty (len_index , dtype = arr .dtype )
716
+
711
717
if ascending :
712
718
for v in self .indices .values ():
713
719
cumcounts [v ] = arr [:len (v )]
@@ -722,15 +728,15 @@ def _selected_obj(self):
722
728
return self .obj
723
729
else :
724
730
return self .obj [self ._selection ]
725
-
731
+
726
732
def _index_with_as_index (self , b ):
727
733
"""
728
734
Take boolean mask of index to be returned from apply, if as_index=True
729
735
730
736
"""
731
737
# TODO perf, it feels like this should already be somewhere...
732
738
from itertools import chain
733
- original = self .obj .index
739
+ original = self ._selected_obj .index
734
740
gp = self .grouper
735
741
levels = chain ((gp .levels [i ][gp .labels [i ][b ]]
736
742
for i in range (len (gp .groupings ))),
@@ -812,7 +818,7 @@ def _concat_objects(self, keys, values, not_indexed_same=False):
812
818
813
819
if not not_indexed_same :
814
820
result = concat (values , axis = self .axis )
815
- ax = self .obj ._get_axis (self .axis )
821
+ ax = self ._selected_obj ._get_axis (self .axis )
816
822
817
823
if isinstance (result , Series ):
818
824
result = result .reindex (ax )
@@ -835,14 +841,14 @@ def _apply_filter(self, indices, dropna):
835
841
else :
836
842
indices = np .sort (np .concatenate (indices ))
837
843
if dropna :
838
- filtered = self .obj .take (indices )
844
+ filtered = self ._selected_obj .take (indices )
839
845
else :
840
- mask = np .empty (len (self .obj .index ), dtype = bool )
846
+ mask = np .empty (len (self ._selected_obj .index ), dtype = bool )
841
847
mask .fill (False )
842
848
mask [indices .astype (int )] = True
843
849
# mask fails to broadcast when passed to where; broadcast manually.
844
- mask = np .tile (mask , list (self .obj .shape [1 :]) + [1 ]).T
845
- filtered = self .obj .where (mask ) # Fill with NaNs.
850
+ mask = np .tile (mask , list (self ._selected_obj .shape [1 :]) + [1 ]).T
851
+ filtered = self ._selected_obj .where (mask ) # Fill with NaNs.
846
852
return filtered
847
853
848
854
@@ -1908,7 +1914,7 @@ def transform(self, func, *args, **kwargs):
1908
1914
-------
1909
1915
transformed : Series
1910
1916
"""
1911
- result = self .obj .copy ()
1917
+ result = self ._selected_obj .copy ()
1912
1918
if hasattr (result , 'values' ):
1913
1919
result = result .values
1914
1920
dtype = result .dtype
@@ -1933,8 +1939,8 @@ def transform(self, func, *args, **kwargs):
1933
1939
1934
1940
# downcast if we can (and need)
1935
1941
result = _possibly_downcast_to_dtype (result , dtype )
1936
- return self .obj .__class__ (result , index = self .obj .index ,
1937
- name = self .obj .name )
1942
+ return self ._selected_obj .__class__ (result , index = self ._selected_obj .index ,
1943
+ name = self ._selected_obj .name )
1938
1944
1939
1945
def filter (self , func , dropna = True , * args , ** kwargs ):
1940
1946
"""
@@ -2082,7 +2088,7 @@ def aggregate(self, arg, *args, **kwargs):
2082
2088
if self .axis != 0 : # pragma: no cover
2083
2089
raise ValueError ('Can only pass dict with axis=0' )
2084
2090
2085
- obj = self .obj
2091
+ obj = self ._selected_obj
2086
2092
2087
2093
if any (isinstance (x , (list , tuple , dict )) for x in arg .values ()):
2088
2094
new_arg = OrderedDict ()
@@ -2095,7 +2101,7 @@ def aggregate(self, arg, *args, **kwargs):
2095
2101
2096
2102
keys = []
2097
2103
if self ._selection is not None :
2098
- subset = obj [ self . _selection ]
2104
+ subset = obj
2099
2105
if isinstance (subset , DataFrame ):
2100
2106
raise NotImplementedError
2101
2107
@@ -2294,7 +2300,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
2294
2300
2295
2301
if isinstance (v , (np .ndarray , Series )):
2296
2302
if isinstance (v , Series ):
2297
- applied_index = self .obj ._get_axis (self .axis )
2303
+ applied_index = self ._selected_obj ._get_axis (self .axis )
2298
2304
all_indexed_same = _all_indexes_same ([
2299
2305
x .index for x in values
2300
2306
])
@@ -2367,7 +2373,11 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False):
2367
2373
2368
2374
# if we have date/time like in the original, then coerce dates
2369
2375
# as we are stacking can easily have object dtypes here
2370
- cd = 'coerce' if self .obj .ndim == 2 and self .obj .dtypes .isin (_DATELIKE_DTYPES ).any () else True
2376
+ if (self ._selected_obj .ndim == 2
2377
+ and self ._selected_obj .dtypes .isin (_DATELIKE_DTYPES ).any ()):
2378
+ cd = 'coerce'
2379
+ else :
2380
+ cd = True
2371
2381
return result .convert_objects (convert_dates = cd )
2372
2382
2373
2383
else :
@@ -2668,8 +2678,8 @@ def _wrap_agged_blocks(self, blocks):
2668
2678
return result .convert_objects ()
2669
2679
2670
2680
def _iterate_column_groupbys (self ):
2671
- for i , colname in enumerate (self .obj .columns ):
2672
- yield colname , SeriesGroupBy (self .obj .iloc [:, i ],
2681
+ for i , colname in enumerate (self ._selected_obj .columns ):
2682
+ yield colname , SeriesGroupBy (self ._selected_obj .iloc [:, i ],
2673
2683
selection = colname ,
2674
2684
grouper = self .grouper ,
2675
2685
exclusions = self .exclusions )
@@ -2679,7 +2689,7 @@ def _apply_to_column_groupbys(self, func):
2679
2689
return concat (
2680
2690
(func (col_groupby ) for _ , col_groupby
2681
2691
in self ._iterate_column_groupbys ()),
2682
- keys = self .obj .columns , axis = 1 )
2692
+ keys = self ._selected_obj .columns , axis = 1 )
2683
2693
2684
2694
def ohlc (self ):
2685
2695
"""
@@ -2701,10 +2711,10 @@ def _iterate_slices(self):
2701
2711
if self .axis == 0 :
2702
2712
# kludge
2703
2713
if self ._selection is None :
2704
- slice_axis = self .obj .items
2714
+ slice_axis = self ._selected_obj .items
2705
2715
else :
2706
2716
slice_axis = self ._selection_list
2707
- slicer = lambda x : self .obj [x ]
2717
+ slicer = lambda x : self ._selected_obj [x ]
2708
2718
else :
2709
2719
raise NotImplementedError
2710
2720
0 commit comments