25
25
notnull , _DATELIKE_DTYPES , is_numeric_dtype ,
26
26
is_timedelta64_dtype , is_datetime64_dtype ,
27
27
is_categorical_dtype , _values_from_object ,
28
- is_datetime_or_timedelta_dtype , is_bool_dtype ,
29
- AbstractMethodError )
28
+ is_datetime_or_timedelta_dtype , is_bool ,
29
+ is_bool_dtype , AbstractMethodError )
30
30
from pandas .core .config import option_context
31
31
import pandas .lib as lib
32
32
from pandas .lib import Timestamp
@@ -491,7 +491,7 @@ def _set_result_index_ordered(self, result):
491
491
492
492
# shortcut if we have an already ordered grouper
493
493
if not self .grouper .is_monotonic :
494
- index = Index (np .concatenate ([ indices [ v ] for v in self .grouper .result_index ]))
494
+ index = Index (np .concatenate ([ indices . get ( v , []) for v in self .grouper .result_index ]))
495
495
result .index = index
496
496
result = result .sort_index ()
497
497
@@ -2436,6 +2436,8 @@ def transform(self, func, *args, **kwargs):
2436
2436
2437
2437
wrapper = lambda x : func (x , * args , ** kwargs )
2438
2438
for i , (name , group ) in enumerate (self ):
2439
+ if name not in self .indices :
2440
+ continue
2439
2441
2440
2442
object .__setattr__ (group , 'name' , name )
2441
2443
res = wrapper (group )
@@ -2451,7 +2453,7 @@ def transform(self, func, *args, **kwargs):
2451
2453
except :
2452
2454
pass
2453
2455
2454
- indexer = self ._get_index ( name )
2456
+ indexer = self .indices [ name ]
2455
2457
result [indexer ] = res
2456
2458
2457
2459
result = _possibly_downcast_to_dtype (result , dtype )
@@ -2465,9 +2467,12 @@ def _transform_fast(self, func):
2465
2467
"""
2466
2468
if isinstance (func , compat .string_types ):
2467
2469
func = getattr (self ,func )
2470
+
2468
2471
values = func ().values
2469
- counts = self .size ().values
2472
+ counts = self .size ().fillna ( 0 ). values
2470
2473
values = np .repeat (values , com ._ensure_platform_int (counts ))
2474
+ if any (counts == 0 ):
2475
+ values = self ._try_cast (values , self ._selected_obj )
2471
2476
2472
2477
return self ._set_result_index_ordered (Series (values ))
2473
2478
@@ -2502,8 +2507,11 @@ def true_and_notnull(x, *args, **kwargs):
2502
2507
return b and notnull (b )
2503
2508
2504
2509
try :
2505
- indices = [self ._get_index (name ) if true_and_notnull (group ) else []
2506
- for name , group in self ]
2510
+ indices = []
2511
+ for name , group in self :
2512
+ if true_and_notnull (group ) and name in self .indices :
2513
+ indices .append (self .indices [name ])
2514
+
2507
2515
except ValueError :
2508
2516
raise TypeError ("the filter must return a boolean result" )
2509
2517
except TypeError :
@@ -3020,24 +3028,18 @@ def transform(self, func, *args, **kwargs):
3020
3028
if not result .columns .equals (obj .columns ):
3021
3029
return self ._transform_general (func , * args , ** kwargs )
3022
3030
3023
- # a grouped that doesn't preserve the index, remap index based on the grouper
3024
- # and broadcast it
3025
- if ((not isinstance (obj .index ,MultiIndex ) and
3026
- type (result .index ) != type (obj .index )) or
3027
- len (result .index ) != len (obj .index )):
3028
- results = np .empty_like (obj .values , result .values .dtype )
3029
- indices = self .indices
3030
- for (name , group ), (i , row ) in zip (self , result .iterrows ()):
3031
+ results = np .empty_like (obj .values , result .values .dtype )
3032
+ indices = self .indices
3033
+ for (name , group ), (i , row ) in zip (self , result .iterrows ()):
3034
+ if name in indices :
3031
3035
indexer = indices [name ]
3032
3036
results [indexer ] = np .tile (row .values ,len (indexer )).reshape (len (indexer ),- 1 )
3033
- return DataFrame (results ,columns = result .columns ,index = obj .index ).convert_objects ()
3034
3037
3035
- # we can merge the result in
3036
- # GH 7383
3037
- names = result .columns
3038
- result = obj .merge (result , how = 'outer' , left_index = True , right_index = True ).iloc [:,- result .shape [1 ]:]
3039
- result .columns = names
3040
- return result
3038
+ counts = self .size ().fillna (0 ).values
3039
+ if any (counts == 0 ):
3040
+ results = self ._try_cast (results , obj [result .columns ])
3041
+
3042
+ return DataFrame (results ,columns = result .columns ,index = obj .index ).convert_objects ()
3041
3043
3042
3044
def _define_paths (self , func , * args , ** kwargs ):
3043
3045
if isinstance (func , compat .string_types ):
@@ -3129,10 +3131,9 @@ def filter(self, func, dropna=True, *args, **kwargs):
3129
3131
pass
3130
3132
3131
3133
# interpret the result of the filter
3132
- if (isinstance (res , (bool , np .bool_ )) or
3133
- np .isscalar (res ) and isnull (res )):
3134
- if res and notnull (res ):
3135
- indices .append (self ._get_index (name ))
3134
+ if is_bool (res ) or (lib .isscalar (res ) and isnull (res )):
3135
+ if res and notnull (res ) and name in self .indices :
3136
+ indices .append (self .indices [name ])
3136
3137
else :
3137
3138
# non scalars aren't allowed
3138
3139
raise TypeError ("filter function returned a %s, "
0 commit comments