@@ -566,6 +566,22 @@ def _concat_objects(self, keys, values, not_indexed_same=False):
566
566
567
567
return result
568
568
569
def _apply_filter(self, indices, dropna):
    """Build the result of a group-wise ``filter`` from selected positions.

    Parameters
    ----------
    indices : sequence
        One entry per group. Callers append the group's positional
        indices (``self.indices[name]``) for groups that pass the filter
        and may append an empty list for groups that do not.
    dropna : bool
        If True, return only the selected rows via ``self.obj.take``.
        If False, return an object shaped like ``self.obj`` in which the
        rows that were not selected are masked out by ``where``.

    Returns
    -------
    The filtered object produced by ``self.obj.take`` or
    ``self.obj.where``.
    """
    # Normalise to an integer ndarray once.  Two cases need this:
    #  * no groups at all -> np.concatenate would fail on an empty
    #    sequence, and a plain list has no ``astype`` (the mask branch
    #    used to raise AttributeError here);
    #  * empty-list placeholders for rejected groups make
    #    np.concatenate promote the result to float64, which is not a
    #    valid positional indexer.
    if len(indices) == 0:
        indices = np.array([], dtype=np.intp)
    else:
        indices = np.sort(np.concatenate(indices)).astype(np.intp)

    if dropna:
        filtered = self.obj.take(indices)
    else:
        mask = np.zeros(len(self.obj.index), dtype=bool)
        mask[indices] = True
        # mask fails to broadcast when passed to where; broadcast manually.
        mask = np.tile(mask, list(self.obj.shape[1:]) + [1]).T
        filtered = self.obj.where(mask)  # Fill with NaNs.
    return filtered
584
+
569
585
570
586
@Appender (GroupBy .__doc__ )
571
587
def groupby (obj , by , ** kwds ):
@@ -1585,14 +1601,13 @@ def transform(self, func, *args, **kwargs):
1585
1601
group = com .ensure_float (group )
1586
1602
object .__setattr__ (group , 'name' , name )
1587
1603
res = wrapper (group )
1588
- indexer = self .obj .index .get_indexer (group .index )
1589
1604
if hasattr (res ,'values' ):
1590
1605
res = res .values
1591
1606
1592
1607
# need to do a safe put here, as the dtype may be different
1593
1608
# this needs to be an ndarray
1594
1609
result = Series (result )
1595
- result .loc [ indexer ] = res
1610
+ result .iloc [ self . indices [ name ] ] = res
1596
1611
result = result .values
1597
1612
1598
1613
# downcast if we can (and need)
@@ -1630,22 +1645,15 @@ def true_and_notnull(x, *args, **kwargs):
1630
1645
return b and notnull (b )
1631
1646
1632
1647
try :
1633
- indexers = [self .obj .index .get_indexer (group .index ) \
1634
- if true_and_notnull (group ) else [] \
1635
- for _ , group in self ]
1648
+ indices = [self .indices [name ] if true_and_notnull (group ) else []
1649
+ for name , group in self ]
1636
1650
except ValueError :
1637
1651
raise TypeError ("the filter must return a boolean result" )
1638
1652
except TypeError :
1639
1653
raise TypeError ("the filter must return a boolean result" )
1640
1654
1641
- if len (indexers ) == 0 :
1642
- filtered = self .obj .take ([]) # because np.concatenate would fail
1643
- else :
1644
- filtered = self .obj .take (np .sort (np .concatenate (indexers )))
1645
- if dropna :
1646
- return filtered
1647
- else :
1648
- return filtered .reindex (self .obj .index ) # Fill with NaNs.
1655
+ filtered = self ._apply_filter (indices , dropna )
1656
+ return filtered
1649
1657
1650
1658
1651
1659
class NDFrameGroupBy (GroupBy ):
@@ -2125,7 +2133,7 @@ def filter(self, func, dropna=True, *args, **kwargs):
2125
2133
"""
2126
2134
from pandas .tools .merge import concat
2127
2135
2128
- indexers = []
2136
+ indices = []
2129
2137
2130
2138
obj = self ._obj_with_exclusions
2131
2139
gen = self .grouper .get_iterator (obj , axis = self .axis )
@@ -2146,31 +2154,25 @@ def filter(self, func, dropna=True, *args, **kwargs):
2146
2154
else :
2147
2155
res = path (group )
2148
2156
2149
- def add_indexer ():
2150
- indexers .append (self .obj . index . get_indexer ( group . index ) )
2157
+ def add_indices ():
2158
+ indices .append (self .indices [ name ] )
2151
2159
2152
2160
# interpret the result of the filter
2153
2161
if isinstance (res ,(bool ,np .bool_ )):
2154
2162
if res :
2155
- add_indexer ()
2163
+ add_indices ()
2156
2164
else :
2157
2165
if getattr (res ,'ndim' ,None ) == 1 :
2158
2166
val = res .ravel ()[0 ]
2159
2167
if val and notnull (val ):
2160
- add_indexer ()
2168
+ add_indices ()
2161
2169
else :
2162
2170
2163
2171
# in theory you could do .all() on the boolean result ?
2164
2172
raise TypeError ("the filter must return a boolean result" )
2165
2173
2166
- if len (indexers ) == 0 :
2167
- filtered = self .obj .take ([]) # because np.concatenate would fail
2168
- else :
2169
- filtered = self .obj .take (np .sort (np .concatenate (indexers )))
2170
- if dropna :
2171
- return filtered
2172
- else :
2173
- return filtered .reindex (self .obj .index ) # Fill with NaNs.
2174
+ filtered = self ._apply_filter (indices , dropna )
2175
+ return filtered
2174
2176
2175
2177
2176
2178
class DataFrameGroupBy (NDFrameGroupBy ):
0 commit comments