@@ -1558,6 +1558,42 @@ def transform(self, func, *args, **kwargs):
1558
1558
result = _possibly_downcast_to_dtype (result , dtype )
1559
1559
return self .obj .__class__ (result ,index = self .obj .index ,name = self .obj .name )
1560
1560
1561
def filter(self, func, dropna=True, *args, **kwargs):
    """
    Return a copy of a Series excluding elements from groups that
    do not satisfy the boolean criterion specified by func.

    Parameters
    ----------
    func : function or str
        To apply to each group. Should return True or False. A string
        is taken as the name of a method to call on each group.
    dropna : bool, default True
        Drop groups that do not pass the filter; if False, groups that
        evaluate False are filled with NaNs.
    *args, **kwargs
        Extra arguments forwarded to ``func`` for every group.

    Example
    -------
    >>> grouped.filter(lambda x: x.mean() > 0)

    Returns
    -------
    filtered : Series
        Elements of the retained groups, in group-iteration order when
        ``dropna`` is True; reindexed to the original index otherwise.
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(func, string_types):
        def wrapper(group):
            return getattr(group, func)(*args, **kwargs)
    else:
        def wrapper(group):
            return func(group, *args, **kwargs)

    # Collect positions only for the groups that pass. Appending a bare
    # ``[]`` for rejected groups would make np.concatenate promote the
    # positions to float64 and would defeat the emptiness check below.
    indexers = [self.obj.index.get_indexer(group.index)
                for _, group in self if wrapper(group)]

    if indexers:
        positions = np.concatenate(indexers)
    else:
        # np.concatenate fails on an empty list; use an empty integer
        # indexer so ``take`` returns an empty Series.
        positions = np.array([], dtype=np.intp)

    filtered = self.obj.take(positions)
    if dropna:
        return filtered
    return filtered.reindex(self.obj.index)  # Fill with NaNs.
1561
1597
1562
1598
class NDFrameGroupBy (GroupBy ):
1563
1599
@@ -1928,47 +1964,22 @@ def transform(self, func, *args, **kwargs):
1928
1964
1929
1965
obj = self ._obj_with_exclusions
1930
1966
gen = self .grouper .get_iterator (obj , axis = self .axis )
1931
-
1932
- if isinstance (func , basestring ):
1933
- fast_path = lambda group : getattr (group , func )(* args , ** kwargs )
1934
- slow_path = lambda group : group .apply (lambda x : getattr (x , func )(* args , ** kwargs ), axis = self .axis )
1935
- else :
1936
- fast_path = lambda group : func (group , * args , ** kwargs )
1937
- slow_path = lambda group : group .apply (lambda x : func (x , * args , ** kwargs ), axis = self .axis )
1967
+ fast_path , slow_path = self ._define_paths (func , * args , ** kwargs )
1938
1968
1939
1969
path = None
1940
1970
for name , group in gen :
1941
1971
object .__setattr__ (group , 'name' , name )
1942
1972
1943
- # decide on a fast path
1944
1973
if path is None :
1945
-
1946
- path = slow_path
1974
+ # Try slow path and fast path.
1947
1975
try :
1948
- res = slow_path (group )
1949
-
1950
- # if we make it here, test if we can use the fast path
1951
- try :
1952
- res_fast = fast_path (group )
1953
-
1954
- # compare that we get the same results
1955
- if res .shape == res_fast .shape :
1956
- res_r = res .values .ravel ()
1957
- res_fast_r = res_fast .values .ravel ()
1958
- mask = notnull (res_r )
1959
- if (res_r [mask ] == res_fast_r [mask ]).all ():
1960
- path = fast_path
1961
-
1962
- except :
1963
- pass
1976
+ path , res = self ._choose_path (fast_path , slow_path , group )
1964
1977
except TypeError :
1965
1978
return self ._transform_item_by_item (obj , fast_path )
1966
1979
except Exception : # pragma: no cover
1967
1980
res = fast_path (group )
1968
1981
path = fast_path
1969
-
1970
1982
else :
1971
-
1972
1983
res = path (group )
1973
1984
1974
1985
# broadcasting
@@ -1988,6 +1999,35 @@ def transform(self, func, *args, **kwargs):
1988
1999
concatenated .sort_index (inplace = True )
1989
2000
return concatenated
1990
2001
2002
def _define_paths(self, func, *args, **kwargs):
    """
    Build the two candidate callables used to evaluate ``func`` on a group.

    Parameters
    ----------
    func : function or str
        Callable invoked as ``func(x, *args, **kwargs)``, or the name of a
        method invoked as ``getattr(x, func)(*args, **kwargs)``.
    *args, **kwargs
        Extra arguments forwarded on every invocation.

    Returns
    -------
    fast_path, slow_path : tuple of functions
        ``fast_path(group)`` applies ``func`` to the whole group at once;
        ``slow_path(group)`` applies it through ``group.apply`` along
        ``self.axis`` (read when the slow path is called).
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` elsewhere.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    if isinstance(func, string_types):
        def invoke(target):
            return getattr(target, func)(*args, **kwargs)
    else:
        def invoke(target):
            return func(target, *args, **kwargs)

    def fast_path(group):
        return invoke(group)

    def slow_path(group):
        return group.apply(lambda x: invoke(x), axis=self.axis)

    return fast_path, slow_path
2010
+
2011
def _choose_path(self, fast_path, slow_path, group):
    """
    Evaluate ``group`` with the slow path and decide which path to keep.

    The slow path result is always the one returned. The fast path is
    selected for later use only if it runs without error on ``group`` and
    gives a result of the same shape whose values equal the slow path's
    wherever the slow path result is not null.

    Parameters
    ----------
    fast_path, slow_path : functions
        Callables taking a single group.
    group : object
        The group both callables are tried on.

    Returns
    -------
    path, res : tuple
        The chosen callable and the slow path result for ``group``.

    Raises
    ------
    Whatever ``slow_path(group)`` raises; that call is not guarded.
    """
    res = slow_path(group)
    path = slow_path

    # If we make it here, test whether the fast path can be used instead.
    try:
        res_fast = fast_path(group)

        # Compare that we get the same results.
        if res.shape == res_fast.shape:
            res_r = res.values.ravel()
            res_fast_r = res_fast.values.ravel()
            mask = notnull(res_r)
            if (res_r[mask] == res_fast_r[mask]).all():
                path = fast_path
    except Exception:
        # Best effort: any failure while probing the fast path just means
        # we keep the slow path. Unlike a bare ``except``, this lets
        # KeyboardInterrupt and SystemExit propagate.
        pass
    return path, res
2030
+
1991
2031
def _transform_item_by_item (self , obj , wrapper ):
1992
2032
# iterate through columns
1993
2033
output = {}
@@ -2008,6 +2048,63 @@ def _transform_item_by_item(self, obj, wrapper):
2008
2048
2009
2049
return DataFrame (output , index = obj .index , columns = columns )
2010
2050
2051
def filter(self, func, dropna=True, *args, **kwargs):
    """
    Return a copy of a DataFrame excluding elements from groups that
    do not satisfy the boolean criterion specified by func.

    Parameters
    ----------
    func : function or str
        Function to apply to each subframe. Should return True or False.
    dropna : bool, default True
        Drop groups that do not pass the filter; if False, groups that
        evaluate False are filled with NaNs.
    *args, **kwargs
        Extra arguments forwarded to ``func``.

    Note
    ----
    Each subframe is endowed with the attribute 'name' in case you need
    to know which group you are working on.

    Example
    -------
    >>> grouped = df.groupby(lambda x: mapping[x])
    >>> grouped.filter(lambda x: x['A'].sum() + x['B'].sum() > 0)

    Returns
    -------
    filtered : same type as the grouped object
        Rows of the retained groups, in group-iteration order when
        ``dropna`` is True; reindexed to the original index otherwise.
    """
    indexers = []

    obj = self._obj_with_exclusions
    gen = self.grouper.get_iterator(obj, axis=self.axis)

    fast_path, slow_path = self._define_paths(func, *args, **kwargs)

    path = None
    for name, group in gen:
        # Expose the group key to ``func`` as ``group.name``.
        object.__setattr__(group, 'name', name)

        if path is None:
            # Try slow path and fast path on the first group only; the
            # chosen path is reused for every later group.
            try:
                path, res = self._choose_path(fast_path, slow_path, group)
            except Exception:  # pragma: no cover
                res = fast_path(group)
                path = fast_path
        else:
            res = path(group)

        # NOTE(review): if the chosen path returns a non-scalar, this
        # relies on that object's own truthiness rules -- confirm.
        if res:
            indexers.append(self.obj.index.get_indexer(group.index))

    if not indexers:
        filtered = self.obj.take([])  # because np.concatenate would fail
    else:
        filtered = self.obj.take(np.concatenate(indexers))
    if dropna:
        return filtered
    return filtered.reindex(self.obj.index)  # Fill with NaNs.
2107
+
2011
2108
2012
2109
class DataFrameGroupBy (NDFrameGroupBy ):
2013
2110
0 commit comments