@@ -29,6 +29,8 @@ class providing the base-class of operations.
29
29
ensure_float , is_extension_array_dtype , is_numeric_dtype , is_scalar )
30
30
from pandas .core .dtypes .missing import isna , notna
31
31
32
+ from pandas .api .types import (
33
+ is_datetime64_dtype , is_integer_dtype , is_object_dtype )
32
34
import pandas .core .algorithms as algorithms
33
35
from pandas .core .base import (
34
36
DataError , GroupByError , PandasObject , SelectionMixin , SpecificationError )
@@ -1024,15 +1026,17 @@ def _bool_agg(self, val_test, skipna):
1024
1026
"""
1025
1027
1026
1028
def objs_to_bool(vals):
    # type: np.ndarray -> (np.ndarray, typing.Type)
    """
    Coerce values to booleans and expose them as a ``uint8`` view.

    Parameters
    ----------
    vals : np.ndarray
        Values whose truthiness is to be evaluated.

    Returns
    -------
    tuple of (np.ndarray, type)
        The boolean-converted values viewed as ``uint8``, and the type
        (``bool``) that the result should be cast back to afterwards.
    """
    if is_object_dtype(vals):
        # Object arrays are converted element by element, relying on each
        # item's own truthiness. The explicit dtype keeps an empty input
        # boolean instead of letting NumPy default it to float64.
        vals = np.array([bool(x) for x in vals], dtype=bool)
    else:
        # ``np.bool`` was only an alias of the builtin ``bool`` and is
        # absent from several NumPy releases; the builtin is equivalent
        # and always available.
        vals = vals.astype(bool)

    return vals.view(np.uint8), bool
1033
1036
1034
def result_to_bool(result, inference):
    # type: (np.ndarray, typing.Type) -> np.ndarray
    """
    Cast the raw result array to the inferred type.

    Parameters
    ----------
    result : np.ndarray
        Array to be converted.
    inference : type
        Target type for the conversion.

    Returns
    -------
    np.ndarray
        ``result`` as ``inference``; no copy is requested when the array
        already has that type.
    """
    converted = result.astype(dtype=inference, copy=False)
    return converted
1036
1040
1037
1041
return self ._get_cythonized_result ('group_any_all' , self .grouper ,
1038
1042
aggregate = True ,
@@ -1688,6 +1692,75 @@ def nth(self, n, dropna=None):
1688
1692
1689
1693
return result
1690
1694
1695
def quantile(self, q=0.5, interpolation='linear'):
    """
    Return group values at the given quantile, a la numpy.percentile.

    Parameters
    ----------
    q : float or array-like, default 0.5 (50% quantile)
        Value(s) between 0 and 1 providing the quantile(s) to compute.
    interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
        Method to use when the desired quantile falls between two points.

    Returns
    -------
    Series or DataFrame
        Return type determined by caller of GroupBy object.

    Raises
    ------
    TypeError
        If the values being aggregated have ``object`` dtype.

    See Also
    --------
    Series.quantile : Similar method for Series.
    DataFrame.quantile : Similar method for DataFrame.
    numpy.percentile : NumPy method to compute qth percentile.

    Examples
    --------
    >>> df = pd.DataFrame([
    ...     ['a', 1], ['a', 2], ['a', 3],
    ...     ['b', 1], ['b', 3], ['b', 5]
    ... ], columns=['key', 'val'])
    >>> df.groupby('key').quantile()
         val
    key
    a    2.0
    b    3.0
    """

    def pre_processor(vals):
        # type: np.ndarray -> (np.ndarray, Optional[typing.Type])
        # Reject object values and record the type, if any, that the
        # result should be converted back to after the computation.
        if is_object_dtype(vals):
            raise TypeError("'quantile' cannot be performed against "
                            "'object' dtypes!")

        inference = None
        if is_integer_dtype(vals):
            inference = np.int64
        elif is_datetime64_dtype(vals):
            inference = 'datetime64[ns]'
            # ``np.float`` was an alias that no longer exists in current
            # NumPy releases; ``np.float64`` is the explicit equivalent.
            # NOTE(review): the cast is applied to the datetime branch
            # only; the reviewed text carried no indentation, so confirm
            # this placement against upstream.
            vals = vals.astype(np.float64)

        return vals, inference

    def post_processor(vals, inference):
        # type: (np.ndarray, Optional[typing.Type]) -> np.ndarray
        # Convert the computed values back to the recorded type, if any.
        if not inference:
            return vals

        # Edge case: for integer input, 'linear' and 'midpoint' results
        # are left as computed instead of being cast back to integers.
        keep_as_computed = (is_integer_dtype(inference) and
                            interpolation in {'linear', 'midpoint'})
        if not keep_as_computed:
            vals = vals.astype(inference)

        return vals

    return self._get_cythonized_result('group_quantile', self.grouper,
                                       aggregate=True,
                                       needs_values=True,
                                       needs_mask=True,
                                       cython_dtype=np.float64,
                                       pre_processing=pre_processor,
                                       post_processing=post_processor,
                                       q=q, interpolation=interpolation)
1763
+
1691
1764
@Substitution (name = 'groupby' )
1692
1765
def ngroup (self , ascending = True ):
1693
1766
"""
@@ -1924,10 +1997,16 @@ def _get_cythonized_result(self, how, grouper, aggregate=False,
1924
1997
Whether the result of the Cython operation is an index of
1925
1998
values to be retrieved, instead of the actual values themselves
1926
1999
pre_processing : function, default None
1927
- Function to be applied to `values` prior to passing to Cython
1928
- Raises if `needs_values` is False
2000
+ Function to be applied to `values` prior to passing to Cython.
2001
+ Function should return a tuple where the first element is the
2002
+ values to be passed to Cython and the second element is an optional
2003
+ type which the values should be converted to after being returned
2004
+ by the Cython operation. Raises if `needs_values` is False.
1929
2005
post_processing : function, default None
1930
- Function to be applied to result of Cython function
2006
+ Function to be applied to result of Cython function. Should accept
2007
+ an array of values as the first argument and type inferences as its
2008
+ second argument, i.e. the signature should be
2009
+ (ndarray, typing.Type).
1931
2010
**kwargs : dict
1932
2011
Extra arguments to be passed back to Cython funcs
1933
2012
@@ -1963,10 +2042,12 @@ def _get_cythonized_result(self, how, grouper, aggregate=False,
1963
2042
1964
2043
result = np .zeros (result_sz , dtype = cython_dtype )
1965
2044
func = partial (base_func , result , labels )
2045
+ inferences = None
2046
+
1966
2047
if needs_values :
1967
2048
vals = obj .values
1968
2049
if pre_processing :
1969
- vals = pre_processing (vals )
2050
+ vals , inferences = pre_processing (vals )
1970
2051
func = partial (func , vals )
1971
2052
1972
2053
if needs_mask :
@@ -1982,7 +2063,7 @@ def _get_cythonized_result(self, how, grouper, aggregate=False,
1982
2063
result = algorithms .take_nd (obj .values , result )
1983
2064
1984
2065
if post_processing :
1985
- result = post_processing (result )
2066
+ result = post_processing (result , inferences )
1986
2067
1987
2068
output [name ] = result
1988
2069
0 commit comments