1
1
import types
2
- from functools import wraps
2
+ from functools import wraps , partial
3
3
import numpy as np
4
4
import datetime
5
5
import collections
@@ -1457,25 +1457,14 @@ def expanding(self, *args, **kwargs):
1457
1457
from pandas .core .window import ExpandingGroupby
1458
1458
return ExpandingGroupby (self , * args , ** kwargs )
1459
1459
1460
- def _fill (self , how , limit = None ):
1461
- labels , _ , _ = self .grouper .group_info
1462
-
1460
+ def _fill (self , direction , limit = None ):
1463
1461
# Need int value for Cython
1464
1462
if limit is None :
1465
1463
limit = - 1
1466
- output = {}
1467
- if type (self ) is DataFrameGroupBy :
1468
- for grp in self .grouper .groupings :
1469
- ser = grp .group_index .take (grp .labels )
1470
- output [ser .name ] = ser .values
1471
- for name , obj in self ._iterate_slices ():
1472
- indexer = np .zeros_like (labels )
1473
- mask = isnull (obj .values ).view (np .uint8 )
1474
- libgroupby .group_fillna_indexer (indexer , mask , labels , how ,
1475
- limit )
1476
- output [name ] = algorithms .take_nd (obj .values , indexer )
1477
1464
1478
- return self ._wrap_transformed_output (output )
1465
+ return self ._get_cythonized_result ('group_fillna_indexer' ,
1466
+ self .grouper , needs_mask = True ,
1467
+ direction = direction , limit = limit )
1479
1468
1480
1469
@Substitution (name = 'groupby' )
1481
1470
def pad (self , limit = None ):
@@ -1863,6 +1852,52 @@ def cummax(self, axis=0, **kwargs):
1863
1852
1864
1853
return self ._cython_transform ('cummax' , numeric_only = False )
1865
1854
1855
+ def _get_cythonized_result (self , how , grouper , needs_mask = False ,
1856
+ needs_ngroups = False , ** kwargs ):
1857
+ """Get result for Cythonized functions
1858
+
1859
+ Parameters
1860
+ ----------
1861
+ how : str, Cythonized function name to be called
1862
+ grouper : Grouper object containing pertinent group info
1863
+ needs_mask : bool, default False
1864
+ Whether boolean mask needs to be part of the Cython call signature
1865
+ needs_ngroups : bool, default False
1866
+ Whether number of groups is part of the Cython call signature
1867
+ **kwargs : dict
1868
+ Extra arguments required for the given function. This method
1869
+ internally stores an OrderedDict that maps those keywords to
1870
+ positional arguments before calling the Cython layer
1871
+
1872
+ Returns
1873
+ -------
1874
+ GroupBy object populated with appropriate result(s)
1875
+ """
1876
+ exp_kwds = collections .OrderedDict ([
1877
+ (('group_fillna_indexer' ), ('direction' , 'limit' )),
1878
+ (('group_shift_indexer' ), ('nperiods' ,))])
1879
+
1880
+ labels , _ , ngroups = grouper .group_info
1881
+ output = collections .OrderedDict ()
1882
+ base_func = getattr (libgroupby , how )
1883
+
1884
+ for name , obj in self ._iterate_slices ():
1885
+ indexer = np .zeros_like (labels )
1886
+ func = partial (base_func , indexer , labels )
1887
+ if needs_mask :
1888
+ mask = isnull (obj .values ).astype (np .uint8 , copy = False )
1889
+ func = partial (func , mask )
1890
+
1891
+ if needs_ngroups :
1892
+ func = partial (func , ngroups )
1893
+
1894
+ # Convert any keywords into positional arguments
1895
+ func = partial (func , * (kwargs [x ] for x in exp_kwds [how ]))
1896
+ func () # Call func to modify indexer values in place
1897
+ output [name ] = algorithms .take_nd (obj .values , indexer )
1898
+
1899
+ return self ._wrap_transformed_output (output )
1900
+
1866
1901
@Substitution (name = 'groupby' )
1867
1902
@Appender (_doc_template )
1868
1903
def shift (self , periods = 1 , freq = None , axis = 0 ):
@@ -1880,17 +1915,10 @@ def shift(self, periods=1, freq=None, axis=0):
1880
1915
if freq is not None or axis != 0 :
1881
1916
return self .apply (lambda x : x .shift (periods , freq , axis ))
1882
1917
1883
- labels , _ , ngroups = self .grouper .group_info
1884
-
1885
- # filled in by Cython
1886
- indexer = np .zeros_like (labels )
1887
- libgroupby .group_shift_indexer (indexer , labels , ngroups , periods )
1918
+ return self ._get_cythonized_result ('group_shift_indexer' ,
1919
+ self .grouper , needs_ngroups = True ,
1920
+ nperiods = periods )
1888
1921
1889
- output = {}
1890
- for name , obj in self ._iterate_slices ():
1891
- output [name ] = algorithms .take_nd (obj .values , indexer )
1892
-
1893
- return self ._wrap_transformed_output (output )
1894
1922
1895
1923
@Substitution (name = 'groupby' )
1896
1924
@Appender (_doc_template )
@@ -3597,7 +3625,6 @@ def describe(self, **kwargs):
3597
3625
def value_counts (self , normalize = False , sort = True , ascending = False ,
3598
3626
bins = None , dropna = True ):
3599
3627
3600
- from functools import partial
3601
3628
from pandas .core .reshape .tile import cut
3602
3629
from pandas .core .reshape .merge import _get_join_indexers
3603
3630
@@ -4605,9 +4632,18 @@ def _apply_to_column_groupbys(self, func):
4605
4632
in self ._iterate_column_groupbys ()),
4606
4633
keys = self ._selected_obj .columns , axis = 1 )
4607
4634
4635
+ def _fill (self , direction , limit = None ):
4636
+ """Overridden method to concat grouped columns in output"""
4637
+ res = super ()._fill (direction , limit = limit )
4638
+ output = collections .OrderedDict ()
4639
+ for grp in self .grouper .groupings :
4640
+ ser = grp .group_index .take (grp .labels )
4641
+ output [ser .name ] = ser .values
4642
+
4643
+ return self ._wrap_transformed_output (output ).join (res )
4644
+
4608
4645
def count (self ):
4609
4646
""" Compute count of group, excluding missing values """
4610
- from functools import partial
4611
4647
from pandas .core .dtypes .missing import _isna_ndarraylike as isna
4612
4648
4613
4649
data , _ = self ._get_data_to_aggregate ()
0 commit comments