1
1
import types
2
- from functools import wraps
2
+ from functools import wraps , partial
3
3
import numpy as np
4
4
import datetime
5
5
import collections
38
38
_ensure_float )
39
39
from pandas .core .dtypes .cast import maybe_downcast_to_dtype
40
40
from pandas .core .dtypes .generic import ABCSeries
41
- from pandas .core .dtypes .missing import isna , notna , _maybe_fill
41
+ from pandas .core .dtypes .missing import isna , isnull , notna , _maybe_fill
42
42
43
43
from pandas .core .base import (PandasObject , SelectionMixin , GroupByError ,
44
44
DataError , SpecificationError )
@@ -1457,6 +1457,36 @@ def expanding(self, *args, **kwargs):
1457
1457
from pandas .core .window import ExpandingGroupby
1458
1458
return ExpandingGroupby (self , * args , ** kwargs )
1459
1459
1460
def _fill(self, direction, limit=None):
    """Fill missing values within each group via the Cython indexer

    Common implementation that both `pad` and `backfill` delegate to.

    Parameters
    ----------
    direction : {'ffill', 'bfill'}
        Handed unchanged to the underlying Cython function. `bfill`
        fills values backwards; `ffill` and any other value result in
        a forward fill
    limit : int, default None
        Maximum number of consecutive values to fill. `None` is turned
        into -1 before the Cython call is made

    Returns
    -------
    `Series` or `DataFrame` with filled values

    See Also
    --------
    pad
    backfill
    """
    # Cython requires an int here, so `None` is passed along as -1
    cython_limit = -1 if limit is None else limit

    return self._get_cythonized_result(
        'group_fillna_indexer', self.grouper, needs_mask=True,
        direction=direction, limit=cython_limit)
1489
+
1460
1490
@Substitution (name = 'groupby' )
1461
1491
def pad (self , limit = None ):
1462
1492
"""
@@ -1474,7 +1504,7 @@ def pad(self, limit=None):
1474
1504
Series.fillna
1475
1505
DataFrame.fillna
1476
1506
"""
1477
- return self .apply ( lambda x : x . ffill ( limit = limit ) )
1507
+ return self ._fill ( ' ffill' , limit = limit )
1478
1508
ffill = pad
1479
1509
1480
1510
@Substitution (name = 'groupby' )
@@ -1494,7 +1524,7 @@ def backfill(self, limit=None):
1494
1524
Series.fillna
1495
1525
DataFrame.fillna
1496
1526
"""
1497
- return self .apply ( lambda x : x . bfill ( limit = limit ) )
1527
+ return self ._fill ( ' bfill' , limit = limit )
1498
1528
bfill = backfill
1499
1529
1500
1530
@Substitution (name = 'groupby' )
@@ -1843,6 +1873,45 @@ def cummax(self, axis=0, **kwargs):
1843
1873
1844
1874
return self ._cython_transform ('cummax' , numeric_only = False )
1845
1875
1876
def _get_cythonized_result(self, how, grouper, needs_mask=False,
                           needs_ngroups=False, **kwargs):
    """Get result for Cythonized functions that fill an indexer

    Parameters
    ----------
    how : str, Cythonized function name to be called
    grouper : Grouper object containing pertinent group info
    needs_mask : bool, default False
        Whether boolean mask needs to be part of the Cython call signature
    needs_ngroups : bool, default False
        Whether number of groups is part of the Cython call signature
    **kwargs : dict
        Extra arguments to be passed back to Cython funcs

    Returns
    -------
    `Series` or `DataFrame` as produced by `_wrap_transformed_output`,
    holding for every slice its values taken at the positions written
    into the indexer by the Cython function

    Notes
    -----
    The Cython function is invoked as
    ``func(indexer, labels[, mask][, ngroups], **kwargs)`` and its return
    value is ignored; only the in-place changes to ``indexer`` are used.
    """
    labels, _, ngroups = grouper.group_info
    output = collections.OrderedDict()
    base_func = getattr(libgroupby, how)

    for name, obj in self._iterate_slices():
        values = obj.values
        # A fresh buffer per slice: the Cython call overwrites it in place
        indexer = np.zeros_like(labels, dtype=np.int64)

        # Positional arguments are assembled in the order the Cython
        # signatures expect: indexer, labels, then mask, then ngroups
        args = [indexer, labels]
        if needs_mask:
            # `isna` is the name this module already imports; `isnull`
            # is merely an alias of it
            args.append(isna(values).view(np.uint8))
        if needs_ngroups:
            args.append(ngroups)

        base_func(*args, **kwargs)  # modifies indexer values in place
        output[name] = algorithms.take_nd(values, indexer)

    return self._wrap_transformed_output(output)
1914
+
1846
1915
@Substitution (name = 'groupby' )
1847
1916
@Appender (_doc_template )
1848
1917
def shift (self , periods = 1 , freq = None , axis = 0 ):
@@ -1860,17 +1929,9 @@ def shift(self, periods=1, freq=None, axis=0):
1860
1929
if freq is not None or axis != 0 :
1861
1930
return self .apply (lambda x : x .shift (periods , freq , axis ))
1862
1931
1863
- labels , _ , ngroups = self .grouper .group_info
1864
-
1865
- # filled in by Cython
1866
- indexer = np .zeros_like (labels )
1867
- libgroupby .group_shift_indexer (indexer , labels , ngroups , periods )
1868
-
1869
- output = {}
1870
- for name , obj in self ._iterate_slices ():
1871
- output [name ] = algorithms .take_nd (obj .values , indexer )
1872
-
1873
- return self ._wrap_transformed_output (output )
1932
+ return self ._get_cythonized_result ('group_shift_indexer' ,
1933
+ self .grouper , needs_ngroups = True ,
1934
+ periods = periods )
1874
1935
1875
1936
@Substitution (name = 'groupby' )
1876
1937
@Appender (_doc_template )
@@ -3577,7 +3638,6 @@ def describe(self, **kwargs):
3577
3638
def value_counts (self , normalize = False , sort = True , ascending = False ,
3578
3639
bins = None , dropna = True ):
3579
3640
3580
- from functools import partial
3581
3641
from pandas .core .reshape .tile import cut
3582
3642
from pandas .core .reshape .merge import _get_join_indexers
3583
3643
@@ -4585,9 +4645,17 @@ def _apply_to_column_groupbys(self, func):
4585
4645
in self ._iterate_column_groupbys ()),
4586
4646
keys = self ._selected_obj .columns , axis = 1 )
4587
4647
4648
def _fill(self, direction, limit=None):
    """Overridden method to join grouped columns in output"""
    filled = super(DataFrameGroupBy, self)._fill(direction, limit=limit)

    # One entry per grouping, keyed by the grouping's name
    key_columns = collections.OrderedDict()
    for grouping in self.grouper.groupings:
        key_columns[grouping.name] = grouping.grouper

    from pandas import concat
    keys_frame = self._wrap_transformed_output(key_columns)
    # Grouping columns come first, followed by the filled values
    return concat((keys_frame, filled), axis=1)
4656
+
4588
4657
def count (self ):
4589
4658
""" Compute count of group, excluding missing values """
4590
- from functools import partial
4591
4659
from pandas .core .dtypes .missing import _isna_ndarraylike as isna
4592
4660
4593
4661
data , _ = self ._get_data_to_aggregate ()
0 commit comments