@@ -4808,256 +4808,79 @@ def aggregate(self, func, axis=0, *args, **kwargs):
4808
4808
4809
4809
agg = aggregate
4810
4810
4811
- def apply (self , func , axis = 0 , broadcast = False , raw = False , reduce = None ,
4812
- args = (), ** kwds ):
4813
- """
4814
- Applies function along input axis of DataFrame.
4815
-
4816
- Objects passed to functions are Series objects having index
4817
- either the DataFrame's index (axis=0) or the columns (axis=1).
4818
- Return type depends on whether passed function aggregates, or the
4819
- reduce argument if the DataFrame is empty.
4820
-
4821
- Parameters
4822
- ----------
4823
- func : function
4824
- Function to apply to each column/row
4825
- axis : {0 or 'index', 1 or 'columns'}, default 0
4826
- * 0 or 'index': apply function to each column
4827
- * 1 or 'columns': apply function to each row
4828
- broadcast : boolean, default False
4829
- For aggregation functions, return object of same size with values
4830
- propagated
4831
- raw : boolean, default False
4832
- If False, convert each row or column into a Series. If raw=True the
4833
- passed function will receive ndarray objects instead. If you are
4834
- just applying a NumPy reduction function this will achieve much
4835
- better performance
4836
- reduce : boolean or None, default None
4837
- Try to apply reduction procedures. If the DataFrame is empty,
4838
- apply will use reduce to determine whether the result should be a
4839
- Series or a DataFrame. If reduce is None (the default), apply's
4840
- return value will be guessed by calling func an empty Series (note:
4841
- while guessing, exceptions raised by func will be ignored). If
4842
- reduce is True a Series will always be returned, and if False a
4843
- DataFrame will always be returned.
4844
- args : tuple
4845
- Positional arguments to pass to function in addition to the
4846
- array/series
4847
- Additional keyword arguments will be passed as keywords to the function
4848
-
4849
- Notes
4850
- -----
4851
- In the current implementation apply calls func twice on the
4852
- first column/row to decide whether it can take a fast or slow
4853
- code path. This can lead to unexpected behavior if func has
4854
- side-effects, as they will take effect twice for the first
4855
- column/row.
4856
-
4857
- Examples
4858
- --------
4859
- >>> df.apply(numpy.sqrt) # returns DataFrame
4860
- >>> df.apply(numpy.sum, axis=0) # equiv to df.sum(0)
4861
- >>> df.apply(numpy.sum, axis=1) # equiv to df.sum(1)
4862
-
4863
- See also
4864
- --------
4865
- DataFrame.applymap: For elementwise operations
4866
- DataFrame.aggregate: only perform aggregating type operations
4867
- DataFrame.transform: only perform transformating type operations
4868
-
4869
- Returns
4870
- -------
4871
- applied : Series or DataFrame
4872
- """
4873
- axis = self ._get_axis_number (axis )
4874
- ignore_failures = kwds .pop ('ignore_failures' , False )
4875
-
4876
- # dispatch to agg
4877
- if axis == 0 and isinstance (func , (list , dict )):
4878
- return self .aggregate (func , axis = axis , * args , ** kwds )
4879
-
4880
- if len (self .columns ) == 0 and len (self .index ) == 0 :
4881
- return self ._apply_empty_result (func , axis , reduce , * args , ** kwds )
4882
-
4883
- # if we are a string, try to dispatch
4884
- if isinstance (func , compat .string_types ):
4885
- if axis :
4886
- kwds ['axis' ] = axis
4887
- return getattr (self , func )(* args , ** kwds )
4888
-
4889
- if kwds or args and not isinstance (func , np .ufunc ):
4890
- def f (x ):
4891
- return func (x , * args , ** kwds )
4892
- else :
4893
- f = func
4894
-
4895
- if isinstance (f , np .ufunc ):
4896
- with np .errstate (all = 'ignore' ):
4897
- results = f (self .values )
4898
- return self ._constructor (data = results , index = self .index ,
4899
- columns = self .columns , copy = False )
4900
- else :
4901
- if not broadcast :
4902
- if not all (self .shape ):
4903
- return self ._apply_empty_result (func , axis , reduce , * args ,
4904
- ** kwds )
4905
-
4906
- if raw and not self ._is_mixed_type :
4907
- return self ._apply_raw (f , axis )
4908
- else :
4909
- if reduce is None :
4910
- reduce = True
4911
- return self ._apply_standard (
4912
- f , axis ,
4913
- reduce = reduce ,
4914
- ignore_failures = ignore_failures )
4915
- else :
4916
- return self ._apply_broadcast (f , axis )
4811
+ _shared_docs ['apply' ] = ("""
4812
+ Applies function along input axis of DataFrame.
4917
4813
4918
- def _apply_empty_result (self , func , axis , reduce , * args , ** kwds ):
4919
- if reduce is None :
4920
- reduce = False
4921
- try :
4922
- reduce = not isinstance (func (_EMPTY_SERIES , * args , ** kwds ),
4923
- Series )
4924
- except Exception :
4925
- pass
4814
+ Objects passed to functions are Series objects having index
4815
+ either the DataFrame's index (axis=0) or the columns (axis=1).
4816
+ Return type depends on whether passed function aggregates, or the
4817
+ reduce argument if the DataFrame is empty.
4926
4818
4927
- if reduce :
4928
- return Series (np .nan , index = self ._get_agg_axis (axis ))
4929
- else :
4930
- return self .copy ()
4931
-
4932
- def _apply_raw (self , func , axis ):
4933
- try :
4934
- result = lib .reduce (self .values , func , axis = axis )
4935
- except Exception :
4936
- result = np .apply_along_axis (func , axis , self .values )
4937
-
4938
- # TODO: mixed type case
4939
- if result .ndim == 2 :
4940
- return DataFrame (result , index = self .index , columns = self .columns )
4941
- else :
4942
- return Series (result , index = self ._get_agg_axis (axis ))
4943
-
4944
- def _apply_standard (self , func , axis , ignore_failures = False , reduce = True ):
4945
-
4946
- # skip if we are mixed datelike and trying reduce across axes
4947
- # GH6125
4948
- if (reduce and axis == 1 and self ._is_mixed_type and
4949
- self ._is_datelike_mixed_type ):
4950
- reduce = False
4951
-
4952
- # try to reduce first (by default)
4953
- # this only matters if the reduction in values is of different dtype
4954
- # e.g. if we want to apply to a SparseFrame, then can't directly reduce
4955
- if reduce :
4956
- values = self .values
4957
-
4958
- # we cannot reduce using non-numpy dtypes,
4959
- # as demonstrated in gh-12244
4960
- if not is_extension_type (values ):
4961
- # Create a dummy Series from an empty array
4962
- index = self ._get_axis (axis )
4963
- empty_arr = np .empty (len (index ), dtype = values .dtype )
4964
- dummy = Series (empty_arr , index = self ._get_axis (axis ),
4965
- dtype = values .dtype )
4966
-
4967
- try :
4968
- labels = self ._get_agg_axis (axis )
4969
- result = lib .reduce (values , func , axis = axis , dummy = dummy ,
4970
- labels = labels )
4971
- return Series (result , index = labels )
4972
- except Exception :
4973
- pass
4974
-
4975
- dtype = object if self ._is_mixed_type else None
4976
- if axis == 0 :
4977
- series_gen = (self ._ixs (i , axis = 1 )
4978
- for i in range (len (self .columns )))
4979
- res_index = self .columns
4980
- res_columns = self .index
4981
- elif axis == 1 :
4982
- res_index = self .index
4983
- res_columns = self .columns
4984
- values = self .values
4985
- series_gen = (Series ._from_array (arr , index = res_columns , name = name ,
4986
- dtype = dtype )
4987
- for i , (arr , name ) in enumerate (zip (values ,
4988
- res_index )))
4989
- else : # pragma : no cover
4990
- raise AssertionError ('Axis must be 0 or 1, got %s' % str (axis ))
4991
-
4992
- i = None
4993
- keys = []
4994
- results = {}
4995
- if ignore_failures :
4996
- successes = []
4997
- for i , v in enumerate (series_gen ):
4998
- try :
4999
- results [i ] = func (v )
5000
- keys .append (v .name )
5001
- successes .append (i )
5002
- except Exception :
5003
- pass
5004
- # so will work with MultiIndex
5005
- if len (successes ) < len (res_index ):
5006
- res_index = res_index .take (successes )
5007
- else :
5008
- try :
5009
- for i , v in enumerate (series_gen ):
5010
- results [i ] = func (v )
5011
- keys .append (v .name )
5012
- except Exception as e :
5013
- if hasattr (e , 'args' ):
5014
- # make sure i is defined
5015
- if i is not None :
5016
- k = res_index [i ]
5017
- e .args = e .args + ('occurred at index %s' %
5018
- pprint_thing (k ), )
5019
- raise
5020
-
5021
- if len (results ) > 0 and is_sequence (results [0 ]):
5022
- if not isinstance (results [0 ], Series ):
5023
- index = res_columns
5024
- else :
5025
- index = None
5026
-
5027
- result = self ._constructor (data = results , index = index )
5028
- result .columns = res_index
5029
-
5030
- if axis == 1 :
5031
- result = result .T
5032
- result = result ._convert (datetime = True , timedelta = True , copy = False )
5033
-
5034
- else :
5035
-
5036
- result = Series (results )
5037
- result .index = res_index
5038
-
5039
- return result
5040
-
5041
- def _apply_broadcast (self , func , axis ):
5042
- if axis == 0 :
5043
- target = self
5044
- elif axis == 1 :
5045
- target = self .T
5046
- else : # pragma: no cover
5047
- raise AssertionError ('Axis must be 0 or 1, got %s' % axis )
4819
+ Parameters
4820
+ ----------
4821
+ func : function
4822
+ Function to apply to each column/row
4823
+ axis : {0 or 'index', 1 or 'columns'}, default 0
4824
+ * 0 or 'index': apply function to each column
4825
+ * 1 or 'columns': apply function to each row
4826
+ broadcast : boolean, default False
4827
+ For aggregation functions, return object of same size with values
4828
+ propagated
4829
+ raw : boolean, default False
4830
+ If False, convert each row or column into a Series. If raw=True the
4831
+ passed function will receive ndarray objects instead. If you are
4832
+ just applying a NumPy reduction function this will achieve much
4833
+ better performance
4834
+ reduce : boolean or None, default None
4835
+ Try to apply reduction procedures. If the DataFrame is empty,
4836
+ apply will use reduce to determine whether the result should be a
4837
+ Series or a DataFrame. If reduce is None (the default), apply's
4838
+ return value will be guessed by calling func on an empty Series (note:
4839
+ while guessing, exceptions raised by func will be ignored). If
4840
+ reduce is True a Series will always be returned, and if False a
4841
+ DataFrame will always be returned.
4842
+ args : tuple
4843
+ Positional arguments to pass to function in addition to the
4844
+ array/series
4845
+ Additional keyword arguments will be passed as keywords to the function
4846
+
4847
+ Notes
4848
+ -----
4849
+ In the current implementation apply calls func twice on the
4850
+ first column/row to decide whether it can take a fast or slow
4851
+ code path. This can lead to unexpected behavior if func has
4852
+ side-effects, as they will take effect twice for the first
4853
+ column/row.
5048
4854
5049
- result_values = np .empty_like (target .values )
5050
- columns = target .columns
5051
- for i , col in enumerate (columns ):
5052
- result_values [:, i ] = func (target [col ])
4855
+ Examples
4856
+ --------
4857
+ >>> df.apply(numpy.sqrt) # returns DataFrame
4858
+ >>> df.apply(numpy.sum, axis=0) # equiv to df.sum(0)
4859
+ >>> df.apply(numpy.sum, axis=1) # equiv to df.sum(1)
5053
4860
5054
- result = self ._constructor (result_values , index = target .index ,
5055
- columns = target .columns )
4861
+ See Also
4862
+ --------
4863
+ DataFrame.applymap: For elementwise operations
4864
+ DataFrame.aggregate: only perform aggregating type operations
4865
+ DataFrame.transform: only perform transforming type operations
5056
4866
5057
- if axis == 1 :
5058
- result = result .T
4867
+ Returns
4868
+ -------
4869
+ applied : Series or DataFrame
4870
+ """ )
5059
4871
5060
- return result
4872
+ @Appender (_shared_docs ['apply' ])
4873
+ def apply (self , func , axis = 0 , broadcast = False , raw = False , reduce = None ,
4874
+ args = (), ** kwds ):
4875
+ from pandas .core .apply import frame_apply
4876
+ op = frame_apply (self ,
4877
+ func = func ,
4878
+ axis = axis ,
4879
+ broadcast = broadcast ,
4880
+ raw = raw ,
4881
+ reduce = reduce ,
4882
+ args = args , ** kwds )
4883
+ return op .get_result ()
5061
4884
5062
4885
def applymap (self , func ):
5063
4886
"""
@@ -6189,8 +6012,6 @@ def isin(self, values):
6189
6012
ops .add_flex_arithmetic_methods (DataFrame , ** ops .frame_flex_funcs )
6190
6013
ops .add_special_arithmetic_methods (DataFrame , ** ops .frame_special_funcs )
6191
6014
6192
- _EMPTY_SERIES = Series ([])
6193
-
6194
6015
6195
6016
def _arrays_to_mgr (arrays , arr_names , index , columns , dtype = None ):
6196
6017
"""
0 commit comments