4
4
import numpy as np
5
5
import pytest
6
6
7
- from pandas ._libs import lib
8
7
from pandas .compat import IS64
9
8
from pandas .errors import (
10
9
PerformanceWarning ,
@@ -909,64 +908,37 @@ def test_keep_nuisance_agg(df, agg_function):
909
908
"agg_function" ,
910
909
["sum" , "mean" , "prod" , "std" , "var" , "sem" , "median" ],
911
910
)
912
- @pytest .mark .parametrize ("numeric_only" , [lib . no_default , True , False ])
911
+ @pytest .mark .parametrize ("numeric_only" , [True , False ])
913
912
def test_omit_nuisance_agg (df , agg_function , numeric_only ):
914
913
# GH 38774, GH 38815
915
- if numeric_only is lib .no_default or (not numeric_only and agg_function != "sum" ):
916
- # sum doesn't drop strings
917
- warn = FutureWarning
918
- else :
919
- warn = None
920
-
921
914
grouped = df .groupby ("A" )
922
915
923
916
no_drop_nuisance = ("var" , "std" , "sem" , "mean" , "prod" , "median" )
924
- if agg_function in no_drop_nuisance and numeric_only is False :
917
+ if agg_function in no_drop_nuisance and not numeric_only :
925
918
# Added numeric_only as part of GH#46560; these do not drop nuisance
926
919
# columns when numeric_only is False
927
920
klass = ValueError if agg_function in ("std" , "sem" ) else TypeError
928
921
msg = "|" .join (["[C|c]ould not convert" , "can't multiply sequence" ])
929
922
with pytest .raises (klass , match = msg ):
930
923
getattr (grouped , agg_function )(numeric_only = numeric_only )
931
924
else :
932
- if numeric_only is lib .no_default :
933
- msg = (
934
- f"The default value of numeric_only in DataFrameGroupBy.{ agg_function } "
935
- )
936
- else :
937
- msg = "Dropping invalid columns"
938
- with tm .assert_produces_warning (warn , match = msg ):
939
- result = getattr (grouped , agg_function )(numeric_only = numeric_only )
940
- if (
941
- (numeric_only is lib .no_default or not numeric_only )
942
- # These methods drop non-numeric columns even when numeric_only is False
943
- and agg_function not in ("mean" , "prod" , "median" )
944
- ):
925
+ result = getattr (grouped , agg_function )(numeric_only = numeric_only )
926
+ if not numeric_only and agg_function == "sum" :
927
+ # sum is successful on column B
945
928
columns = ["A" , "B" , "C" , "D" ]
946
929
else :
947
930
columns = ["A" , "C" , "D" ]
948
- if agg_function == "sum" and numeric_only is False :
949
- # sum doesn't drop nuisance string columns
950
- warn = None
951
- elif agg_function in ("sum" , "std" , "var" , "sem" ) and numeric_only is not True :
952
- warn = FutureWarning
953
- else :
954
- warn = None
955
- msg = "The default value of numeric_only"
956
- with tm .assert_produces_warning (warn , match = msg ):
957
- expected = getattr (df .loc [:, columns ].groupby ("A" ), agg_function )(
958
- numeric_only = numeric_only
959
- )
931
+ expected = getattr (df .loc [:, columns ].groupby ("A" ), agg_function )(
932
+ numeric_only = numeric_only
933
+ )
960
934
tm .assert_frame_equal (result , expected )
961
935
962
936
963
- def test_omit_nuisance_warnings (df ):
937
+ def test_raise_on_nuisance_python_single (df ):
964
938
# GH 38815
965
- with tm .assert_produces_warning (FutureWarning , filter_level = "always" ):
966
- grouped = df .groupby ("A" )
967
- result = grouped .skew ()
968
- expected = df .loc [:, ["A" , "C" , "D" ]].groupby ("A" ).skew ()
969
- tm .assert_frame_equal (result , expected )
939
+ grouped = df .groupby ("A" )
940
+ with pytest .raises (TypeError , match = "could not convert" ):
941
+ grouped .skew ()
970
942
971
943
972
944
def test_raise_on_nuisance_python_multiple (three_group ):
@@ -2012,14 +1984,9 @@ def get_result(**kwargs):
2012
1984
if df .dtypes [0 ].kind == "M" :
2013
1985
# GH#41291
2014
1986
# datetime64 -> prod and sum are invalid
2015
- if op == "sum" :
2016
- with pytest .raises (
2017
- TypeError , match = "datetime64 type does not support"
2018
- ):
2019
- get_result ()
2020
- result = get_result (numeric_only = True )
2021
- else :
2022
- result = get_result ()
1987
+ with pytest .raises (TypeError , match = "datetime64 type does not support" ):
1988
+ get_result ()
1989
+ result = get_result (numeric_only = True )
2023
1990
2024
1991
# with numeric_only=True, these are dropped, and we get
2025
1992
# an empty DataFrame back
@@ -2030,14 +1997,9 @@ def get_result(**kwargs):
2030
1997
elif isinstance (values , Categorical ):
2031
1998
# GH#41291
2032
1999
# Categorical doesn't implement sum or prod
2033
- if op == "sum" :
2034
- with pytest .raises (
2035
- TypeError , match = "category type does not support"
2036
- ):
2037
- get_result ()
2038
- result = get_result (numeric_only = True )
2039
- else :
2040
- result = get_result ()
2000
+ with pytest .raises (TypeError , match = "category type does not support" ):
2001
+ get_result ()
2002
+ result = get_result (numeric_only = True )
2041
2003
2042
2004
# with numeric_only=True, these are dropped, and we get
2043
2005
# an empty DataFrame back
@@ -2053,24 +2015,22 @@ def get_result(**kwargs):
2053
2015
return
2054
2016
2055
2017
elif df .dtypes [0 ] == object :
2056
- # FIXME: the test is actually wrong here, xref #41341
2057
2018
result = get_result ()
2058
- # In this case we have list-of-list, will raise TypeError,
2059
- # and subsequently be dropped as nuisance columns
2060
- if op == "sum" :
2061
- expected = df .set_index (keys )[["C" ]]
2062
- else :
2063
- expected = df .set_index (keys )[[]]
2019
+ expected = df .set_index (keys )[["C" ]]
2064
2020
tm .assert_equal (result , expected )
2065
2021
return
2066
2022
2067
- if (
2068
- op in ["min" , "max" , "skew" ]
2069
- and isinstance (values , Categorical )
2070
- and len (keys ) == 1
2023
+ if (op in ["min" , "max" , "skew" ] and isinstance (values , Categorical )) or (
2024
+ op == "skew" and df .dtypes [0 ].kind == "M"
2071
2025
):
2072
- if op in ("min" , "max" ):
2073
- with pytest .raises (TypeError , match = "Categorical is not ordered" ):
2026
+ if op == "skew" or len (keys ) == 1 :
2027
+ msg = "|" .join (
2028
+ [
2029
+ "Categorical is not ordered" ,
2030
+ "does not support reduction" ,
2031
+ ]
2032
+ )
2033
+ with pytest .raises (TypeError , match = msg ):
2074
2034
get_result ()
2075
2035
return
2076
2036
# Categorical doesn't implement, so with numeric_only=True
0 commit comments