@@ -1858,6 +1858,7 @@ def test_pivot_table_values_key_error():
1858
1858
Categorical ([0 ]),
1859
1859
[to_datetime (0 )],
1860
1860
date_range (0 , 1 , 1 , tz = "US/Eastern" ),
1861
+ pd .period_range ("2016-01-01" , periods = 3 , freq = "D" ),
1861
1862
pd .array ([0 ], dtype = "Int64" ),
1862
1863
pd .array ([0 ], dtype = "Float64" ),
1863
1864
pd .array ([False ], dtype = "boolean" ),
@@ -1870,6 +1871,7 @@ def test_pivot_table_values_key_error():
1870
1871
"cat" ,
1871
1872
"dt64" ,
1872
1873
"dt64tz" ,
1874
+ "period" ,
1873
1875
"Int64" ,
1874
1876
"Float64" ,
1875
1877
"boolean" ,
@@ -1886,13 +1888,6 @@ def test_empty_groupby(
1886
1888
override_dtype = None
1887
1889
1888
1890
if (
1889
- isinstance (values , Categorical )
1890
- and not isinstance (columns , list )
1891
- and op in ["sum" , "prod" , "skew" ]
1892
- ):
1893
- # handled below GH#41291
1894
- pass
1895
- elif (
1896
1891
isinstance (values , Categorical )
1897
1892
and len (keys ) == 1
1898
1893
and op in ["idxmax" , "idxmin" ]
@@ -1901,18 +1896,8 @@ def test_empty_groupby(
1901
1896
raises = ValueError , match = "attempt to get arg(min|max) of an empty sequence"
1902
1897
)
1903
1898
request .node .add_marker (mark )
1904
- elif isinstance (values , Categorical ) and len (keys ) == 1 and op in ["sum" , "prod" ]:
1905
- mark = pytest .mark .xfail (
1906
- raises = AssertionError , match = "(DataFrame|Series) are different"
1907
- )
1908
- request .node .add_marker (mark )
1909
- elif isinstance (values , Categorical ) and len (keys ) == 2 and op in ["sum" ]:
1910
- mark = pytest .mark .xfail (
1911
- raises = AssertionError , match = "(DataFrame|Series) are different"
1912
- )
1913
- request .node .add_marker (mark )
1914
1899
1915
- elif isinstance (values , BooleanArray ) and op in ["sum" , "prod" ]:
1900
+ if isinstance (values , BooleanArray ) and op in ["sum" , "prod" ]:
1916
1901
# We expect to get Int64 back for these
1917
1902
override_dtype = "Int64"
1918
1903
@@ -1936,6 +1921,26 @@ def get_result(**kwargs):
1936
1921
else :
1937
1922
return getattr (gb , method )(op , ** kwargs )
1938
1923
1924
+ def get_categorical_invalid_expected ():
1925
+ # Categorical is special without 'observed=True'; we get a NaN entry
1926
+ # corresponding to the unobserved group. If we passed observed=True
1927
+ # to groupby, expected would just be 'df.set_index(keys)[columns]'
1928
+ # as below
1929
+ lev = Categorical ([0 ], dtype = values .dtype )
1930
+ if len (keys ) != 1 :
1931
+ idx = MultiIndex .from_product ([lev , lev ], names = keys )
1932
+ else :
1933
+ # all columns are dropped, but we end up with one row
1934
+ # Categorical is special without 'observed=True'
1935
+ idx = Index (lev , name = keys [0 ])
1936
+
1937
+ expected = DataFrame ([], columns = [], index = idx )
1938
+ return expected
1939
+
1940
+ is_per = isinstance (df .dtypes [0 ], pd .PeriodDtype )
1941
+ is_dt64 = df .dtypes [0 ].kind == "M"
1942
+ is_cat = isinstance (values , Categorical )
1943
+
1939
1944
if isinstance (values , Categorical ) and not values .ordered and op in ["min" , "max" ]:
1940
1945
msg = f"Cannot perform { op } with non-ordered Categorical"
1941
1946
with pytest .raises (TypeError , match = msg ):
@@ -1944,105 +1949,47 @@ def get_result(**kwargs):
1944
1949
if isinstance (columns , list ):
1945
1950
# i.e. DataframeGroupBy, not SeriesGroupBy
1946
1951
result = get_result (numeric_only = True )
1947
-
1948
- # Categorical is special without 'observed=True', we get an NaN entry
1949
- # corresponding to the unobserved group. If we passed observed=True
1950
- # to groupby, expected would just be 'df.set_index(keys)[columns]'
1951
- # as below
1952
- lev = Categorical ([0 ], dtype = values .dtype )
1953
- if len (keys ) != 1 :
1954
- idx = MultiIndex .from_product ([lev , lev ], names = keys )
1955
- else :
1956
- # all columns are dropped, but we end up with one row
1957
- # Categorical is special without 'observed=True'
1958
- idx = Index (lev , name = keys [0 ])
1959
-
1960
- expected = DataFrame ([], columns = [], index = idx )
1952
+ expected = get_categorical_invalid_expected ()
1961
1953
tm .assert_equal (result , expected )
1962
1954
return
1963
1955
1964
- if columns == "C" :
1965
- # i.e. SeriesGroupBy
1966
- if op in ["prod" , "sum" , "skew" ]:
1967
- # ops that require more than just ordered-ness
1968
- if df .dtypes [0 ].kind == "M" :
1969
- # GH#41291
1970
- # datetime64 -> prod and sum are invalid
1971
- if op == "skew" :
1972
- msg = "does not support reduction 'skew'"
1973
- else :
1974
- msg = "datetime64 type does not support"
1975
- with pytest .raises (TypeError , match = msg ):
1976
- get_result ()
1977
-
1978
- return
1979
- if op in ["prod" , "sum" , "skew" ]:
1980
- if isinstance (values , Categorical ):
1981
- # GH#41291
1982
- if op == "skew" :
1983
- msg = f"does not support reduction '{ op } '"
1984
- else :
1985
- msg = "category type does not support"
1986
- with pytest .raises (TypeError , match = msg ):
1987
- get_result ()
1956
+ if op in ["prod" , "sum" , "skew" ]:
1957
+ # ops that require more than just ordered-ness
1958
+ if is_dt64 or is_cat or is_per :
1959
+ # GH#41291
1960
+ # datetime64, Categorical and Period -> prod, sum and skew are invalid
1961
+ if op == "skew" :
1962
+ msg = "does not support reduction 'skew'"
1963
+ elif is_dt64 :
1964
+ msg = "datetime64 type does not support"
1965
+ elif is_per :
1966
+ msg = "Period type does not support"
1967
+ else :
1968
+ msg = "category type does not support"
1969
+ with pytest .raises (TypeError , match = msg ):
1970
+ get_result ()
1988
1971
1972
+ if not isinstance (columns , list ):
1973
+ # i.e. SeriesGroupBy
1989
1974
return
1990
- else :
1991
- # ie. DataFrameGroupBy
1992
- if op in ["prod" , "sum" ]:
1993
- # ops that require more than just ordered-ness
1994
- if df .dtypes [0 ].kind == "M" :
1995
- # GH#41291
1996
- # datetime64 -> prod and sum are invalid
1997
- with pytest .raises (TypeError , match = "datetime64 type does not support" ):
1998
- get_result ()
1999
- result = get_result (numeric_only = True )
2000
-
2001
- # with numeric_only=True, these are dropped, and we get
2002
- # an empty DataFrame back
2003
- expected = df .set_index (keys )[[]]
2004
- tm .assert_equal (result , expected )
1975
+ elif op == "skew" :
1976
+ # TODO: test the numeric_only=True case
2005
1977
return
2006
-
2007
- elif isinstance (values , Categorical ):
1978
+ else :
1979
+ # i.e. op in ["prod", "sum"]:
1980
+ # i.e. DataFrameGroupBy
1981
+ # ops that require more than just ordered-ness
2008
1982
# GH#41291
2009
- # Categorical doesn't implement sum or prod
2010
- with pytest .raises (TypeError , match = "category type does not support" ):
2011
- get_result ()
2012
1983
result = get_result (numeric_only = True )
2013
1984
2014
1985
# with numeric_only=True, these are dropped, and we get
2015
1986
# an empty DataFrame back
2016
1987
expected = df .set_index (keys )[[]]
2017
- if len (keys ) != 1 and op == "prod" :
2018
- # TODO: why just prod and not sum?
2019
- # Categorical is special without 'observed=True'
2020
- lev = Categorical ([0 ], dtype = values .dtype )
2021
- mi = MultiIndex .from_product ([lev , lev ], names = ["A" , "B" ])
2022
- expected = DataFrame ([], columns = [], index = mi )
2023
-
2024
- tm .assert_equal (result , expected )
2025
- return
2026
-
2027
- elif df .dtypes [0 ] == object :
2028
- result = get_result ()
2029
- expected = df .set_index (keys )[["C" ]]
1988
+ if is_cat :
1989
+ expected = get_categorical_invalid_expected ()
2030
1990
tm .assert_equal (result , expected )
2031
1991
return
2032
1992
2033
- if op == "skew" and (
2034
- isinstance (values , Categorical ) or df .dtypes [0 ].kind == "M"
2035
- ):
2036
- msg = "|" .join (
2037
- [
2038
- "Categorical is not ordered" ,
2039
- "does not support reduction" ,
2040
- ]
2041
- )
2042
- with pytest .raises (TypeError , match = msg ):
2043
- get_result ()
2044
- return
2045
-
2046
1993
result = get_result ()
2047
1994
expected = df .set_index (keys )[columns ]
2048
1995
if override_dtype is not None :
0 commit comments