24
24
to_datetime ,
25
25
)
26
26
import pandas ._testing as tm
27
+ from pandas .core .arrays import (
28
+ BooleanArray ,
29
+ FloatingArray ,
30
+ IntegerArray ,
31
+ )
27
32
from pandas .core .base import SpecificationError
28
33
import pandas .core .common as com
29
34
@@ -1822,17 +1827,23 @@ def test_pivot_table_values_key_error():
1822
1827
)
1823
1828
@pytest .mark .filterwarnings ("ignore:Dropping invalid columns:FutureWarning" )
1824
1829
@pytest .mark .filterwarnings ("ignore:.*Select only valid:FutureWarning" )
1825
- def test_empty_groupby (columns , keys , values , method , op , request ):
1830
+ def test_empty_groupby (columns , keys , values , method , op , request , using_array_manager ):
1826
1831
# GH8093 & GH26411
1827
1832
override_dtype = None
1828
1833
1829
1834
if (
1830
1835
isinstance (values , Categorical )
1831
1836
and not isinstance (columns , list )
1832
- and op in ["sum" , "prod" ]
1837
+ and op in ["sum" , "prod" , "skew" , "mad" ]
1833
1838
):
1834
1839
# handled below GH#41291
1835
- pass
1840
+
1841
+ if using_array_manager and op == "mad" :
1842
+ right_msg = "Cannot interpret 'CategoricalDtype.* as a data type"
1843
+ msg = "Regex pattern \" 'Categorical' does not implement.*" + right_msg
1844
+ mark = pytest .mark .xfail (raises = AssertionError , match = msg )
1845
+ request .node .add_marker (mark )
1846
+
1836
1847
elif (
1837
1848
isinstance (values , Categorical )
1838
1849
and len (keys ) == 1
@@ -1851,11 +1862,7 @@ def test_empty_groupby(columns, keys, values, method, op, request):
1851
1862
raises = TypeError , match = "'Categorical' does not implement"
1852
1863
)
1853
1864
request .node .add_marker (mark )
1854
- elif (
1855
- isinstance (values , Categorical )
1856
- and len (keys ) == 1
1857
- and op in ["mad" , "min" , "max" , "sum" , "prod" , "skew" ]
1858
- ):
1865
+ elif isinstance (values , Categorical ) and len (keys ) == 1 and op in ["sum" , "prod" ]:
1859
1866
mark = pytest .mark .xfail (
1860
1867
raises = AssertionError , match = "(DataFrame|Series) are different"
1861
1868
)
@@ -1869,7 +1876,30 @@ def test_empty_groupby(columns, keys, values, method, op, request):
1869
1876
raises = AssertionError , match = "(DataFrame|Series) are different"
1870
1877
)
1871
1878
request .node .add_marker (mark )
1872
- elif isinstance (values , pd .core .arrays .BooleanArray ) and op in ["sum" , "prod" ]:
1879
+ elif (
1880
+ isinstance (values , (IntegerArray , FloatingArray ))
1881
+ and op == "mad"
1882
+ and isinstance (columns , list )
1883
+ ):
1884
+ mark = pytest .mark .xfail (
1885
+ raises = TypeError , match = "can only perform ops with numeric values"
1886
+ )
1887
+ request .node .add_marker (mark )
1888
+
1889
+ elif (
1890
+ op == "mad"
1891
+ and not isinstance (columns , list )
1892
+ and isinstance (values , pd .DatetimeIndex )
1893
+ and values .tz is not None
1894
+ and using_array_manager
1895
+ ):
1896
+ mark = pytest .mark .xfail (
1897
+ raises = TypeError ,
1898
+ match = r"Cannot interpret 'datetime64\[ns, US/Eastern\]' as a data type" ,
1899
+ )
1900
+ request .node .add_marker (mark )
1901
+
1902
+ elif isinstance (values , BooleanArray ) and op in ["sum" , "prod" ]:
1873
1903
# We expect to get Int64 back for these
1874
1904
override_dtype = "Int64"
1875
1905
@@ -1895,19 +1925,29 @@ def get_result():
1895
1925
1896
1926
if columns == "C" :
1897
1927
# i.e. SeriesGroupBy
1898
- if op in ["prod" , "sum" ]:
1928
+ if op in ["prod" , "sum" , "skew" ]:
1899
1929
# ops that require more than just ordered-ness
1900
1930
if df .dtypes [0 ].kind == "M" :
1901
1931
# GH#41291
1902
1932
# datetime64 -> prod, sum and skew are invalid
1903
- msg = "datetime64 type does not support"
1933
+ if op == "skew" :
1934
+ msg = "'DatetimeArray' does not implement reduction 'skew'"
1935
+ else :
1936
+ msg = "datetime64 type does not support"
1904
1937
with pytest .raises (TypeError , match = msg ):
1905
1938
get_result ()
1906
1939
1907
1940
return
1908
- elif isinstance (values , Categorical ):
1941
+ if op in ["prod" , "sum" , "skew" , "mad" ]:
1942
+ if isinstance (values , Categorical ):
1909
1943
# GH#41291
1910
- msg = "category type does not support"
1944
+ if op == "mad" :
1945
+ # mad calls mean, which Categorical doesn't implement
1946
+ msg = "'Categorical' does not implement reduction 'mean'"
1947
+ elif op == "skew" :
1948
+ msg = f"'Categorical' does not implement reduction '{ op } '"
1949
+ else :
1950
+ msg = "category type does not support"
1911
1951
with pytest .raises (TypeError , match = msg ):
1912
1952
get_result ()
1913
1953
@@ -1954,6 +1994,34 @@ def get_result():
1954
1994
tm .assert_equal (result , expected )
1955
1995
return
1956
1996
1997
+ if (
1998
+ op in ["mad" , "min" , "max" , "skew" ]
1999
+ and isinstance (values , Categorical )
2000
+ and len (keys ) == 1
2001
+ ):
2002
+ # Categorical doesn't implement, so with numeric_only=True
2003
+ # these are dropped and we get an empty DataFrame back
2004
+ result = get_result ()
2005
+ expected = df .set_index (keys )[[]]
2006
+
2007
+ # with numeric_only=True, these are dropped, and we get
2008
+ # an empty DataFrame back
2009
+ if len (keys ) != 1 :
2010
+ # Categorical is special without 'observed=True'
2011
+ lev = Categorical ([0 ], dtype = values .dtype )
2012
+ mi = MultiIndex .from_product ([lev , lev ], names = keys )
2013
+ expected = DataFrame ([], columns = [], index = mi )
2014
+ else :
2015
+ # all columns are dropped, but we end up with one row
2016
+ # Categorical is special without 'observed=True'
2017
+ lev = Categorical ([0 ], dtype = values .dtype )
2018
+ ci = Index (lev , name = keys [0 ])
2019
+ expected = DataFrame ([], columns = [], index = ci )
2020
+ # expected = df.set_index(keys)[columns]
2021
+
2022
+ tm .assert_equal (result , expected )
2023
+ return
2024
+
1957
2025
result = get_result ()
1958
2026
expected = df .set_index (keys )[columns ]
1959
2027
if override_dtype is not None :
0 commit comments