@@ -818,12 +818,14 @@ def test_preserve_categories():
818
818
819
819
# ordered=True
820
820
df = DataFrame ({"A" : Categorical (list ("ba" ), categories = categories , ordered = True )})
821
- index = CategoricalIndex (categories , categories , ordered = True , name = "A" )
821
+ sort_index = CategoricalIndex (categories , categories , ordered = True , name = "A" )
822
+ nosort_index = CategoricalIndex (list ("bac" ), categories , ordered = True , name = "A" )
822
823
tm .assert_index_equal (
823
- df .groupby ("A" , sort = True , observed = False ).first ().index , index
824
+ df .groupby ("A" , sort = True , observed = False ).first ().index , sort_index
824
825
)
826
+ # GH#42482 - don't sort result when sort=False, even when ordered=True
825
827
tm .assert_index_equal (
826
- df .groupby ("A" , sort = False , observed = False ).first ().index , index
828
+ df .groupby ("A" , sort = False , observed = False ).first ().index , nosort_index
827
829
)
828
830
829
831
# ordered=False
@@ -972,8 +974,11 @@ def test_sort():
972
974
tm .assert_series_equal (res , exp )
973
975
974
976
975
- def test_sort2 ():
977
+ @pytest .mark .parametrize ("ordered" , [True , False ])
978
+ def test_sort2 (sort , ordered ):
976
979
# dataframe groupby sort was being ignored # GH 8868
980
+ # GH#48749 - don't change order of categories
981
+ # GH#42482 - don't sort result when sort=False, even when ordered=True
977
982
df = DataFrame (
978
983
[
979
984
["(7.5, 10]" , 10 , 10 ],
@@ -986,53 +991,28 @@ def test_sort2():
986
991
],
987
992
columns = ["range" , "foo" , "bar" ],
988
993
)
989
- df ["range" ] = Categorical (df ["range" ], ordered = True )
990
- index = CategoricalIndex (
991
- ["(0, 2.5]" , "(2.5, 5]" , "(5, 7.5]" , "(7.5, 10]" ], name = "range" , ordered = True
992
- )
993
- expected_sort = DataFrame (
994
- [[1 , 60 ], [5 , 30 ], [6 , 40 ], [10 , 10 ]], columns = ["foo" , "bar" ], index = index
995
- )
996
-
997
- col = "range"
998
- result_sort = df .groupby (col , sort = True , observed = False ).first ()
999
- tm .assert_frame_equal (result_sort , expected_sort )
1000
-
1001
- # when categories is ordered, group is ordered by category's order
1002
- expected_sort = result_sort
1003
- result_sort = df .groupby (col , sort = False , observed = False ).first ()
1004
- tm .assert_frame_equal (result_sort , expected_sort )
994
+ df ["range" ] = Categorical (df ["range" ], ordered = ordered )
995
+ result = df .groupby ("range" , sort = sort , observed = False ).first ()
1005
996
1006
- df ["range" ] = Categorical (df ["range" ], ordered = False )
1007
- index = CategoricalIndex (
1008
- ["(0, 2.5]" , "(2.5, 5]" , "(5, 7.5]" , "(7.5, 10]" ], name = "range"
1009
- )
1010
- expected_sort = DataFrame (
1011
- [[1 , 60 ], [5 , 30 ], [6 , 40 ], [10 , 10 ]], columns = ["foo" , "bar" ], index = index
1012
- )
1013
-
1014
- index = CategoricalIndex (
1015
- ["(7.5, 10]" , "(2.5, 5]" , "(5, 7.5]" , "(0, 2.5]" ],
1016
- # GH#48749 - don't change order of categories
1017
- categories = ["(0, 2.5]" , "(2.5, 5]" , "(5, 7.5]" , "(7.5, 10]" ],
1018
- name = "range" ,
1019
- )
1020
- expected_nosort = DataFrame (
1021
- [[10 , 10 ], [5 , 30 ], [6 , 40 ], [1 , 60 ]], index = index , columns = ["foo" , "bar" ]
997
+ if sort :
998
+ data_values = [[1 , 60 ], [5 , 30 ], [6 , 40 ], [10 , 10 ]]
999
+ index_values = ["(0, 2.5]" , "(2.5, 5]" , "(5, 7.5]" , "(7.5, 10]" ]
1000
+ else :
1001
+ data_values = [[10 , 10 ], [5 , 30 ], [6 , 40 ], [1 , 60 ]]
1002
+ index_values = ["(7.5, 10]" , "(2.5, 5]" , "(5, 7.5]" , "(0, 2.5]" ]
1003
+ expected = DataFrame (
1004
+ data_values ,
1005
+ columns = ["foo" , "bar" ],
1006
+ index = CategoricalIndex (index_values , name = "range" , ordered = ordered ),
1022
1007
)
1023
1008
1024
- col = "range"
1025
-
1026
- # this is an unordered categorical, but we allow this ####
1027
- result_sort = df .groupby (col , sort = True , observed = False ).first ()
1028
- tm .assert_frame_equal (result_sort , expected_sort )
1029
-
1030
- result_nosort = df .groupby (col , sort = False , observed = False ).first ()
1031
- tm .assert_frame_equal (result_nosort , expected_nosort )
1009
+ tm .assert_frame_equal (result , expected )
1032
1010
1033
1011
1034
- def test_sort_datetimelike ():
1012
+ @pytest .mark .parametrize ("ordered" , [True , False ])
1013
+ def test_sort_datetimelike (sort , ordered ):
1035
1014
# GH10505
1015
+ # GH#42482 - don't sort result when sort=False, even when ordered=True
1036
1016
1037
1017
# use same data as test_groupby_sort_categorical, which category is
1038
1018
# corresponding to datetime.month
@@ -1054,80 +1034,30 @@ def test_sort_datetimelike():
1054
1034
)
1055
1035
1056
1036
# ordered=True
1057
- df ["dt" ] = Categorical (df ["dt" ], ordered = True )
1058
- index = [
1059
- datetime (2011 , 1 , 1 ),
1060
- datetime (2011 , 2 , 1 ),
1061
- datetime (2011 , 5 , 1 ),
1062
- datetime (2011 , 7 , 1 ),
1063
- ]
1064
- result_sort = DataFrame (
1065
- [[1 , 60 ], [5 , 30 ], [6 , 40 ], [10 , 10 ]], columns = ["foo" , "bar" ]
1066
- )
1067
- result_sort .index = CategoricalIndex (index , name = "dt" , ordered = True )
1068
-
1069
- index = [
1070
- datetime (2011 , 7 , 1 ),
1071
- datetime (2011 , 2 , 1 ),
1072
- datetime (2011 , 5 , 1 ),
1073
- datetime (2011 , 1 , 1 ),
1074
- ]
1075
- result_nosort = DataFrame (
1076
- [[10 , 10 ], [5 , 30 ], [6 , 40 ], [1 , 60 ]], columns = ["foo" , "bar" ]
1077
- )
1078
- result_nosort .index = CategoricalIndex (
1079
- index , categories = index , name = "dt" , ordered = True
1080
- )
1081
-
1082
- col = "dt"
1083
- tm .assert_frame_equal (
1084
- result_sort , df .groupby (col , sort = True , observed = False ).first ()
1085
- )
1086
-
1087
- # when categories is ordered, group is ordered by category's order
1088
- tm .assert_frame_equal (
1089
- result_sort , df .groupby (col , sort = False , observed = False ).first ()
1090
- )
1091
-
1092
- # ordered = False
1093
- df ["dt" ] = Categorical (df ["dt" ], ordered = False )
1094
- sort_index = CategoricalIndex (
1095
- [
1037
+ df ["dt" ] = Categorical (df ["dt" ], ordered = ordered )
1038
+ if sort :
1039
+ data_values = [[1 , 60 ], [5 , 30 ], [6 , 40 ], [10 , 10 ]]
1040
+ index_values = [
1096
1041
datetime (2011 , 1 , 1 ),
1097
1042
datetime (2011 , 2 , 1 ),
1098
1043
datetime (2011 , 5 , 1 ),
1099
1044
datetime (2011 , 7 , 1 ),
1100
- ],
1101
- name = "dt" ,
1102
- )
1103
- result_sort = DataFrame (
1104
- [[1 , 60 ], [5 , 30 ], [6 , 40 ], [10 , 10 ]], columns = ["foo" , "bar" ], index = sort_index
1105
- )
1106
-
1107
- nosort_index = CategoricalIndex (
1108
- [
1045
+ ]
1046
+ else :
1047
+ data_values = [[10 , 10 ], [5 , 30 ], [6 , 40 ], [1 , 60 ]]
1048
+ index_values = [
1109
1049
datetime (2011 , 7 , 1 ),
1110
1050
datetime (2011 , 2 , 1 ),
1111
1051
datetime (2011 , 5 , 1 ),
1112
1052
datetime (2011 , 1 , 1 ),
1113
- ],
1114
- # GH#48749 - don't change order of categories
1115
- categories = sort_index .categories ,
1116
- name = "dt" ,
1117
- )
1118
- result_nosort = DataFrame (
1119
- [[10 , 10 ], [5 , 30 ], [6 , 40 ], [1 , 60 ]],
1053
+ ]
1054
+ expected = DataFrame (
1055
+ data_values ,
1120
1056
columns = ["foo" , "bar" ],
1121
- index = nosort_index ,
1122
- )
1123
-
1124
- col = "dt"
1125
- tm .assert_frame_equal (
1126
- result_sort , df .groupby (col , sort = True , observed = False ).first ()
1127
- )
1128
- tm .assert_frame_equal (
1129
- result_nosort , df .groupby (col , sort = False , observed = False ).first ()
1057
+ index = CategoricalIndex (index_values , name = "dt" , ordered = ordered ),
1130
1058
)
1059
+ result = df .groupby ("dt" , sort = sort , observed = False ).first ()
1060
+ tm .assert_frame_equal (result , expected )
1131
1061
1132
1062
1133
1063
def test_empty_sum ():
@@ -2055,13 +1985,10 @@ def test_category_order_apply(as_index, sort, observed, method, index_kind, orde
2055
1985
2056
1986
2057
1987
@pytest .mark .parametrize ("index_kind" , ["range" , "single" , "multi" ])
2058
- def test_many_categories (request , as_index , sort , index_kind , ordered ):
1988
+ def test_many_categories (as_index , sort , index_kind , ordered ):
2059
1989
# GH#48749 - Test when the grouper has many categories
2060
1990
if index_kind != "range" and not as_index :
2061
1991
pytest .skip (reason = "Result doesn't have categories, nothing to test" )
2062
- if index_kind == "multi" and as_index and not sort and ordered :
2063
- msg = "GH#48749 - values are unsorted even though the Categorical is ordered"
2064
- request .node .add_marker (pytest .mark .xfail (reason = msg ))
2065
1992
categories = np .arange (9999 , - 1 , - 1 )
2066
1993
grouper = Categorical ([2 , 1 , 2 , 3 ], categories = categories , ordered = ordered )
2067
1994
df = DataFrame ({"a" : grouper , "b" : range (4 )})
@@ -2078,7 +2005,7 @@ def test_many_categories(request, as_index, sort, index_kind, ordered):
2078
2005
result = gb .sum ()
2079
2006
2080
2007
# Test is setup so that data and index are the same values
2081
- data = [3 , 2 , 1 ] if sort or ordered else [2 , 1 , 3 ]
2008
+ data = [3 , 2 , 1 ] if sort else [2 , 1 , 3 ]
2082
2009
2083
2010
index = CategoricalIndex (
2084
2011
data , categories = grouper .categories , ordered = ordered , name = "a"
0 commit comments