17
17
qcut ,
18
18
)
19
19
import pandas .util .testing as tm
20
- from pandas .util .testing import assert_equal , assert_frame_equal , assert_series_equal
21
20
22
21
23
22
def cartesian_product_for_groupers (result , args , names ):
@@ -159,7 +158,7 @@ def f(x):
159
158
exp_idx = CategoricalIndex (levels , categories = cats .categories , ordered = True )
160
159
expected = expected .reindex (exp_idx )
161
160
162
- assert_frame_equal (result , expected )
161
+ tm . assert_frame_equal (result , expected )
163
162
164
163
grouped = data .groupby (cats , observed = False )
165
164
desc_result = grouped .describe ()
@@ -172,7 +171,7 @@ def f(x):
172
171
ord_labels , ordered = True , categories = ["foo" , "bar" , "baz" , "qux" ]
173
172
)
174
173
expected = ord_data .groupby (exp_cats , sort = False , observed = False ).describe ()
175
- assert_frame_equal (desc_result , expected )
174
+ tm . assert_frame_equal (desc_result , expected )
176
175
177
176
# GH 10460
178
177
expc = Categorical .from_codes (np .arange (4 ).repeat (8 ), levels , ordered = True )
@@ -206,7 +205,7 @@ def test_level_get_group(observed):
206
205
)
207
206
result = g .get_group ("a" )
208
207
209
- assert_frame_equal (result , expected )
208
+ tm . assert_frame_equal (result , expected )
210
209
211
210
212
211
# GH#21636 flaky on py37; may be related to older numpy, see discussion
@@ -232,21 +231,21 @@ def test_apply(ordered):
232
231
# is coming back as Series([0., 1., 0.], index=["missing", "dense", "values"])
233
232
# when we expect Series(0., index=["values"])
234
233
result = grouped .apply (lambda x : np .mean (x ))
235
- assert_frame_equal (result , expected )
234
+ tm . assert_frame_equal (result , expected )
236
235
237
236
# we coerce back to ints
238
237
expected = expected .astype ("int" )
239
238
result = grouped .mean ()
240
- assert_frame_equal (result , expected )
239
+ tm . assert_frame_equal (result , expected )
241
240
242
241
result = grouped .agg (np .mean )
243
- assert_frame_equal (result , expected )
242
+ tm . assert_frame_equal (result , expected )
244
243
245
244
# but for transform we should still get back the original index
246
245
idx = MultiIndex .from_arrays ([missing , dense ], names = ["missing" , "dense" ])
247
246
expected = Series (1 , index = idx )
248
247
result = grouped .apply (lambda x : 1 )
249
- assert_series_equal (result , expected )
248
+ tm . assert_series_equal (result , expected )
250
249
251
250
252
251
def test_observed (observed ):
@@ -335,7 +334,7 @@ def test_observed(observed):
335
334
c , i = key
336
335
result = groups_double_key .get_group (key )
337
336
expected = df [(df .cat == c ) & (df .ints == i )]
338
- assert_frame_equal (result , expected )
337
+ tm . assert_frame_equal (result , expected )
339
338
340
339
# gh-8869
341
340
# with as_index
@@ -522,7 +521,7 @@ def test_datetime():
522
521
expected .index , categories = expected .index , ordered = True
523
522
)
524
523
525
- assert_frame_equal (result , expected )
524
+ tm . assert_frame_equal (result , expected )
526
525
527
526
grouped = data .groupby (cats , observed = False )
528
527
desc_result = grouped .describe ()
@@ -531,7 +530,7 @@ def test_datetime():
531
530
ord_labels = cats .take (idx )
532
531
ord_data = data .take (idx )
533
532
expected = ord_data .groupby (ord_labels , observed = False ).describe ()
534
- assert_frame_equal (desc_result , expected )
533
+ tm . assert_frame_equal (desc_result , expected )
535
534
tm .assert_index_equal (desc_result .index , expected .index )
536
535
tm .assert_index_equal (
537
536
desc_result .index .get_level_values (0 ), expected .index .get_level_values (0 )
@@ -560,15 +559,15 @@ def test_categorical_index():
560
559
expected .index = CategoricalIndex (
561
560
Categorical .from_codes ([0 , 1 , 2 , 3 ], levels , ordered = True ), name = "cats"
562
561
)
563
- assert_frame_equal (result , expected )
562
+ tm . assert_frame_equal (result , expected )
564
563
565
564
# with a cat column, should produce a cat index
566
565
result = df .groupby ("cats" , observed = False ).sum ()
567
566
expected = df [list ("abcd" )].groupby (cats .codes , observed = False ).sum ()
568
567
expected .index = CategoricalIndex (
569
568
Categorical .from_codes ([0 , 1 , 2 , 3 ], levels , ordered = True ), name = "cats"
570
569
)
571
- assert_frame_equal (result , expected )
570
+ tm . assert_frame_equal (result , expected )
572
571
573
572
574
573
def test_describe_categorical_columns ():
@@ -757,7 +756,7 @@ def test_categorical_no_compress():
757
756
exp .index = CategoricalIndex (
758
757
exp .index , categories = cats .categories , ordered = cats .ordered
759
758
)
760
- assert_series_equal (result , exp )
759
+ tm . assert_series_equal (result , exp )
761
760
762
761
codes = np .array ([0 , 0 , 0 , 1 , 1 , 1 , 3 , 3 , 3 ])
763
762
cats = Categorical .from_codes (codes , [0 , 1 , 2 , 3 ], ordered = True )
@@ -767,7 +766,7 @@ def test_categorical_no_compress():
767
766
exp .index = CategoricalIndex (
768
767
exp .index , categories = cats .categories , ordered = cats .ordered
769
768
)
770
- assert_series_equal (result , exp )
769
+ tm . assert_series_equal (result , exp )
771
770
772
771
cats = Categorical (
773
772
["a" , "a" , "a" , "b" , "b" , "b" , "c" , "c" , "c" ],
@@ -829,12 +828,12 @@ def test_sort2():
829
828
830
829
col = "range"
831
830
result_sort = df .groupby (col , sort = True , observed = False ).first ()
832
- assert_frame_equal (result_sort , expected_sort )
831
+ tm . assert_frame_equal (result_sort , expected_sort )
833
832
834
833
# when categories is ordered, group is ordered by category's order
835
834
expected_sort = result_sort
836
835
result_sort = df .groupby (col , sort = False , observed = False ).first ()
837
- assert_frame_equal (result_sort , expected_sort )
836
+ tm . assert_frame_equal (result_sort , expected_sort )
838
837
839
838
df ["range" ] = Categorical (df ["range" ], ordered = False )
840
839
index = CategoricalIndex (
@@ -857,10 +856,10 @@ def test_sort2():
857
856
858
857
# this is an unordered categorical, but we allow this ####
859
858
result_sort = df .groupby (col , sort = True , observed = False ).first ()
860
- assert_frame_equal (result_sort , expected_sort )
859
+ tm . assert_frame_equal (result_sort , expected_sort )
861
860
862
861
result_nosort = df .groupby (col , sort = False , observed = False ).first ()
863
- assert_frame_equal (result_nosort , expected_nosort )
862
+ tm . assert_frame_equal (result_nosort , expected_nosort )
864
863
865
864
866
865
def test_sort_datetimelike ():
@@ -912,10 +911,14 @@ def test_sort_datetimelike():
912
911
)
913
912
914
913
col = "dt"
915
- assert_frame_equal (result_sort , df .groupby (col , sort = True , observed = False ).first ())
914
+ tm .assert_frame_equal (
915
+ result_sort , df .groupby (col , sort = True , observed = False ).first ()
916
+ )
916
917
917
918
# when categories is ordered, group is ordered by category's order
918
- assert_frame_equal (result_sort , df .groupby (col , sort = False , observed = False ).first ())
919
+ tm .assert_frame_equal (
920
+ result_sort , df .groupby (col , sort = False , observed = False ).first ()
921
+ )
919
922
920
923
# ordered = False
921
924
df ["dt" ] = Categorical (df ["dt" ], ordered = False )
@@ -942,8 +945,10 @@ def test_sort_datetimelike():
942
945
result_nosort .index = CategoricalIndex (index , categories = index , name = "dt" )
943
946
944
947
col = "dt"
945
- assert_frame_equal (result_sort , df .groupby (col , sort = True , observed = False ).first ())
946
- assert_frame_equal (
948
+ tm .assert_frame_equal (
949
+ result_sort , df .groupby (col , sort = True , observed = False ).first ()
950
+ )
951
+ tm .assert_frame_equal (
947
952
result_nosort , df .groupby (col , sort = False , observed = False ).first ()
948
953
)
949
954
@@ -1022,7 +1027,7 @@ def test_groupby_multiindex_categorical_datetime():
1022
1027
names = ["key1" , "key2" ],
1023
1028
)
1024
1029
expected = DataFrame ({"values" : [0 , 4 , 8 , 3 , 4 , 5 , 6 , np .nan , 2 ]}, index = idx )
1025
- assert_frame_equal (result , expected )
1030
+ tm . assert_frame_equal (result , expected )
1026
1031
1027
1032
1028
1033
@pytest .mark .parametrize (
@@ -1058,7 +1063,7 @@ def test_groupby_agg_observed_true_single_column(as_index, expected):
1058
1063
1059
1064
result = df .groupby (["a" , "b" ], as_index = as_index , observed = True )["x" ].sum ()
1060
1065
1061
- assert_equal (result , expected )
1066
+ tm . assert_equal (result , expected )
1062
1067
1063
1068
1064
1069
@pytest .mark .parametrize ("fill_value" , [None , np .nan , pd .NaT ])
@@ -1070,7 +1075,7 @@ def test_shift(fill_value):
1070
1075
[None , "a" , "b" , "c" ], categories = ["a" , "b" , "c" , "d" ], ordered = False
1071
1076
)
1072
1077
res = ct .shift (1 , fill_value = fill_value )
1073
- assert_equal (res , expected )
1078
+ tm . assert_equal (res , expected )
1074
1079
1075
1080
1076
1081
@pytest .fixture
@@ -1112,7 +1117,7 @@ def test_seriesgroupby_observed_true(df_cat, operation, kwargs):
1112
1117
expected = Series (data = [1 , 3 , 2 , 4 ], index = index , name = "C" )
1113
1118
grouped = df_cat .groupby (["A" , "B" ], observed = True )["C" ]
1114
1119
result = getattr (grouped , operation )(sum )
1115
- assert_series_equal (result , expected )
1120
+ tm . assert_series_equal (result , expected )
1116
1121
1117
1122
1118
1123
@pytest .mark .parametrize ("operation" , ["agg" , "apply" ])
@@ -1130,7 +1135,7 @@ def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation):
1130
1135
expected = Series (data = [2 , 4 , np .nan , 1 , np .nan , 3 ], index = index , name = "C" )
1131
1136
grouped = df_cat .groupby (["A" , "B" ], observed = observed )["C" ]
1132
1137
result = getattr (grouped , operation )(sum )
1133
- assert_series_equal (result , expected )
1138
+ tm . assert_series_equal (result , expected )
1134
1139
1135
1140
1136
1141
@pytest .mark .parametrize (
@@ -1185,7 +1190,7 @@ def test_seriesgroupby_observed_apply_dict(df_cat, observed, index, data):
1185
1190
result = df_cat .groupby (["A" , "B" ], observed = observed )["C" ].apply (
1186
1191
lambda x : OrderedDict ([("min" , x .min ()), ("max" , x .max ())])
1187
1192
)
1188
- assert_series_equal (result , expected )
1193
+ tm . assert_series_equal (result , expected )
1189
1194
1190
1195
1191
1196
@pytest .mark .parametrize ("code" , [([1 , 0 , 0 ]), ([0 , 0 , 0 ])])
@@ -1195,7 +1200,7 @@ def test_groupby_categorical_axis_1(code):
1195
1200
cat = pd .Categorical .from_codes (code , categories = list ("abc" ))
1196
1201
result = df .groupby (cat , axis = 1 ).mean ()
1197
1202
expected = df .T .groupby (cat , axis = 0 ).mean ().T
1198
- assert_frame_equal (result , expected )
1203
+ tm . assert_frame_equal (result , expected )
1199
1204
1200
1205
1201
1206
def test_groupby_cat_preserves_structure (observed , ordered_fixture ):
@@ -1212,7 +1217,7 @@ def test_groupby_cat_preserves_structure(observed, ordered_fixture):
1212
1217
.reset_index ()
1213
1218
)
1214
1219
1215
- assert_frame_equal (result , expected )
1220
+ tm . assert_frame_equal (result , expected )
1216
1221
1217
1222
1218
1223
def test_get_nonexistent_category ():
0 commit comments