@@ -322,6 +322,8 @@ def test_groupby_as_index_apply():
322
322
tm .assert_index_equal (res_as_apply , exp_as_apply )
323
323
tm .assert_index_equal (res_not_as_apply , exp_not_as_apply )
324
324
325
+
326
+ def test_groupby_as_index_apply_str ():
325
327
ind = Index (list ("abcde" ))
326
328
df = DataFrame ([[1 , 2 ], [2 , 3 ], [1 , 4 ], [1 , 5 ], [2 , 6 ]], index = ind )
327
329
msg = "DataFrameGroupBy.apply operated on the grouping columns"
@@ -379,8 +381,8 @@ def f(piece):
379
381
{"value" : piece , "demeaned" : piece - piece .mean (), "logged" : logged }
380
382
)
381
383
382
- dr = bdate_range ("1/1/2000" , periods = 100 )
383
- ts = Series (np .random .default_rng (2 ).standard_normal (100 ), index = dr )
384
+ dr = bdate_range ("1/1/2000" , periods = 10 )
385
+ ts = Series (np .random .default_rng (2 ).standard_normal (10 ), index = dr )
384
386
385
387
grouped = ts .groupby (lambda x : x .month , group_keys = False )
386
388
result = grouped .apply (f )
@@ -639,13 +641,13 @@ def reindex_helper(x):
639
641
def test_apply_corner_cases ():
640
642
# #535, can't use sliding iterator
641
643
642
- N = 1000
644
+ N = 10
643
645
labels = np .random .default_rng (2 ).integers (0 , 100 , size = N )
644
646
df = DataFrame (
645
647
{
646
648
"key" : labels ,
647
649
"value1" : np .random .default_rng (2 ).standard_normal (N ),
648
- "value2" : ["foo" , "bar" , "baz" , "qux" ] * (N // 4 ),
650
+ "value2" : ["foo" , "bar" , "baz" , "qux" , "a" ] * (N // 5 ),
649
651
}
650
652
)
651
653
@@ -680,6 +682,8 @@ def test_apply_numeric_coercion_when_datetime():
680
682
result = df .groupby (["Number" ]).apply (lambda x : x .iloc [0 ])
681
683
tm .assert_series_equal (result ["Str" ], expected ["Str" ])
682
684
685
+
686
+ def test_apply_numeric_coercion_when_datetime_getitem ():
683
687
# GH 15421
684
688
df = DataFrame (
685
689
{"A" : [10 , 20 , 30 ], "B" : ["foo" , "3" , "4" ], "T" : [pd .Timestamp ("12:31:22" )] * 3 }
@@ -695,6 +699,8 @@ def get_B(g):
695
699
expected .index = df .A
696
700
tm .assert_series_equal (result , expected )
697
701
702
+
703
+ def test_apply_numeric_coercion_when_datetime_with_nat ():
698
704
# GH 14423
699
705
def predictions (tool ):
700
706
out = Series (index = ["p1" , "p2" , "useTime" ], dtype = object )
@@ -843,10 +849,24 @@ def test_func(x):
843
849
tm .assert_frame_equal (result , expected )
844
850
845
851
846
- def test_groupby_apply_none_first ():
852
+ @pytest .mark .parametrize (
853
+ "in_data, out_idx, out_data" ,
854
+ [
855
+ [
856
+ {"groups" : [1 , 1 , 1 , 2 ], "vars" : [0 , 1 , 2 , 3 ]},
857
+ [[1 , 1 ], [0 , 2 ]],
858
+ {"groups" : [1 , 1 ], "vars" : [0 , 2 ]},
859
+ ],
860
+ [
861
+ {"groups" : [1 , 2 , 2 , 2 ], "vars" : [0 , 1 , 2 , 3 ]},
862
+ [[2 , 2 ], [1 , 3 ]],
863
+ {"groups" : [2 , 2 ], "vars" : [1 , 3 ]},
864
+ ],
865
+ ],
866
+ )
867
+ def test_groupby_apply_none_first (in_data , out_idx , out_data ):
847
868
# GH 12824. Tests if apply returns None first.
848
- test_df1 = DataFrame ({"groups" : [1 , 1 , 1 , 2 ], "vars" : [0 , 1 , 2 , 3 ]})
849
- test_df2 = DataFrame ({"groups" : [1 , 2 , 2 , 2 ], "vars" : [0 , 1 , 2 , 3 ]})
869
+ test_df1 = DataFrame (in_data )
850
870
851
871
def test_func (x ):
852
872
if x .shape [0 ] < 2 :
@@ -856,14 +876,9 @@ def test_func(x):
856
876
msg = "DataFrameGroupBy.apply operated on the grouping columns"
857
877
with tm .assert_produces_warning (DeprecationWarning , match = msg ):
858
878
result1 = test_df1 .groupby ("groups" ).apply (test_func )
859
- with tm .assert_produces_warning (DeprecationWarning , match = msg ):
860
- result2 = test_df2 .groupby ("groups" ).apply (test_func )
861
- index1 = MultiIndex .from_arrays ([[1 , 1 ], [0 , 2 ]], names = ["groups" , None ])
862
- index2 = MultiIndex .from_arrays ([[2 , 2 ], [1 , 3 ]], names = ["groups" , None ])
863
- expected1 = DataFrame ({"groups" : [1 , 1 ], "vars" : [0 , 2 ]}, index = index1 )
864
- expected2 = DataFrame ({"groups" : [2 , 2 ], "vars" : [1 , 3 ]}, index = index2 )
879
+ index1 = MultiIndex .from_arrays (out_idx , names = ["groups" , None ])
880
+ expected1 = DataFrame (out_data , index = index1 )
865
881
tm .assert_frame_equal (result1 , expected1 )
866
- tm .assert_frame_equal (result2 , expected2 )
867
882
868
883
869
884
def test_groupby_apply_return_empty_chunk ():
@@ -883,18 +898,16 @@ def test_groupby_apply_return_empty_chunk():
883
898
tm .assert_series_equal (result , expected )
884
899
885
900
886
- def test_apply_with_mixed_types ():
901
+ @pytest .mark .parametrize ("meth" , ["apply" , "transform" ])
902
+ def test_apply_with_mixed_types (meth ):
887
903
# gh-20949
888
904
df = DataFrame ({"A" : "a a b" .split (), "B" : [1 , 2 , 3 ], "C" : [4 , 6 , 5 ]})
889
905
g = df .groupby ("A" , group_keys = False )
890
906
891
- result = g . transform (lambda x : x / x .sum ())
907
+ result = getattr ( g , meth ) (lambda x : x / x .sum ())
892
908
expected = DataFrame ({"B" : [1 / 3.0 , 2 / 3.0 , 1 ], "C" : [0.4 , 0.6 , 1.0 ]})
893
909
tm .assert_frame_equal (result , expected )
894
910
895
- result = g .apply (lambda x : x / x .sum ())
896
- tm .assert_frame_equal (result , expected )
897
-
898
911
899
912
def test_func_returns_object ():
900
913
# GH 28652
@@ -1106,7 +1119,7 @@ def test_apply_function_with_indexing_return_column():
1106
1119
1107
1120
@pytest .mark .parametrize (
1108
1121
"udf" ,
1109
- [( lambda x : x .copy ()), ( lambda x : x .copy ().rename (lambda y : y + 1 ) )],
1122
+ [lambda x : x .copy (), lambda x : x .copy ().rename (lambda y : y + 1 )],
1110
1123
)
1111
1124
@pytest .mark .parametrize ("group_keys" , [True , False ])
1112
1125
def test_apply_result_type (group_keys , udf ):
@@ -1214,7 +1227,7 @@ def test_apply_with_date_in_multiindex_does_not_convert_to_timestamp():
1214
1227
expected = df .iloc [[0 , 2 , 3 ]]
1215
1228
expected = expected .reset_index ()
1216
1229
expected .index = MultiIndex .from_frame (expected [["A" , "B" , "idx" ]])
1217
- expected = expected .drop (columns = "idx" )
1230
+ expected = expected .drop (columns = [ "idx" ] )
1218
1231
1219
1232
tm .assert_frame_equal (result , expected )
1220
1233
for val in result .index .levels [1 ]:
0 commit comments