@@ -2622,14 +2622,12 @@ def test_filter_out_no_groups(self):
2622
2622
grouper = s .apply (lambda x : x % 2 )
2623
2623
grouped = s .groupby (grouper )
2624
2624
filtered = grouped .filter (lambda x : x .mean () > 0 )
2625
- filtered .sort () # was sorted by group
2626
- s .sort () # was sorted arbitrarily
2627
2625
assert_series_equal (filtered , s )
2628
2626
df = pd .DataFrame ({'A' : [1 , 12 , 12 , 1 ], 'B' : 'a b c d' .split ()})
2629
2627
grouper = df ['A' ].apply (lambda x : x % 2 )
2630
2628
grouped = df .groupby (grouper )
2631
2629
filtered = grouped .filter (lambda x : x ['A' ].mean () > 0 )
2632
- assert_frame_equal (filtered . sort () , df )
2630
+ assert_frame_equal (filtered , df )
2633
2631
2634
2632
def test_filter_condition_raises (self ):
2635
2633
import pandas as pd
@@ -2706,7 +2704,7 @@ def test_filter_against_workaround(self):
2706
2704
old_way = df [grouped .floats .\
2707
2705
transform (lambda x : x .mean () > N / 20 ).astype ('bool' )]
2708
2706
new_way = grouped .filter (lambda x : x ['floats' ].mean () > N / 20 )
2709
- assert_frame_equal (new_way . sort () , old_way . sort () )
2707
+ assert_frame_equal (new_way , old_way )
2710
2708
2711
2709
# Group by floats (rounded); filter on strings.
2712
2710
grouper = df .floats .apply (lambda x : np .round (x , - 1 ))
@@ -2715,14 +2713,14 @@ def test_filter_against_workaround(self):
2715
2713
transform (lambda x : len (x ) < N / 10 ).astype ('bool' )]
2716
2714
new_way = grouped .filter (
2717
2715
lambda x : len (x .letters ) < N / 10 )
2718
- assert_frame_equal (new_way . sort () , old_way . sort () )
2716
+ assert_frame_equal (new_way , old_way )
2719
2717
2720
2718
# Group by strings; filter on ints.
2721
2719
grouped = df .groupby ('letters' )
2722
2720
old_way = df [grouped .ints .\
2723
2721
transform (lambda x : x .mean () > N / 20 ).astype ('bool' )]
2724
2722
new_way = grouped .filter (lambda x : x ['ints' ].mean () > N / 20 )
2725
- assert_frame_equal (new_way . sort_index () , old_way . sort_index () )
2723
+ assert_frame_equal (new_way , old_way )
2726
2724
2727
2725
def test_filter_using_len (self ):
2728
2726
# BUG GH4447
@@ -2747,6 +2745,48 @@ def test_filter_using_len(self):
2747
2745
expected = s [[]]
2748
2746
assert_series_equal (actual , expected )
2749
2747
2748
+ def test_filter_maintains_ordering (self ):
2749
+ # Simple case: index is sequential. #4621
2750
+ df = DataFrame ({'pid' : [1 ,1 ,1 ,2 ,2 ,3 ,3 ,3 ],
2751
+ 'tag' : [23 ,45 ,62 ,24 ,45 ,34 ,25 ,62 ]})
2752
+ s = df ['pid' ]
2753
+ grouped = df .groupby ('tag' )
2754
+ actual = grouped .filter (lambda x : len (x ) > 1 )
2755
+ expected = df .iloc [[1 , 2 , 4 , 7 ]]
2756
+ assert_frame_equal (actual , expected )
2757
+
2758
+ grouped = s .groupby (df ['tag' ])
2759
+ actual = grouped .filter (lambda x : len (x ) > 1 )
2760
+ expected = s .iloc [[1 , 2 , 4 , 7 ]]
2761
+ assert_series_equal (actual , expected )
2762
+
2763
+ # Now index is sequentially decreasing.
2764
+ df .index = np .arange (len (df ) - 1 , - 1 , - 1 )
2765
+ s = df ['pid' ]
2766
+ grouped = df .groupby ('tag' )
2767
+ actual = grouped .filter (lambda x : len (x ) > 1 )
2768
+ expected = df .iloc [[1 , 2 , 4 , 7 ]]
2769
+ assert_frame_equal (actual , expected )
2770
+
2771
+ grouped = s .groupby (df ['tag' ])
2772
+ actual = grouped .filter (lambda x : len (x ) > 1 )
2773
+ expected = s .iloc [[1 , 2 , 4 , 7 ]]
2774
+ assert_series_equal (actual , expected )
2775
+
2776
+ # Index is shuffled.
2777
+ SHUFFLED = [4 , 6 , 7 , 2 , 1 , 0 , 5 , 3 ]
2778
+ df .index = df .index [SHUFFLED ]
2779
+ s = df ['pid' ]
2780
+ grouped = df .groupby ('tag' )
2781
+ actual = grouped .filter (lambda x : len (x ) > 1 )
2782
+ expected = df .iloc [[1 , 2 , 4 , 7 ]]
2783
+ assert_frame_equal (actual , expected )
2784
+
2785
+ grouped = s .groupby (df ['tag' ])
2786
+ actual = grouped .filter (lambda x : len (x ) > 1 )
2787
+ expected = s .iloc [[1 , 2 , 4 , 7 ]]
2788
+ assert_series_equal (actual , expected )
2789
+
2750
2790
def test_groupby_whitelist (self ):
2751
2791
from string import ascii_lowercase
2752
2792
letters = np .array (list (ascii_lowercase ))
0 commit comments