@@ -96,32 +96,29 @@ def test_int64_overflow_groupby_large_range(self):
96
96
97
97
@pytest.mark.parametrize("agg", ["mean", "median"])
def test_int64_overflow_groupby_large_df_shuffled(self, agg):
    """Check ``mean``/``median`` on a large, shuffled, five-key groupby.

    The frame is built from a seeded ``RandomState`` so every run sees the
    same data.  Both value columns are all zeros, so the aggregate of every
    group is ``0.0`` and the expected result can be written down directly:
    one zero row per distinct key tuple.

    Parameters
    ----------
    agg : str
        Name of the groupby aggregation method under test.
    """
    # Fixed seed: the keys, the duplicated rows and the shuffle order are
    # reproducible from run to run.
    rs = np.random.RandomState(42)
    arr = rs.randint(-1 << 12, 1 << 12, (1 << 15, 5))
    i = rs.choice(len(arr), len(arr) * 4)
    arr = np.vstack((arr, arr[i]))  # add some duplicate rows

    i = rs.permutation(len(arr))
    arr = arr[i]  # shuffle rows

    df = DataFrame(arr, columns=list("abcde"))
    # Constant zero values: each group's mean and median is exactly 0.0,
    # so no per-group reference computation is needed.
    df["jim"], df["joe"] = np.zeros((2, len(df)))
    gr = df.groupby(list("abcde"))

    # verify this is testing what it is supposed to test!
    assert is_int64_overflow_possible(gr.grouper.shape)

    # Expected index: the distinct key rows, split back into one 1-D array
    # per key column.
    mi = MultiIndex.from_arrays(
        [ar.ravel() for ar in np.array_split(np.unique(arr, axis=0), 5, axis=1)],
        names=list("abcde"),
    )

    res = DataFrame(
        np.zeros((len(mi), 2)), columns=["jim", "joe"], index=mi
    ).sort_index()

    tm.assert_frame_equal(getattr(gr, agg)(), res)
127
124
0 commit comments