Skip to content

Commit 46235cd

Browse files
jbrockmendel authored and Kevin D Smith committed
TST/REF: collect _libs tests (pandas-dev#37324)
1 parent eab3239 commit 46235cd

File tree

9 files changed

+397
-390
lines changed

9 files changed

+397
-390
lines changed

pandas/tests/groupby/test_bin_groupby.py

+2-35
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,10 @@
11
import numpy as np
22
import pytest
33

4-
from pandas._libs import groupby, lib, reduction as libreduction
5-
6-
from pandas.core.dtypes.common import ensure_int64
4+
from pandas._libs import lib, reduction as libreduction
75

86
import pandas as pd
9-
from pandas import Series, isna
7+
from pandas import Series
108
import pandas._testing as tm
119

1210

@@ -103,36 +101,5 @@ def test_generate_bins(binner, closed, expected):
103101
tm.assert_numpy_array_equal(result, expected)
104102

105103

106-
def test_group_ohlc():
107-
def _check(dtype):
108-
obj = np.array(np.random.randn(20), dtype=dtype)
109-
110-
bins = np.array([6, 12, 20])
111-
out = np.zeros((3, 4), dtype)
112-
counts = np.zeros(len(out), dtype=np.int64)
113-
labels = ensure_int64(np.repeat(np.arange(3), np.diff(np.r_[0, bins])))
114-
115-
func = getattr(groupby, f"group_ohlc_{dtype}")
116-
func(out, counts, obj[:, None], labels)
117-
118-
def _ohlc(group):
119-
if isna(group).all():
120-
return np.repeat(np.nan, 4)
121-
return [group[0], group.max(), group.min(), group[-1]]
122-
123-
expected = np.array([_ohlc(obj[:6]), _ohlc(obj[6:12]), _ohlc(obj[12:])])
124-
125-
tm.assert_almost_equal(out, expected)
126-
tm.assert_numpy_array_equal(counts, np.array([6, 6, 8], dtype=np.int64))
127-
128-
obj[:6] = np.nan
129-
func(out, counts, obj[:, None], labels)
130-
expected[0] = np.nan
131-
tm.assert_almost_equal(out, expected)
132-
133-
_check("float32")
134-
_check("float64")
135-
136-
137104
class TestMoments:
    """Empty placeholder class; it defines no tests."""
+237
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,237 @@
1+
import numpy as np
2+
3+
from pandas._libs import groupby as libgroupby
4+
from pandas._libs.groupby import (
5+
group_cumprod_float64,
6+
group_cumsum,
7+
group_var_float32,
8+
group_var_float64,
9+
)
10+
11+
from pandas.core.dtypes.common import ensure_int64
12+
13+
from pandas import isna
14+
import pandas._testing as tm
15+
16+
17+
class GroupVarTestMixin:
    """
    Reusable checks for a grouped-variance routine.

    A concrete subclass is expected to provide three class attributes that
    the methods below read through ``self``:

    * ``algo`` -- called as ``algo(out, counts, values, labels)``; the checks
      read its results back from ``out`` and ``counts``.
    * ``dtype`` -- dtype used to build ``out`` and ``values``.
    * ``rtol`` -- relative tolerance handed to ``np.allclose``.
    """

    def test_group_var_generic_1d(self):
        # One column, labels 0..4 repeated three times (three rows per group).
        rng = np.random.RandomState(1234)

        values = 10 * rng.rand(15, 1).astype(self.dtype)
        labels = np.tile(np.arange(5), (3,)).astype("int64")
        out = (np.nan * np.ones((5, 1))).astype(self.dtype)
        counts = np.zeros(5, dtype="int64")

        # Column-major reshape puts the three rows of each label on one row.
        per_group = np.squeeze(values).reshape((5, 3), order="F")
        expected_out = (per_group.std(axis=1, ddof=1) ** 2)[:, np.newaxis]
        # Built before the call because ``counts`` is compared afterwards.
        expected_counts = counts + 3

        self.algo(out, counts, values, labels)

        assert np.allclose(out, expected_out, rtol=self.rtol)
        tm.assert_numpy_array_equal(counts, expected_counts)

    def test_group_var_generic_1d_flat_labels(self):
        # Every row carries label 0, so there is a single group of five rows.
        rng = np.random.RandomState(1234)

        values = 10 * rng.rand(5, 1).astype(self.dtype)
        labels = np.zeros(5, dtype="int64")
        out = (np.nan * np.ones((1, 1))).astype(self.dtype)
        counts = np.zeros(1, dtype="int64")

        sample_variance = values.std(ddof=1) ** 2
        expected_out = np.array([[sample_variance]])
        expected_counts = counts + 5

        self.algo(out, counts, values, labels)

        assert np.allclose(out, expected_out, rtol=self.rtol)
        tm.assert_numpy_array_equal(counts, expected_counts)

    def test_group_var_generic_2d_all_finite(self):
        # Two columns, labels 0..4 repeated twice (two rows per group).
        rng = np.random.RandomState(1234)

        values = 10 * rng.rand(10, 2).astype(self.dtype)
        labels = np.tile(np.arange(5), (2,)).astype("int64")
        out = (np.nan * np.ones((5, 2))).astype(self.dtype)
        counts = np.zeros(5, dtype="int64")

        stacked = values.reshape(2, 5, 2)
        expected_out = np.std(stacked, ddof=1, axis=0) ** 2
        expected_counts = counts + 2

        self.algo(out, counts, values, labels)

        assert np.allclose(out, expected_out, rtol=self.rtol)
        tm.assert_numpy_array_equal(counts, expected_counts)

    def test_group_var_generic_2d_some_nan(self):
        # Same layout as the all-finite case, but the second column is NaN.
        rng = np.random.RandomState(1234)

        values = 10 * rng.rand(10, 2).astype(self.dtype)
        values[:, 1] = np.nan
        labels = np.tile(np.arange(5), (2,)).astype("int64")
        out = (np.nan * np.ones((5, 2))).astype(self.dtype)
        counts = np.zeros(5, dtype="int64")

        first_column = values[:, 0].reshape(5, 2, order="F")
        first_column_var = first_column.std(ddof=1, axis=1) ** 2
        all_nan = np.nan * np.ones(5)
        expected_out = np.vstack([first_column_var, all_nan]).T.astype(self.dtype)
        expected_counts = counts + 2

        self.algo(out, counts, values, labels)

        tm.assert_almost_equal(out, expected_out, rtol=0.5e-06)
        tm.assert_numpy_array_equal(counts, expected_counts)

    def test_group_var_constant(self):
        # Regression test from GH 10448.
        values = 0.832845131556193 * np.ones((3, 1), dtype=self.dtype)
        labels = np.zeros(3, dtype="int64")
        out = np.array([[np.nan]], dtype=self.dtype)
        counts = np.array([0], dtype="int64")

        self.algo(out, counts, values, labels)

        assert counts[0] == 3
        variance = out[0, 0]
        # The result for constant input must be non-negative and ~0.
        assert variance >= 0
        tm.assert_almost_equal(variance, 0.0)
100+
101+
102+
class TestGroupVarFloat64(GroupVarTestMixin):
    """Bind the shared variance checks to ``group_var_float64`` / float64."""

    __test__ = True

    algo = staticmethod(group_var_float64)
    dtype = np.float64
    rtol = 1e-5

    def test_group_var_large_inputs(self):
        # One million ``rand`` draws offset by 10**12, all in a single group;
        # the variance asserted below is 1/12.
        size = 10 ** 6
        rng = np.random.RandomState(1234)

        values = (rng.rand(size) + 10 ** 12).astype(self.dtype).reshape(size, 1)
        labels = np.zeros(size, dtype="int64")
        out = np.array([[np.nan]], dtype=self.dtype)
        counts = np.array([0], dtype="int64")

        self.algo(out, counts, values, labels)

        assert counts[0] == size
        tm.assert_almost_equal(out[0, 0], 1.0 / 12, rtol=0.5e-3)
122+
123+
124+
class TestGroupVarFloat32(GroupVarTestMixin):
    """Bind the shared variance checks to ``group_var_float32`` / float32."""

    __test__ = True

    dtype = np.float32
    rtol = 1e-2
    algo = staticmethod(group_var_float32)
130+
131+
132+
def test_group_ohlc():
    """Check the ``group_ohlc_<dtype>`` routines against a NumPy reference."""

    def _reference(chunk):
        # Expected row: first, max, min, last -- or four NaNs for all-NaN input.
        if isna(chunk).all():
            return np.repeat(np.nan, 4)
        return [chunk[0], chunk.max(), chunk.min(), chunk[-1]]

    def _check(dtype):
        values = np.array(np.random.randn(20), dtype=dtype)

        edges = np.array([6, 12, 20])
        out = np.zeros((3, 4), dtype)
        counts = np.zeros(len(out), dtype=np.int64)
        # Group sizes 6, 6 and 8 -> labels 0, 1, 2 repeated accordingly.
        group_sizes = np.diff(np.r_[0, edges])
        labels = ensure_int64(np.repeat(np.arange(3), group_sizes))

        kernel = getattr(libgroupby, f"group_ohlc_{dtype}")
        kernel(out, counts, values[:, None], labels)

        expected = np.array(
            [
                _reference(values[:6]),
                _reference(values[6:12]),
                _reference(values[12:]),
            ]
        )

        tm.assert_almost_equal(out, expected)
        tm.assert_numpy_array_equal(counts, np.array([6, 6, 8], dtype=np.int64))

        # Blank out the first group and run again: its row is expected
        # to come back as NaN.
        values[:6] = np.nan
        kernel(out, counts, values[:, None], labels)
        expected[0] = np.nan
        tm.assert_almost_equal(out, expected)

    for dtype in ("float32", "float64"):
        _check(dtype)
161+
162+
163+
def _check_cython_group_transform_cumulative(pd_op, np_op, dtype):
164+
"""
165+
Check a group transform that executes a cumulative function.
166+
167+
Parameters
168+
----------
169+
pd_op : callable
170+
The pandas cumulative function.
171+
np_op : callable
172+
The analogous one in NumPy.
173+
dtype : type
174+
The specified dtype of the data.
175+
"""
176+
is_datetimelike = False
177+
178+
data = np.array([[1], [2], [3], [4]], dtype=dtype)
179+
ans = np.zeros_like(data)
180+
181+
labels = np.array([0, 0, 0, 0], dtype=np.int64)
182+
ngroups = 1
183+
pd_op(ans, data, labels, ngroups, is_datetimelike)
184+
185+
tm.assert_numpy_array_equal(np_op(data), ans[:, 0], check_dtype=False)
186+
187+
188+
def test_cython_group_transform_cumsum(any_real_dtype):
    """Check ``group_cumsum`` against ``np.cumsum`` for each real dtype."""
    # see gh-4095
    scalar_type = np.dtype(any_real_dtype).type
    _check_cython_group_transform_cumulative(group_cumsum, np.cumsum, scalar_type)
193+
194+
195+
def test_cython_group_transform_cumprod():
    """Check ``group_cumprod_float64`` against ``np.cumprod`` on float64 data."""
    # see gh-4095
    dtype = np.float64
    # ``np.cumprod`` is the canonical name; the ``np.cumproduct`` alias is
    # deprecated and no longer exists in NumPy 2.0.
    pd_op, np_op = group_cumprod_float64, np.cumprod
    _check_cython_group_transform_cumulative(pd_op, np_op, dtype)
200+
201+
202+
def test_cython_group_transform_algos():
    """
    Check ``group_cumprod_float64`` and ``group_cumsum`` on a single group,
    first with a NaN in the float data and then on timedelta data passed as
    int64.
    """
    # see gh-4095
    labels = np.zeros(5, dtype=np.int64)
    ngroups = 1

    # with nans
    data = np.array([[1], [2], [3], [np.nan], [4]], dtype="float64")

    cumprod_out = np.full_like(data, np.nan)
    group_cumprod_float64(cumprod_out, data, labels, ngroups, False)
    tm.assert_numpy_array_equal(
        cumprod_out[:, 0], np.array([1, 2, 6, np.nan, 24], dtype="float64")
    )

    cumsum_out = np.full_like(data, np.nan)
    group_cumsum(cumsum_out, data, labels, ngroups, False)
    tm.assert_numpy_array_equal(
        cumsum_out[:, 0], np.array([1, 3, 6, np.nan, 10], dtype="float64")
    )

    # timedelta
    one_ns = np.timedelta64(1, "ns")
    data = np.array([one_ns] * 5, dtype="m8[ns]")[:, None]
    td_out = np.zeros_like(data, dtype="int64")
    # The data goes in as an int64 view with is_datetimelike=True.
    group_cumsum(td_out, data.view("int64"), labels, ngroups, True)

    expected = np.array([np.timedelta64(step, "ns") for step in range(1, 6)])
    tm.assert_numpy_array_equal(td_out[:, 0].view("m8[ns]"), expected)

pandas/tests/groupby/transform/test_transform.py

-79
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44
import numpy as np
55
import pytest
66

7-
from pandas._libs.groupby import group_cumprod_float64, group_cumsum
8-
97
from pandas.core.dtypes.common import ensure_platform_int, is_timedelta64_dtype
108

119
import pandas as pd
@@ -515,83 +513,6 @@ def f(group):
515513
tm.assert_frame_equal(res, result.loc[key])
516514

517515

518-
def _check_cython_group_transform_cumulative(pd_op, np_op, dtype):
519-
"""
520-
Check a group transform that executes a cumulative function.
521-
522-
Parameters
523-
----------
524-
pd_op : callable
525-
The pandas cumulative function.
526-
np_op : callable
527-
The analogous one in NumPy.
528-
dtype : type
529-
The specified dtype of the data.
530-
"""
531-
is_datetimelike = False
532-
533-
data = np.array([[1], [2], [3], [4]], dtype=dtype)
534-
ans = np.zeros_like(data)
535-
536-
labels = np.array([0, 0, 0, 0], dtype=np.int64)
537-
ngroups = 1
538-
pd_op(ans, data, labels, ngroups, is_datetimelike)
539-
540-
tm.assert_numpy_array_equal(np_op(data), ans[:, 0], check_dtype=False)
541-
542-
543-
def test_cython_group_transform_cumsum(any_real_dtype):
544-
# see gh-4095
545-
dtype = np.dtype(any_real_dtype).type
546-
pd_op, np_op = group_cumsum, np.cumsum
547-
_check_cython_group_transform_cumulative(pd_op, np_op, dtype)
548-
549-
550-
def test_cython_group_transform_cumprod():
551-
# see gh-4095
552-
dtype = np.float64
553-
pd_op, np_op = group_cumprod_float64, np.cumproduct
554-
_check_cython_group_transform_cumulative(pd_op, np_op, dtype)
555-
556-
557-
def test_cython_group_transform_algos():
558-
# see gh-4095
559-
is_datetimelike = False
560-
561-
# with nans
562-
labels = np.array([0, 0, 0, 0, 0], dtype=np.int64)
563-
ngroups = 1
564-
565-
data = np.array([[1], [2], [3], [np.nan], [4]], dtype="float64")
566-
actual = np.zeros_like(data)
567-
actual.fill(np.nan)
568-
group_cumprod_float64(actual, data, labels, ngroups, is_datetimelike)
569-
expected = np.array([1, 2, 6, np.nan, 24], dtype="float64")
570-
tm.assert_numpy_array_equal(actual[:, 0], expected)
571-
572-
actual = np.zeros_like(data)
573-
actual.fill(np.nan)
574-
group_cumsum(actual, data, labels, ngroups, is_datetimelike)
575-
expected = np.array([1, 3, 6, np.nan, 10], dtype="float64")
576-
tm.assert_numpy_array_equal(actual[:, 0], expected)
577-
578-
# timedelta
579-
is_datetimelike = True
580-
data = np.array([np.timedelta64(1, "ns")] * 5, dtype="m8[ns]")[:, None]
581-
actual = np.zeros_like(data, dtype="int64")
582-
group_cumsum(actual, data.view("int64"), labels, ngroups, is_datetimelike)
583-
expected = np.array(
584-
[
585-
np.timedelta64(1, "ns"),
586-
np.timedelta64(2, "ns"),
587-
np.timedelta64(3, "ns"),
588-
np.timedelta64(4, "ns"),
589-
np.timedelta64(5, "ns"),
590-
]
591-
)
592-
tm.assert_numpy_array_equal(actual[:, 0].view("m8[ns]"), expected)
593-
594-
595516
@pytest.mark.parametrize(
596517
"op, args, targop",
597518
[

pandas/tests/libs/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)