Skip to content

TST/REF: collect _libs tests #37324

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Oct 22, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 2 additions & 35 deletions pandas/tests/groupby/test_bin_groupby.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
import numpy as np
import pytest

from pandas._libs import groupby, lib, reduction as libreduction

from pandas.core.dtypes.common import ensure_int64
from pandas._libs import lib, reduction as libreduction

import pandas as pd
from pandas import Series, isna
from pandas import Series
import pandas._testing as tm


Expand Down Expand Up @@ -103,36 +101,5 @@ def test_generate_bins(binner, closed, expected):
tm.assert_numpy_array_equal(result, expected)


def test_group_ohlc():
    """
    Check the ``group_ohlc_<dtype>`` functions against a NumPy reference.

    Twenty random values are labelled as three consecutive groups of 6, 6
    and 8 rows. For each group the expected row is
    ``[first, max, min, last]``; a group made up entirely of NaN is expected
    to give four NaNs. The check is run for float32 and float64.
    """
    # Seeded generator so that any failure can be reproduced exactly.
    prng = np.random.RandomState(1234)

    def _ohlc(group):
        # Reference open/high/low/close values for a single group.
        if isna(group).all():
            return np.repeat(np.nan, 4)
        return [group[0], group.max(), group.min(), group[-1]]

    def _check(dtype):
        obj = np.array(prng.randn(20), dtype=dtype)

        # Right edges of the three groups -> group sizes 6, 6, 8.
        bins = np.array([6, 12, 20])
        out = np.zeros((3, 4), dtype)
        counts = np.zeros(len(out), dtype=np.int64)
        labels = ensure_int64(np.repeat(np.arange(3), np.diff(np.r_[0, bins])))

        func = getattr(groupby, f"group_ohlc_{dtype}")
        func(out, counts, obj[:, None], labels)

        expected = np.array([_ohlc(obj[:6]), _ohlc(obj[6:12]), _ohlc(obj[12:])])

        tm.assert_almost_equal(out, expected)
        tm.assert_numpy_array_equal(counts, np.array([6, 6, 8], dtype=np.int64))

        # Blank out the first group: its output row must become all-NaN.
        obj[:6] = np.nan
        func(out, counts, obj[:, None], labels)
        expected[0] = np.nan
        tm.assert_almost_equal(out, expected)

    _check("float32")
    _check("float64")


class TestMoments:
    """Placeholder test class; it currently defines no tests."""
237 changes: 237 additions & 0 deletions pandas/tests/groupby/test_libgroupby.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
import numpy as np

from pandas._libs import groupby as libgroupby
from pandas._libs.groupby import (
group_cumprod_float64,
group_cumsum,
group_var_float32,
group_var_float64,
)

from pandas.core.dtypes.common import ensure_int64

from pandas import isna
import pandas._testing as tm


class GroupVarTestMixin:
    """
    Shared checks for a grouped-variance routine.

    Classes using this mixin are expected to provide three attributes:

    * ``algo`` -- called as ``algo(out, counts, values, labels)``; the tests
      inspect ``out`` and ``counts`` afterwards, so it must fill them in place.
    * ``dtype`` -- dtype used for the ``values`` and ``out`` arrays.
    * ``rtol`` -- relative tolerance handed to ``np.allclose``.

    Every expected variance is computed with NumPy using ``ddof=1``.
    """

    def test_group_var_generic_1d(self):
        # One column, fifteen rows, labels cycling 0..4 -> three rows per group.
        rng = np.random.RandomState(1234)

        values = 10 * rng.rand(15, 1).astype(self.dtype)
        labels = np.tile(np.arange(5), (3,)).astype("int64")
        out = np.full((5, 1), np.nan, dtype=self.dtype)
        counts = np.zeros(5, dtype="int64")

        # With Fortran order, row i of the reshape holds the rows labelled i.
        by_group = np.squeeze(values).reshape((5, 3), order="F")
        expected_out = (by_group.std(axis=1, ddof=1) ** 2)[:, np.newaxis]
        expected_counts = np.full(5, 3, dtype="int64")

        self.algo(out, counts, values, labels)

        assert np.allclose(out, expected_out, rtol=self.rtol)
        tm.assert_numpy_array_equal(counts, expected_counts)

    def test_group_var_generic_1d_flat_labels(self):
        # All five rows share label 0, so there is a single group.
        rng = np.random.RandomState(1234)

        values = 10 * rng.rand(5, 1).astype(self.dtype)
        labels = np.zeros(5, dtype="int64")
        out = np.full((1, 1), np.nan, dtype=self.dtype)
        counts = np.zeros(1, dtype="int64")

        expected_out = np.array([[values.std(ddof=1) ** 2]])
        expected_counts = np.full(1, 5, dtype="int64")

        self.algo(out, counts, values, labels)

        assert np.allclose(out, expected_out, rtol=self.rtol)
        tm.assert_numpy_array_equal(counts, expected_counts)

    def test_group_var_generic_2d_all_finite(self):
        # Two columns, ten rows, labels cycling 0..4 -> two rows per group.
        rng = np.random.RandomState(1234)

        values = 10 * rng.rand(10, 2).astype(self.dtype)
        labels = np.tile(np.arange(5), (2,)).astype("int64")
        out = np.full((5, 2), np.nan, dtype=self.dtype)
        counts = np.zeros(5, dtype="int64")

        # Axis 0 of the (2, 5, 2) view pairs up the two rows of each group.
        paired = values.reshape(2, 5, 2)
        expected_out = np.std(paired, ddof=1, axis=0) ** 2
        expected_counts = np.full(5, 2, dtype="int64")

        self.algo(out, counts, values, labels)

        assert np.allclose(out, expected_out, rtol=self.rtol)
        tm.assert_numpy_array_equal(counts, expected_counts)

    def test_group_var_generic_2d_some_nan(self):
        # Same layout as the all-finite case, but the second column is all NaN.
        rng = np.random.RandomState(1234)

        values = 10 * rng.rand(10, 2).astype(self.dtype)
        values[:, 1] = np.nan
        labels = np.tile(np.arange(5), (2,)).astype("int64")
        out = np.full((5, 2), np.nan, dtype=self.dtype)
        counts = np.zeros(5, dtype="int64")

        first_col = values[:, 0].reshape(5, 2, order="F").std(ddof=1, axis=1) ** 2
        second_col = np.full(5, np.nan)
        expected_out = np.column_stack([first_col, second_col]).astype(self.dtype)
        # The expected count is 2 per group even though one column is NaN.
        expected_counts = np.full(5, 2, dtype="int64")

        self.algo(out, counts, values, labels)

        tm.assert_almost_equal(out, expected_out, rtol=0.5e-06)
        tm.assert_numpy_array_equal(counts, expected_counts)

    def test_group_var_constant(self):
        # Regression test from GH 10448.
        # Three identical values in one group: the result must be
        # non-negative and approximately zero.
        values = 0.832845131556193 * np.ones((3, 1), dtype=self.dtype)
        labels = np.zeros(3, dtype="int64")
        out = np.array([[np.nan]], dtype=self.dtype)
        counts = np.array([0], dtype="int64")

        self.algo(out, counts, values, labels)

        variance = out[0, 0]
        assert counts[0] == 3
        assert variance >= 0
        tm.assert_almost_equal(variance, 0.0)


class TestGroupVarFloat64(GroupVarTestMixin):
    """Run the shared grouped-variance checks against ``group_var_float64``."""

    __test__ = True

    dtype = np.float64
    algo = staticmethod(group_var_float64)
    rtol = 1e-5

    def test_group_var_large_inputs(self):
        # One million draws from ``rand`` shifted by 1e12, all in one group.
        # The result is compared with 1/12 using a relative tolerance of 5e-4.
        n_rows = 10 ** 6
        rng = np.random.RandomState(1234)

        shifted = (rng.rand(n_rows) + 10 ** 12).astype(self.dtype)
        values = shifted.reshape(n_rows, 1)
        labels = np.zeros(n_rows, dtype="int64")
        out = np.array([[np.nan]], dtype=self.dtype)
        counts = np.array([0], dtype="int64")

        self.algo(out, counts, values, labels)

        assert counts[0] == n_rows
        tm.assert_almost_equal(out[0, 0], 1.0 / 12, rtol=0.5e-3)


class TestGroupVarFloat32(GroupVarTestMixin):
    """Run the shared grouped-variance checks against ``group_var_float32``."""

    __test__ = True

    dtype = np.float32
    algo = staticmethod(group_var_float32)
    # Looser tolerance than the float64 class uses.
    rtol = 1e-2


def test_group_ohlc():
    """
    Check the ``group_ohlc_<dtype>`` functions against a NumPy reference.

    Twenty random values are labelled as three consecutive groups of 6, 6
    and 8 rows. For each group the expected row is
    ``[first, max, min, last]``; a group made up entirely of NaN is expected
    to give four NaNs. The check is run for float32 and float64.
    """
    # Seeded generator, matching the other tests in this module, so that any
    # failure can be reproduced exactly.
    prng = np.random.RandomState(1234)

    def _ohlc(group):
        # Reference open/high/low/close values for a single group.
        if isna(group).all():
            return np.repeat(np.nan, 4)
        return [group[0], group.max(), group.min(), group[-1]]

    def _check(dtype):
        obj = np.array(prng.randn(20), dtype=dtype)

        # Right edges of the three groups -> group sizes 6, 6, 8.
        bins = np.array([6, 12, 20])
        out = np.zeros((3, 4), dtype)
        counts = np.zeros(len(out), dtype=np.int64)
        labels = ensure_int64(np.repeat(np.arange(3), np.diff(np.r_[0, bins])))

        func = getattr(libgroupby, f"group_ohlc_{dtype}")
        func(out, counts, obj[:, None], labels)

        expected = np.array([_ohlc(obj[:6]), _ohlc(obj[6:12]), _ohlc(obj[12:])])

        tm.assert_almost_equal(out, expected)
        tm.assert_numpy_array_equal(counts, np.array([6, 6, 8], dtype=np.int64))

        # Blank out the first group: its output row must become all-NaN.
        obj[:6] = np.nan
        func(out, counts, obj[:, None], labels)
        expected[0] = np.nan
        tm.assert_almost_equal(out, expected)

    _check("float32")
    _check("float64")


def _check_cython_group_transform_cumulative(pd_op, np_op, dtype):
    """
    Compare a grouped cumulative routine with its NumPy counterpart.

    A single column holding 1, 2, 3, 4 is placed in one group. ``pd_op`` is
    given a zero-initialised buffer of the same shape and dtype to write
    into, and that buffer's only column is then compared, ignoring dtype,
    with ``np_op`` applied to the same data.

    Parameters
    ----------
    pd_op : callable
        The pandas cumulative function, called as
        ``pd_op(out, values, labels, ngroups, is_datetimelike)``.
    np_op : callable
        The analogous one in NumPy.
    dtype : type
        The specified dtype of the data.
    """
    values = np.array([[1], [2], [3], [4]], dtype=dtype)
    result = np.zeros_like(values)

    # Every row carries label 0, so there is exactly one group.
    group_ids = np.zeros(len(values), dtype=np.int64)
    n_groups = 1

    # The last argument is the is_datetimelike flag.
    pd_op(result, values, group_ids, n_groups, False)

    expected = np_op(values)
    tm.assert_numpy_array_equal(expected, result[:, 0], check_dtype=False)


def test_cython_group_transform_cumsum(any_real_dtype):
    """Check ``group_cumsum`` against ``np.cumsum`` for the given dtype."""
    # see gh-4095
    # Normalise the fixture value to its NumPy scalar type.
    scalar_type = np.dtype(any_real_dtype).type
    _check_cython_group_transform_cumulative(group_cumsum, np.cumsum, scalar_type)


def test_cython_group_transform_cumprod():
    """Check ``group_cumprod_float64`` against ``np.cumprod`` on float64 data."""
    # see gh-4095
    dtype = np.float64
    # Use np.cumprod: the np.cumproduct alias is deprecated and no longer
    # exists in NumPy 2.0.
    pd_op, np_op = group_cumprod_float64, np.cumprod
    _check_cython_group_transform_cumulative(pd_op, np_op, dtype)


def test_cython_group_transform_algos():
    """
    Check ``group_cumprod_float64`` and ``group_cumsum`` on one group of five rows.

    The float input contains a NaN: the expected output is NaN at that
    position and the running result carries on after it. ``group_cumsum`` is
    also run on timedelta data passed through its int64 view.
    """
    # see gh-4095
    labels = np.zeros(5, dtype=np.int64)
    ngroups = 1

    # with nans
    data = np.array([[1], [2], [3], [np.nan], [4]], dtype="float64")

    actual = np.full_like(data, np.nan)
    group_cumprod_float64(actual, data, labels, ngroups, False)
    expected = np.array([1, 2, 6, np.nan, 24], dtype="float64")
    tm.assert_numpy_array_equal(actual[:, 0], expected)

    actual = np.full_like(data, np.nan)
    group_cumsum(actual, data, labels, ngroups, False)
    expected = np.array([1, 3, 6, np.nan, 10], dtype="float64")
    tm.assert_numpy_array_equal(actual[:, 0], expected)

    # timedelta (is_datetimelike=True); five 1ns steps accumulate to 1..5 ns
    data = np.array([np.timedelta64(1, "ns")] * 5, dtype="m8[ns]")[:, None]
    actual = np.zeros_like(data, dtype="int64")
    group_cumsum(actual, data.view("int64"), labels, ngroups, True)
    expected = np.array([np.timedelta64(step, "ns") for step in range(1, 6)])
    tm.assert_numpy_array_equal(actual[:, 0].view("m8[ns]"), expected)
79 changes: 0 additions & 79 deletions pandas/tests/groupby/transform/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
import numpy as np
import pytest

from pandas._libs.groupby import group_cumprod_float64, group_cumsum

from pandas.core.dtypes.common import ensure_platform_int, is_timedelta64_dtype

import pandas as pd
Expand Down Expand Up @@ -515,83 +513,6 @@ def f(group):
tm.assert_frame_equal(res, result.loc[key])


def _check_cython_group_transform_cumulative(pd_op, np_op, dtype):
    """
    Compare a grouped cumulative routine with its NumPy counterpart.

    A single column holding 1, 2, 3, 4 is placed in one group. ``pd_op`` is
    given a zero-initialised buffer of the same shape and dtype to write
    into, and that buffer's only column is then compared, ignoring dtype,
    with ``np_op`` applied to the same data.

    Parameters
    ----------
    pd_op : callable
        The pandas cumulative function, called as
        ``pd_op(out, values, labels, ngroups, is_datetimelike)``.
    np_op : callable
        The analogous one in NumPy.
    dtype : type
        The specified dtype of the data.
    """
    values = np.array([[1], [2], [3], [4]], dtype=dtype)
    result = np.zeros_like(values)

    # Every row carries label 0, so there is exactly one group.
    group_ids = np.zeros(len(values), dtype=np.int64)
    n_groups = 1

    # The last argument is the is_datetimelike flag.
    pd_op(result, values, group_ids, n_groups, False)

    expected = np_op(values)
    tm.assert_numpy_array_equal(expected, result[:, 0], check_dtype=False)


def test_cython_group_transform_cumsum(any_real_dtype):
    """Check ``group_cumsum`` against ``np.cumsum`` for the given dtype."""
    # see gh-4095
    # Normalise the fixture value to its NumPy scalar type.
    scalar_type = np.dtype(any_real_dtype).type
    _check_cython_group_transform_cumulative(group_cumsum, np.cumsum, scalar_type)


def test_cython_group_transform_cumprod():
    """Check ``group_cumprod_float64`` against ``np.cumprod`` on float64 data."""
    # see gh-4095
    dtype = np.float64
    # Use np.cumprod: the np.cumproduct alias is deprecated and no longer
    # exists in NumPy 2.0.
    pd_op, np_op = group_cumprod_float64, np.cumprod
    _check_cython_group_transform_cumulative(pd_op, np_op, dtype)


def test_cython_group_transform_algos():
    """
    Check ``group_cumprod_float64`` and ``group_cumsum`` on one group of five rows.

    The float input contains a NaN: the expected output is NaN at that
    position and the running result carries on after it. ``group_cumsum`` is
    also run on timedelta data passed through its int64 view.
    """
    # see gh-4095
    labels = np.zeros(5, dtype=np.int64)
    ngroups = 1

    # with nans
    data = np.array([[1], [2], [3], [np.nan], [4]], dtype="float64")

    actual = np.full_like(data, np.nan)
    group_cumprod_float64(actual, data, labels, ngroups, False)
    expected = np.array([1, 2, 6, np.nan, 24], dtype="float64")
    tm.assert_numpy_array_equal(actual[:, 0], expected)

    actual = np.full_like(data, np.nan)
    group_cumsum(actual, data, labels, ngroups, False)
    expected = np.array([1, 3, 6, np.nan, 10], dtype="float64")
    tm.assert_numpy_array_equal(actual[:, 0], expected)

    # timedelta (is_datetimelike=True); five 1ns steps accumulate to 1..5 ns
    data = np.array([np.timedelta64(1, "ns")] * 5, dtype="m8[ns]")[:, None]
    actual = np.zeros_like(data, dtype="int64")
    group_cumsum(actual, data.view("int64"), labels, ngroups, True)
    expected = np.array([np.timedelta64(step, "ns") for step in range(1, 6)])
    tm.assert_numpy_array_equal(actual[:, 0].view("m8[ns]"), expected)


@pytest.mark.parametrize(
"op, args, targop",
[
Expand Down
Empty file added pandas/tests/libs/__init__.py
Empty file.
Loading