Skip to content

Commit abffeff

Browse files
dsaxton authored and rhshadrach committed
TST/CLN: Break out groupby function tests (pandas-dev#34073)
1 parent ccd5c13 commit abffeff

File tree

6 files changed

+632
-627
lines changed

6 files changed

+632
-627
lines changed

pandas/tests/groupby/test_counting.py

+134-1
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,20 @@
11
from itertools import product
2+
from string import ascii_lowercase
23

34
import numpy as np
45
import pytest
56

6-
from pandas import DataFrame, Index, MultiIndex, Period, Series, Timedelta, Timestamp
7+
import pandas as pd
8+
from pandas import (
9+
DataFrame,
10+
Index,
11+
MultiIndex,
12+
Period,
13+
Series,
14+
Timedelta,
15+
Timestamp,
16+
date_range,
17+
)
718
import pandas._testing as tm
819

920

@@ -229,3 +240,125 @@ def test_count_groupby_column_with_nan_in_groupby_column(self):
229240
index=Index([0.0, 3.0, 4.0, 5.0], name="B"), data={"A": [1, 1, 1, 1]}
230241
)
231242
tm.assert_frame_equal(expected, res)
243+
244+
245+
def test_groupby_timedelta_cython_count():
    """Counting a timedelta64 column per group gives each group's row count."""
    # Four nanosecond timedeltas, alternating between groups "a" and "b".
    deltas = np.arange(4).astype("timedelta64[ns]")
    labels = list("ab" * 2)
    df = DataFrame({"g": labels, "delt": deltas})

    result = df.groupby("g").delt.count()

    group_index = pd.Index(["a", "b"], name="g")
    expected = Series([2, 2], index=group_index, name="delt")
    tm.assert_series_equal(expected, result)
252+
253+
254+
def test_count():
    """Check ``groupby(...).count()`` against applying ``DataFrame.count``.

    A frame mixing string, integer, float, datetime, timedelta and
    categorical columns is built, up to a tenth of the rows of every
    non-integer, non-key column are set to missing, and the two ways of
    counting are compared for a string key, an integer key, and both keys
    together.
    """
    n = 1 << 15
    # "min" is the non-deprecated spelling of the minute frequency alias.
    dr = date_range("2015-08-30", periods=n // 10, freq="min")

    # A private, seeded generator keeps the data reproducible between runs
    # without touching NumPy's global random state.
    rng = np.random.RandomState(0)

    df = DataFrame(
        {
            "1st": rng.choice(list(ascii_lowercase), n),
            "2nd": rng.randint(0, 5, n),
            "3rd": rng.randn(n).round(3),
            "4th": rng.randint(-10, 10, n),
            "5th": rng.choice(dr, n),
            "6th": rng.randn(n).round(3),
            "7th": rng.randn(n).round(3),
            "8th": rng.choice(dr, n) - rng.choice(dr, 1),
            "9th": rng.choice(list(ascii_lowercase), n),
        }
    )

    # Punch missing values into every column except the grouping keys and the
    # other integer column.
    for col in df.columns.drop(["1st", "2nd", "4th"]):
        df.loc[rng.choice(n, n // 10), col] = np.nan

    df["9th"] = df["9th"].astype("category")

    for key in ["1st", "2nd", ["1st", "2nd"]]:
        left = df.groupby(key).count()
        # errors="ignore": the key columns only need dropping when apply()
        # passed them through to DataFrame.count; if they are already absent
        # there is nothing to remove.
        right = (
            df.groupby(key)
            .apply(DataFrame.count)
            .drop(columns=key, errors="ignore")
        )
        tm.assert_frame_equal(left, right)
281+
282+
283+
def test_count_non_nulls():
    """``count`` tallies only non-null entries, with and without ``as_index``."""
    # GH#5610
    rows = [[1, 2, "foo"], [1, np.nan, "bar"], [3, np.nan, np.nan]]
    df = pd.DataFrame(rows, columns=["A", "B", "C"])

    indexed_counts = df.groupby("A").count()
    flat_counts = df.groupby("A", as_index=False).count()

    # Group 1 holds one non-null "B" and two non-null "C"; group 3 holds none.
    expected = DataFrame(
        [[1, 2], [0, 0]],
        columns=["B", "C"],
        index=pd.Index([1, 3], name="A"),
    )
    tm.assert_frame_equal(flat_counts, expected.reset_index())
    tm.assert_frame_equal(indexed_counts, expected)

    # Selecting a single column before counting must give the same numbers.
    b_counts = df.groupby("A")["B"].count()
    tm.assert_series_equal(b_counts, expected["B"])
301+
302+
303+
def test_count_object():
    """Counting a column of strings per group leaves out NaN entries."""
    # Each case pairs the values of column "a" with the expected per-group
    # counts for groups c == 2 and c == 3.
    cases = [
        (["a"] * 3 + ["b"] * 3, [3, 3]),
        (["a", np.nan, np.nan] + ["b"] * 3, [1, 3]),
    ]
    for values, counts in cases:
        df = pd.DataFrame({"a": values, "c": [2] * 3 + [3] * 3})
        result = df.groupby("c").a.count()
        expected = pd.Series(counts, index=pd.Index([2, 3], name="c"), name="a")
        tm.assert_series_equal(result, expected)
313+
314+
315+
def test_count_cross_type():
    """Group counts must be the same for float64, float32 and object columns."""
    # GH8169
    # Seeded private generator: reproducible data, global random state untouched.
    rng = np.random.RandomState(0)
    # Start from float64 so that writing NaN below never has to upcast an
    # integer column.
    vals = np.hstack(
        (rng.randint(0, 5, (100, 2)), rng.randint(0, 2, (100, 2)))
    ).astype("float64")

    df = pd.DataFrame(vals, columns=["a", "b", "c", "d"])
    # Every 2 becomes missing; only "a" and "b" can contain a 2.
    df[df == 2] = np.nan
    expected = df.groupby(["c", "d"]).count()

    for t in ["float32", "object"]:
        df["a"] = df["a"].astype(t)
        df["b"] = df["b"].astype(t)
        result = df.groupby(["c", "d"]).count()
        tm.assert_frame_equal(result, expected)
330+
331+
332+
def test_lower_int_prec_count():
    """``count`` handles int8, uint32 and int16 value columns."""
    data = {
        "a": np.array([0, 1, 2, 100], np.int8),
        "b": np.array([1, 2, 3, 6], np.uint32),
        "c": np.array([4, 5, 6, 8], np.int16),
        "grp": list("ab" * 2),
    }
    result = DataFrame(data).groupby("grp").count()

    # Two rows fall into each of the groups "a" and "b" for every column.
    group_index = pd.Index(list("ab"), name="grp")
    expected = DataFrame(
        {column: [2, 2] for column in ("a", "b", "c")}, index=group_index
    )
    tm.assert_frame_equal(result, expected)
346+
347+
348+
def test_count_uses_size_on_exception():
    """``count`` still returns group counts for objects whose ``__eq__`` raises."""

    class RaisingObjectException(Exception):
        """Error raised by ``RaisingObject.__eq__``."""

    class RaisingObject:
        """Object that raises on every equality comparison."""

        def __init__(self, msg="I will raise inside Cython"):
            super().__init__()
            self.msg = msg

        def __eq__(self, other):
            # gets called in Cython to check that raising calls the method
            raise RaisingObjectException(self.msg)

    raisers = [RaisingObject() for _ in range(4)]
    labels = list("ab" * 2)
    df = DataFrame({"a": raisers, "grp": labels})

    result = df.groupby("grp").count()

    group_index = pd.Index(list("ab"), name="grp")
    expected = DataFrame({"a": [2, 2]}, index=group_index)
    tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)