|
2 | 2 | from io import StringIO
|
3 | 3 | from itertools import product
|
4 | 4 | from string import ascii_lowercase
|
| 5 | +import datetime as dt |
5 | 6 |
|
6 | 7 | import numpy as np
|
7 | 8 | import pytest
|
8 | 9 |
|
9 | 10 | from pandas.errors import UnsupportedFunctionCall
|
10 | 11 |
|
11 | 12 | import pandas as pd
|
12 |
| -from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range, isna |
| 13 | +from pandas import ( |
| 14 | + DataFrame, |
| 15 | + Index, |
| 16 | + MultiIndex, |
| 17 | + Series, |
| 18 | + Timestamp, |
| 19 | + date_range, |
| 20 | + isna, |
| 21 | + NaT, |
| 22 | +) |
13 | 23 | import pandas.core.nanops as nanops
|
14 | 24 | from pandas.util import _test_decorators as td, testing as tm
|
15 | 25 |
|
@@ -1015,6 +1025,42 @@ def test_nunique_with_timegrouper():
|
1015 | 1025 | tm.assert_series_equal(result, expected)
|
1016 | 1026 |
|
1017 | 1027 |
|
@pytest.mark.parametrize(
    "key, data, dropna, expected",
    [
        # Timestamp values with one NaT; dropna=True leaves a single unique value.
        (
            ["x", "x", "x"],
            [Timestamp("2019-01-01"), NaT, Timestamp("2019-01-01")],
            True,
            Series([1], index=pd.Index(["x"], name="key"), name="data"),
        ),
        # Same layout with datetime.date objects instead of Timestamps.
        (
            ["x", "x", "x"],
            [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)],
            True,
            Series([1], index=pd.Index(["x"], name="key"), name="data"),
        ),
        # dropna=False: each group holds one date plus NaT, so NaT is counted.
        (
            ["x", "x", "x", "y", "y"],
            [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)],
            False,
            Series([2, 2], index=pd.Index(["x", "y"], name="key"), name="data"),
        ),
        # dropna=False: group "x" has a date and repeated NaT, "y" only a date.
        (
            ["x", "x", "x", "x", "y"],
            [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)],
            False,
            Series([2, 1], index=pd.Index(["x", "y"], name="key"), name="data"),
        ),
    ],
)
def test_nunique_with_NaT(key, data, dropna, expected):
    """Check per-group ``nunique`` on a column that contains ``NaT``.

    Parameters
    ----------
    key : list
        Values for the grouping column ``"key"``.
    data : list
        Values for the ``"data"`` column whose unique entries are counted.
    dropna : bool
        Passed through to ``nunique``.
    expected : Series
        The per-group counts the result is compared against.
    """
    # GH 27951
    frame = pd.DataFrame({"key": key, "data": data})
    data_by_key = frame.groupby(["key"])["data"]
    observed = data_by_key.nunique(dropna=dropna)
    tm.assert_series_equal(observed, expected)
| 1062 | + |
| 1063 | + |
1018 | 1064 | def test_nunique_preserves_column_level_names():
|
1019 | 1065 | # GH 23222
|
1020 | 1066 | test = pd.DataFrame([1, 2, 2], columns=pd.Index(["A"], name="level_0"))
|
|
0 commit comments