|
1 | 1 | import builtins
|
| 2 | +import datetime as dt |
2 | 3 | from io import StringIO
|
3 | 4 | from itertools import product
|
4 | 5 | from string import ascii_lowercase
|
|
9 | 10 | from pandas.errors import UnsupportedFunctionCall
|
10 | 11 |
|
11 | 12 | import pandas as pd
|
12 |
| -from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range, isna |
| 13 | +from pandas import ( |
| 14 | + DataFrame, |
| 15 | + Index, |
| 16 | + MultiIndex, |
| 17 | + NaT, |
| 18 | + Series, |
| 19 | + Timestamp, |
| 20 | + date_range, |
| 21 | + isna, |
| 22 | +) |
13 | 23 | import pandas.core.nanops as nanops
|
14 | 24 | from pandas.util import _test_decorators as td, testing as tm
|
15 | 25 |
|
@@ -1015,6 +1025,81 @@ def test_nunique_with_timegrouper():
|
1015 | 1025 | tm.assert_series_equal(result, expected)
|
1016 | 1026 |
|
1017 | 1027 |
|
@pytest.mark.parametrize(
    "data, dropna, expected",
    [
        # Timestamp values mixed with NaT in a single group: with dropna=True
        # the NaT entries are ignored, leaving one distinct value.
        (
            DataFrame(
                {
                    "key": ["x", "x", "x", "x", "x"],
                    "data": [
                        Timestamp("2019-01-01 00:00:00"),
                        NaT,
                        Timestamp("2019-01-01 00:00:00"),
                        NaT,
                        Timestamp("2019-01-01 00:00:00"),
                    ],
                }
            ),
            True,
            Series([1], index=pd.Index(["x"], name="key"), name="data"),
        ),
        # Same layout, but with datetime.date objects instead of Timestamps.
        (
            DataFrame(
                {
                    "key": ["x", "x", "x", "x", "x"],
                    "data": [
                        dt.date(2019, 1, 1),
                        NaT,
                        dt.date(2019, 1, 1),
                        NaT,
                        dt.date(2019, 1, 1),
                    ],
                }
            ),
            True,
            Series([1], index=pd.Index(["x"], name="key"), name="data"),
        ),
        # dropna=False: each group holds both the date and NaT, so NaT counts
        # as one extra distinct value per group.
        (
            DataFrame(
                {
                    "key": ["x", "x", "x", "y", "y"],
                    "data": [
                        dt.date(2019, 1, 1),
                        NaT,
                        dt.date(2019, 1, 1),
                        NaT,
                        dt.date(2019, 1, 1),
                    ],
                }
            ),
            False,
            Series([2, 2], index=pd.Index(["x", "y"], name="key"), name="data"),
        ),
        # dropna=False: group "x" holds the date and NaT (2 distinct values),
        # group "y" holds only the date (1 distinct value).
        (
            DataFrame(
                {
                    "key": ["x", "x", "x", "x", "y"],
                    "data": [
                        dt.date(2019, 1, 1),
                        NaT,
                        dt.date(2019, 1, 1),
                        NaT,
                        dt.date(2019, 1, 1),
                    ],
                }
            ),
            False,
            Series([2, 1], index=pd.Index(["x", "y"], name="key"), name="data"),
        ),
    ],
)
def test_nunique_with_NaT(data, dropna, expected):
    """Check per-group ``nunique`` on a column that mixes dates with ``NaT``.

    Parameters
    ----------
    data : DataFrame
        Frame with a ``"key"`` grouping column and a ``"data"`` column that
        contains ``NaT`` alongside ``Timestamp`` or ``datetime.date`` values.
    dropna : bool
        Forwarded to ``nunique``; the cases expect ``NaT`` to be excluded
        from the count when True and counted as one distinct value when False.
    expected : Series
        Expected distinct-value count per key, indexed by ``"key"`` and
        named ``"data"``.
    """
    # GH 27951
    result = data.groupby(["key"])["data"].nunique(dropna=dropna)
    tm.assert_series_equal(result, expected)
| 1101 | + |
| 1102 | + |
1018 | 1103 | def test_nunique_preserves_column_level_names():
|
1019 | 1104 | # GH 23222
|
1020 | 1105 | test = pd.DataFrame([1, 2, 2], columns=pd.Index(["A"], name="level_0"))
|
|
0 commit comments