Skip to content

Commit d7f64de

Browse files
committed
merge with upstream
2 parents 2d65e38 + 0725440 commit d7f64de

File tree

3 files changed

+90
-1
lines changed

3 files changed

+90
-1
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ Groupby/resample/rolling
178178

179179
-
180180
- Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue:`28192`)
181+
- Bug in :meth:`pandas.core.groupby.SeriesGroupBy.nunique` where ``NaT`` values were interfering with the count of unique values (:issue:`27951`)
181182
- Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`)
182183
- Bug in :meth:`pandas.core.groupby.DataFrameGroupBy.agg` not able to use lambda function with named aggregation (:issue:`27519`)
183184

pandas/core/groupby/generic.py

+3
Original file line numberDiff line numberDiff line change
@@ -1147,6 +1147,9 @@ def nunique(self, dropna=True):
11471147

11481148
val = self.obj._internal_get_values()
11491149

1150+
# GH 27951
1151+
val[isna(val)] = np.datetime64("NaT")
1152+
11501153
try:
11511154
sorter = np.lexsort((val, ids))
11521155
except TypeError: # catches object dtypes

pandas/tests/groupby/test_function.py

+86-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import builtins
2+
import datetime as dt
23
from io import StringIO
34
from itertools import product
45
from string import ascii_lowercase
@@ -9,7 +10,16 @@
910
from pandas.errors import UnsupportedFunctionCall
1011

1112
import pandas as pd
12-
from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range, isna
13+
from pandas import (
14+
DataFrame,
15+
Index,
16+
MultiIndex,
17+
NaT,
18+
Series,
19+
Timestamp,
20+
date_range,
21+
isna,
22+
)
1323
import pandas.core.nanops as nanops
1424
from pandas.util import _test_decorators as td, testing as tm
1525

@@ -1015,6 +1025,81 @@ def test_nunique_with_timegrouper():
10151025
tm.assert_series_equal(result, expected)
10161026

10171027

1028+
@pytest.mark.parametrize(
1029+
"data, dropna, expected",
1030+
[
1031+
(
1032+
DataFrame(
1033+
{
1034+
"key": ["x", "x", "x", "x", "x"],
1035+
"data": [
1036+
Timestamp("2019-01-01 00:00:00"),
1037+
NaT,
1038+
Timestamp("2019-01-01 00:00:00"),
1039+
NaT,
1040+
Timestamp("2019-01-01 00:00:00"),
1041+
],
1042+
}
1043+
),
1044+
True,
1045+
Series([1], index=pd.Index(["x"], name="key"), name="data"),
1046+
),
1047+
(
1048+
DataFrame(
1049+
{
1050+
"key": ["x", "x", "x", "x", "x"],
1051+
"data": [
1052+
dt.date(2019, 1, 1),
1053+
NaT,
1054+
dt.date(2019, 1, 1),
1055+
NaT,
1056+
dt.date(2019, 1, 1),
1057+
],
1058+
}
1059+
),
1060+
True,
1061+
Series([1], index=pd.Index(["x"], name="key"), name="data"),
1062+
),
1063+
(
1064+
DataFrame(
1065+
{
1066+
"key": ["x", "x", "x", "y", "y"],
1067+
"data": [
1068+
dt.date(2019, 1, 1),
1069+
NaT,
1070+
dt.date(2019, 1, 1),
1071+
NaT,
1072+
dt.date(2019, 1, 1),
1073+
],
1074+
}
1075+
),
1076+
False,
1077+
Series([2, 2], index=pd.Index(["x", "y"], name="key"), name="data"),
1078+
),
1079+
(
1080+
DataFrame(
1081+
{
1082+
"key": ["x", "x", "x", "x", "y"],
1083+
"data": [
1084+
dt.date(2019, 1, 1),
1085+
NaT,
1086+
dt.date(2019, 1, 1),
1087+
NaT,
1088+
dt.date(2019, 1, 1),
1089+
],
1090+
}
1091+
),
1092+
False,
1093+
Series([2, 1], index=pd.Index(["x", "y"], name="key"), name="data"),
1094+
),
1095+
],
1096+
)
1097+
def test_nunique_with_NaT(data, dropna, expected):
1098+
# GH 27951
1099+
result = data.groupby(["key"])["data"].nunique(dropna=dropna)
1100+
tm.assert_series_equal(result, expected)
1101+
1102+
10181103
def test_nunique_preserves_column_level_names():
10191104
# GH 23222
10201105
test = pd.DataFrame([1, 2, 2], columns=pd.Index(["A"], name="level_0"))

0 commit comments

Comments
 (0)