Skip to content

ENH: Add support for min_count keyword for Resample and Groupby functions #37870

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged — 8 commits were merged on Nov 26, 2020.
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@ Other enhancements
- Improve error reporting for :meth:`DataFrame.merge()` when invalid merge column definitions were given (:issue:`16228`)
- Improve numerical stability for :meth:`Rolling.skew()`, :meth:`Rolling.kurt()`, :meth:`Expanding.skew()` and :meth:`Expanding.kurt()` through implementation of Kahan summation (:issue:`6929`)
- Improved error reporting for subsetting columns of a :class:`DataFrameGroupBy` with ``axis=1`` (:issue:`37725`)
- Add support for ``min_count`` keyword for :meth:`DataFrame.groupby` and :meth:`DataFrame.resample` for functions ``min``, ``max``, ``first`` and ``last`` (:issue:`37821`, :issue:`37768`)

.. ---------------------------------------------------------------------------

Expand Down
22 changes: 9 additions & 13 deletions pandas/_libs/groupby.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -903,8 +903,6 @@ def group_last(rank_t[:, :] out,
ndarray[int64_t, ndim=2] nobs
bint runtime_error = False

assert min_count == -1, "'min_count' only used in add and prod"

# TODO(cython 3.0):
# Instead of `labels.shape[0]` use `len(labels)`
if not len(values) == labels.shape[0]:
Expand Down Expand Up @@ -939,7 +937,7 @@ def group_last(rank_t[:, :] out,

for i in range(ncounts):
for j in range(K):
if nobs[i, j] == 0:
if nobs[i, j] < min_count or nobs[i, j] == 0:
out[i, j] = NAN
else:
out[i, j] = resx[i, j]
Expand All @@ -961,7 +959,7 @@ def group_last(rank_t[:, :] out,

for i in range(ncounts):
for j in range(K):
if nobs[i, j] == 0:
if nobs[i, j] < min_count or nobs[i, j] == 0:
if rank_t is int64_t:
out[i, j] = NPY_NAT
elif rank_t is uint64_t:
Expand All @@ -986,7 +984,8 @@ def group_last(rank_t[:, :] out,
def group_nth(rank_t[:, :] out,
int64_t[:] counts,
ndarray[rank_t, ndim=2] values,
const int64_t[:] labels, int64_t rank=1
const int64_t[:] labels,
int64_t min_count=-1, int64_t rank=1
):
"""
Only aggregates on axis=0
Expand Down Expand Up @@ -1033,7 +1032,7 @@ def group_nth(rank_t[:, :] out,

for i in range(ncounts):
for j in range(K):
if nobs[i, j] == 0:
if nobs[i, j] < min_count or nobs[i, j] == 0:
out[i, j] = NAN
else:
out[i, j] = resx[i, j]
Expand All @@ -1057,7 +1056,7 @@ def group_nth(rank_t[:, :] out,

for i in range(ncounts):
for j in range(K):
if nobs[i, j] == 0:
if nobs[i, j] < min_count or nobs[i, j] == 0:
if rank_t is int64_t:
out[i, j] = NPY_NAT
elif rank_t is uint64_t:
Expand Down Expand Up @@ -1294,8 +1293,6 @@ def group_max(groupby_t[:, :] out,
bint runtime_error = False
int64_t[:, :] nobs

assert min_count == -1, "'min_count' only used in add and prod"

# TODO(cython 3.0):
# Instead of `labels.shape[0]` use `len(labels)`
if not len(values) == labels.shape[0]:
Expand Down Expand Up @@ -1337,11 +1334,12 @@ def group_max(groupby_t[:, :] out,

for i in range(ncounts):
for j in range(K):
if nobs[i, j] == 0:
if nobs[i, j] < min_count or nobs[i, j] == 0:
if groupby_t is uint64_t:
runtime_error = True
break
else:

out[i, j] = nan_val
else:
out[i, j] = maxx[i, j]
Expand Down Expand Up @@ -1369,8 +1367,6 @@ def group_min(groupby_t[:, :] out,
bint runtime_error = False
int64_t[:, :] nobs

assert min_count == -1, "'min_count' only used in add and prod"

# TODO(cython 3.0):
# Instead of `labels.shape[0]` use `len(labels)`
if not len(values) == labels.shape[0]:
Expand Down Expand Up @@ -1411,7 +1407,7 @@ def group_min(groupby_t[:, :] out,

for i in range(ncounts):
for j in range(K):
if nobs[i, j] == 0:
if nobs[i, j] < min_count or nobs[i, j] == 0:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you avoid the double lookup/comparison here by setting min_count = max(min_count, 1) outside the loop?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

if groupby_t is uint64_t:
runtime_error = True
break
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,7 @@ def _aggregate(
):
if agg_func is libgroupby.group_nth:
# different signature from the others
agg_func(result, counts, values, comp_ids, rank=1)
agg_func(result, counts, values, comp_ids, min_count, rank=1)
else:
agg_func(result, counts, values, comp_ids, min_count)

Expand Down
4 changes: 2 additions & 2 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -950,7 +950,7 @@ def quantile(self, q=0.5, **kwargs):


# downsample methods
for method in ["sum", "prod"]:
for method in ["sum", "prod", "min", "max", "first", "last"]:

def f(self, _method=method, min_count=0, *args, **kwargs):
nv.validate_resampler_func(_method, args, kwargs)
Expand All @@ -961,7 +961,7 @@ def f(self, _method=method, min_count=0, *args, **kwargs):


# downsample methods
for method in ["min", "max", "first", "last", "mean", "sem", "median", "ohlc"]:
for method in ["mean", "sem", "median", "ohlc"]:

def g(self, _method=method, *args, **kwargs):
nv.validate_resampler_func(_method, args, kwargs)
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/groupby/aggregate/test_other.py
Original file line number Diff line number Diff line change
Expand Up @@ -638,3 +638,12 @@ def weird_func(x):

result = df["decimals"].groupby(df["id1"]).agg(weird_func)
tm.assert_series_equal(result, expected, check_names=False)


@pytest.mark.parametrize("func", ["first", "last", "max", "min"])
def test_min_count_implementation_min_max_first_last(func):
    # GH#37821: min/max/first/last must honor the min_count keyword.
    # Group "a" has three rows but only one non-NA value in "b", so
    # min_count=2 forces the aggregated result to NaN.
    frame = DataFrame({"a": [1, 1, 1], "b": [1, np.nan, np.nan]})
    grouped = frame.groupby("a")
    result = getattr(grouped, func)(min_count=2)
    expected = DataFrame({"b": [np.nan]}, index=Index([1], name="a"))
    tm.assert_frame_equal(result, expected)
13 changes: 13 additions & 0 deletions pandas/tests/resample/test_datetime_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1785,3 +1785,16 @@ def test_resample_calendar_day_with_dst(
1.0, pd.date_range(first, exp_last, freq=freq_out, tz="Europe/Amsterdam")
)
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("func", ["min", "max", "first", "last"])
def test_resample_aggregate_functions_min_count(func):
    # GH#37768: resample aggregations min/max/first/last must honor the
    # min_count keyword.  The single quarterly bin holds only one non-NA
    # value, so min_count=2 forces the result to NaN.
    idx = date_range(start="2020", freq="M", periods=3)
    series = Series([1, np.nan, np.nan], idx)
    resampled = series.resample("Q")
    result = getattr(resampled, func)(min_count=2)
    expected_index = DatetimeIndex(
        ["2020-03-31"], dtype="datetime64[ns]", freq="Q-DEC"
    )
    expected = Series([np.nan], index=expected_index)
    tm.assert_series_equal(result, expected)