pandas-dev · jreback · Dec 22, 2020 · Dec 11, 2020 · Dec 11, 2020 · Dec 11, 2020
diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst
@@ -1888,6 +1888,34 @@ Those two examples are equivalent for this time series:
 
 Note the use of ``'start'`` for ``origin`` on the last example. In that case, ``origin`` will be set to the first value of the timeseries.
 
+Backward resample
+~~~~~~~~~~~~~~~~~
+
+.. versionadded:: 1.3.0
+
+Instead of adjusting the beginning of bins, sometimes we need to fix the end of the bins to make a backward resample with a given ``freq``. The backward resample sets ``closed`` to ``'right'`` by default since the last value should be considered as the edge point for the last bin.
+
+We can set ``origin`` to ``'end'``. The value at a given ``Timestamp`` label is then the resample result over the interval from that ``Timestamp`` minus ``freq`` up to that ``Timestamp``, closed on the right.
+
+.. ipython:: python
+
+   ts.resample('17min', origin='end').sum()
+
+In addition, as a counterpart to the ``'start_day'`` option, ``'end_day'`` is supported. This sets the origin to the ceiling midnight of the largest ``Timestamp``.
+
+.. ipython:: python
+
+   ts.resample('17min', origin='end_day').sum()
+
+The above result uses ``2000-10-02 00:29:00`` as the last bin's right edge, as the following computation shows.
+
+.. ipython:: python
+
+   ceil_mid = rng.max().ceil('D')
+   freq = pd.offsets.Minute(17)
+   bin_res = ceil_mid - freq * ((ceil_mid - rng.max()) // freq)
+   bin_res
+
 .. _timeseries.periods:
 
 Time span representation

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -40,6 +40,7 @@ Other enhancements
 ^^^^^^^^^^^^^^^^^^
 
 - Added :meth:`MultiIndex.dtypes` (:issue:`37062`)
+- Added ``end`` and ``end_day`` options for ``origin`` in :meth:`DataFrame.resample` (:issue:`37804`)
 - Improve error message when ``usecols`` and ``names`` do not match for :func:`read_csv` and ``engine="c"`` (:issue:`29042`)
 
 .. ---------------------------------------------------------------------------

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -8050,7 +8050,8 @@ def resample(
         level : str or int, optional
             For a MultiIndex, level (name or number) to use for
             resampling. `level` must be datetime-like.
-        origin : {{'epoch','start','start_day'}}, Timestamp or str, default 'start_day'
+        origin : {{'epoch', 'start', 'start_day', 'end', 'end_day'}}, Timestamp
+            or str, default 'start_day'
             The timestamp on which to adjust the grouping. The timezone of origin
             must match the timezone of the index.
             If a timestamp is not used, these values are also supported:
@@ -8061,6 +8062,11 @@ def resample(
 
             .. versionadded:: 1.1.0
 
+            - 'end': `origin` is the last value of the timeseries
+            - 'end_day': `origin` is the ceiling midnight of the last day
+
+            .. versionadded:: 1.3.0
+
         offset : Timedelta or str, default is None
             An offset timedelta added to the origin.
 
@@ -8343,6 +8349,26 @@ def resample(
         2000-10-02 00:21:00    24
         Freq: 17T, dtype: int64
 
+        If you want to take the largest Timestamp as the end of the bins:
+
+        >>> ts.resample('17min', origin='end').sum()
+        2000-10-01 23:35:00     0
+        2000-10-01 23:52:00    18
+        2000-10-02 00:09:00    27
+        2000-10-02 00:26:00    63
+        Freq: 17T, dtype: int64
+
+        In contrast with `start_day`, you can use `end_day` to take the ceiling
+        midnight of the largest Timestamp as the end of the bins and drop the bins
+        not containing data:
+
+        >>> ts.resample('17min', origin='end_day').sum()
+        2000-10-01 23:38:00     3
+        2000-10-01 23:55:00    15
+        2000-10-02 00:12:00    45
+        2000-10-02 00:29:00    45
+        Freq: 17T, dtype: int64
+
         To replace the use of the deprecated `base` argument, you can now use `offset`,
         in this example it is equivalent to have `base=2`:
 

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
@@ -82,7 +82,8 @@ class Grouper:
             However, loffset is also deprecated for ``.resample(...)``
             See: :class:`DataFrame.resample`
 
-    origin : {'epoch', 'start', 'start_day'}, Timestamp or str, default 'start_day'
+    origin : {'epoch', 'start', 'start_day', 'end', 'end_day'}, Timestamp
+        or str, default 'start_day'
         The timestamp on which to adjust the grouping. The timezone of origin must
         match the timezone of the index.
         If a timestamp is not used, these values are also supported:
@@ -93,6 +94,11 @@ class Grouper:
 
         .. versionadded:: 1.1.0
 
+        - 'end': `origin` is the last value of the timeseries
+        - 'end_day': `origin` is the ceiling midnight of the last day
+
+        .. versionadded:: 1.3.0
+
     offset : Timedelta or str, default is None
         An offset timedelta added to the origin.
 

diff --git a/pandas/core/resample.py b/pandas/core/resample.py
@@ -1388,10 +1388,22 @@ def __init__(
             if label is None:
                 label = "right"
         else:
-            if closed is None:
-                closed = "left"
-            if label is None:
-                label = "left"
+            # The backward resample sets ``closed`` to ``'right'`` by default
+            # since the last value should be considered as the edge point for
+            # the last bin. When origin is "end" or "end_day", the value for a
+            # specific ``Timestamp`` index stands for the resample result from
+            # the current ``Timestamp`` minus ``freq`` to the current
+            # ``Timestamp`` with a right close.
+            if origin in ["end", "end_day"]:
+                if closed is None:
+                    closed = "right"
+                if label is None:
+                    label = "right"
+            else:
+                if closed is None:
+                    closed = "left"
+                if label is None:
+                    label = "left"
 
         self.closed = closed
         self.label = label
@@ -1404,14 +1416,15 @@ def __init__(
         self.fill_method = fill_method
         self.limit = limit
 
-        if origin in ("epoch", "start", "start_day"):
+        if origin in ("epoch", "start", "start_day", "end", "end_day"):
             self.origin = origin
         else:
             try:
                 self.origin = Timestamp(origin)
             except Exception as e:
                 raise ValueError(
-                    "'origin' should be equal to 'epoch', 'start', 'start_day' or "
+                    "'origin' should be equal to 'epoch', 'start', 'start_day', "
+                    "'end', 'end_day' or "
                     f"should be a Timestamp convertible type. Got '{origin}' instead."
                 ) from e
 
@@ -1846,6 +1859,13 @@ def _adjust_dates_anchored(
         origin_nanos = first.value
     elif isinstance(origin, Timestamp):
         origin_nanos = origin.value
+    elif origin in ["end", "end_day"]:
+        origin = last if origin == "end" else last.ceil("D")
+        sub_freq_times = (origin.value - first.value) // freq.nanos
+        if closed == "left":
+            sub_freq_times += 1
+        first = origin - sub_freq_times * freq
+        origin_nanos = first.value
     origin_nanos += offset.value if offset else 0
 
     # GH 10117 & GH 19375. If first and last contain timezone information,

diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py
@@ -772,8 +772,9 @@ def test_resample_bad_origin(origin):
     rng = date_range("2000-01-01 00:00:00", "2000-01-01 02:00", freq="s")
     ts = Series(np.random.randn(len(rng)), index=rng)
     msg = (
-        "'origin' should be equal to 'epoch', 'start', 'start_day' or "
-        f"should be a Timestamp convertible type. Got '{origin}' instead."
+        "'origin' should be equal to 'epoch', 'start', 'start_day', "
+        "'end', 'end_day' or should be a Timestamp convertible type. Got "
+        f"'{origin}' instead."
     )
     with pytest.raises(ValueError, match=msg):
         ts.resample("5min", origin=origin)

diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py
@@ -611,3 +611,80 @@ def test_resample_agg_readonly():
 
     result = rs.agg("min")
     tm.assert_series_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "start,end,freq,data,resample_freq,origin,closed,exp_data,exp_end,exp_periods",
+    [
+        (
+            "2000-10-01 23:30:00",
+            "2000-10-02 00:26:00",
+            "7min",
+            [0, 3, 6, 9, 12, 15, 18, 21, 24],
+            "17min",
+            "end",
+            None,
+            [0, 18, 27, 63],
+            "20001002 00:26:00",
+            4,
+        ),
+        (
+            "20200101 8:26:35",
+            "20200101 9:31:58",
+            "77s",
+            [1] * 51,
+            "7min",
+            "end",
+            "right",
+            [1, 6, 5, 6, 5, 6, 5, 6, 5, 6],
+            "2020-01-01 09:30:45",
+            10,
+        ),
+        (
+            "2000-10-01 23:30:00",
+            "2000-10-02 00:26:00",
+            "7min",
+            [0, 3, 6, 9, 12, 15, 18, 21, 24],
+            "17min",
+            "end",
+            "left",
+            [0, 18, 27, 39, 24],
+            "20001002 00:43:00",
+            5,
+        ),
+        (
+            "2000-10-01 23:30:00",
+            "2000-10-02 00:26:00",
+            "7min",
+            [0, 3, 6, 9, 12, 15, 18, 21, 24],
+            "17min",
+            "end_day",
+            None,
+            [3, 15, 45, 45],
+            "2000-10-02 00:29:00",
+            4,
+        ),
+    ],
+)
+def test_end_and_end_day_origin(
+    start,
+    end,
+    freq,
+    data,
+    resample_freq,
+    origin,
+    closed,
+    exp_data,
+    exp_end,
+    exp_periods,
+):
+    rng = date_range(start, end, freq=freq)
+    ts = Series(data, index=rng)
+
+    res = ts.resample(resample_freq, origin=origin, closed=closed).sum()
+    expected = Series(
+        exp_data,
+        index=date_range(end=exp_end, freq=resample_freq, periods=exp_periods),
+    )
+
+    tm.assert_series_equal(res, expected)