Skip to content

Commit 9fd26fb

Browse files
author
MomIsBestFriend
committed
Merge remote-tracking branch 'upstream/master' into STY-repr-batch-3
2 parents f44d489 + 1078363 commit 9fd26fb

File tree

19 files changed

+554
-251
lines changed

19 files changed

+554
-251
lines changed

doc/source/conf.py

+10-18
Original file line numberDiff line numberDiff line change
@@ -296,20 +296,15 @@
296296

297297
for method in methods:
298298
# ... and each of its public methods
299-
moved_api_pages.append(
300-
(
301-
"{old}.{method}".format(old=old, method=method),
302-
"{new}.{method}".format(new=new, method=method),
303-
)
304-
)
299+
moved_api_pages.append((f"{old}.{method}", f"{new}.{method}",))
305300

306301
if pattern is None:
307302
html_additional_pages = {
308303
"generated/" + page[0]: "api_redirect.html" for page in moved_api_pages
309304
}
310305

311306

312-
header = """\
307+
header = f"""\
313308
.. currentmodule:: pandas
314309
315310
.. ipython:: python
@@ -323,10 +318,8 @@
323318
pd.options.display.max_rows = 15
324319
325320
import os
326-
os.chdir(r'{}')
327-
""".format(
328-
os.path.dirname(os.path.dirname(__file__))
329-
)
321+
os.chdir(r'{os.path.dirname(os.path.dirname(__file__))}')
322+
"""
330323

331324

332325
html_context = {
@@ -575,7 +568,7 @@ def _add_deprecation_prefixes(self, items):
575568
for item in items:
576569
display_name, sig, summary, real_name = item
577570
if self._is_deprecated(real_name):
578-
summary = "(DEPRECATED) %s" % summary
571+
summary = f"(DEPRECATED) {summary}"
579572
yield display_name, sig, summary, real_name
580573

581574
def get_items(self, names):
@@ -620,19 +613,18 @@ def linkcode_resolve(domain, info):
620613
lineno = None
621614

622615
if lineno:
623-
linespec = "#L{:d}-L{:d}".format(lineno, lineno + len(source) - 1)
616+
linespec = f"#L{lineno}-L{lineno + len(source) - 1}"
624617
else:
625618
linespec = ""
626619

627620
fn = os.path.relpath(fn, start=os.path.dirname(pandas.__file__))
628621

629622
if "+" in pandas.__version__:
630-
return "http://github.com/pandas-dev/pandas/blob/master/pandas/{}{}".format(
631-
fn, linespec
632-
)
623+
return f"http://github.com/pandas-dev/pandas/blob/master/pandas/{fn}{linespec}"
633624
else:
634-
return "http://github.com/pandas-dev/pandas/blob/v{}/pandas/{}{}".format(
635-
pandas.__version__, fn, linespec
625+
return (
626+
f"http://github.com/pandas-dev/pandas/blob/"
627+
f"v{pandas.__version__}/pandas/{fn}{linespec}"
636628
)
637629

638630

doc/source/reference/window.rst

+11
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,14 @@ Exponentially-weighted moving window functions
7474
EWM.var
7575
EWM.corr
7676
EWM.cov
77+
78+
Window Indexer
79+
--------------
80+
.. currentmodule:: pandas
81+
82+
Base class for defining custom window boundaries.
83+
84+
.. autosummary::
85+
:toctree: api/
86+
87+
api.indexers.BaseIndexer

doc/source/user_guide/computation.rst

+58
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,64 @@ default of the index) in a DataFrame.
466466
dft
467467
dft.rolling('2s', on='foo').sum()
468468
469+
.. _stats.custom_rolling_window:
470+
471+
Custom window rolling
472+
~~~~~~~~~~~~~~~~~~~~~
473+
474+
.. versionadded:: 1.0
475+
476+
In addition to accepting an integer or offset as a ``window`` argument, ``rolling`` also accepts
477+
a ``BaseIndexer`` subclass that allows a user to define a custom method for calculating window bounds.
478+
The ``BaseIndexer`` subclass will need to define a ``get_window_bounds`` method that returns
479+
a tuple of two arrays, the first being the starting indices of the windows and second being the
480+
ending indices of the windows. Additionally, ``num_values``, ``min_periods``, ``center`` and ``closed``
481+
will automatically be passed to ``get_window_bounds``, and the defined method must
482+
always accept these arguments.
483+
484+
For example, if we have the following ``DataFrame``:
485+
486+
.. ipython:: python
487+
488+
use_expanding = [True, False, True, False, True]
489+
use_expanding
490+
df = pd.DataFrame({'values': range(5)})
491+
df
492+
493+
and we want to use an expanding window where ``use_expanding`` is ``True``, and otherwise a window of size
494+
1, we can create the following ``BaseIndexer``:
495+
496+
.. code-block:: ipython
497+
498+
In [2]: from pandas.api.indexers import BaseIndexer
499+
...:
500+
...: class CustomIndexer(BaseIndexer):
501+
...:
502+
...: def get_window_bounds(self, num_values, min_periods, center, closed):
503+
...: start = np.empty(num_values, dtype=np.int64)
504+
...: end = np.empty(num_values, dtype=np.int64)
505+
...: for i in range(num_values):
506+
...: if self.use_expanding[i]:
507+
...: start[i] = 0
508+
...: end[i] = i + 1
509+
...: else:
510+
...: start[i] = i
511+
...: end[i] = i + self.window_size
512+
...: return start, end
513+
...:
514+
515+
In [3]: indexer = CustomIndexer(window_size=1, use_expanding=use_expanding)
516+
517+
In [4]: df.rolling(indexer).sum()
518+
Out[4]:
519+
values
520+
0 0.0
521+
1 1.0
522+
2 3.0
523+
3 3.0
524+
4 10.0
525+
526+
469527
.. _stats.rolling_window.endpoints:
470528

471529
Rolling window endpoints

doc/source/whatsnew/v1.0.0.rst

+10
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,16 @@ You can use the alias ``"boolean"`` as well.
169169
s = pd.Series([True, False, None], dtype="boolean")
170170
s
171171
172+
.. _whatsnew_1000.custom_window:
173+
174+
Defining custom windows for rolling operations
175+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
176+
177+
We've added a :class:`pandas.api.indexers.BaseIndexer` class that allows users to define how
178+
window bounds are created during ``rolling`` operations. Users can define their own ``get_window_bounds``
179+
method on a :class:`pandas.api.indexers.BaseIndexer` subclass that will generate the start and end
180+
indices used for each window during the rolling aggregation. For more details and example usage, see
181+
the :ref:`custom window rolling documentation <stats.custom_rolling_window>`.
172182

173183
.. _whatsnew_1000.enhancements.other:
174184

pandas/_libs/window/aggregations.pyx

+43-16
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,8 @@ cdef inline void remove_sum(float64_t val, int64_t *nobs, float64_t *sum_x) nogi
183183

184184

185185
def roll_sum_variable(ndarray[float64_t] values, ndarray[int64_t] start,
186-
ndarray[int64_t] end, int64_t minp):
186+
ndarray[int64_t] end, int64_t minp,
187+
bint is_monotonic_bounds=True):
187188
cdef:
188189
float64_t sum_x = 0
189190
int64_t s, e
@@ -198,11 +199,10 @@ def roll_sum_variable(ndarray[float64_t] values, ndarray[int64_t] start,
198199
s = start[i]
199200
e = end[i]
200201

201-
if i == 0:
202+
if i == 0 or not is_monotonic_bounds:
202203

203204
# setup
204-
sum_x = 0.0
205-
nobs = 0
205+
206206
for j in range(s, e):
207207
add_sum(values[j], &nobs, &sum_x)
208208

@@ -218,6 +218,10 @@ def roll_sum_variable(ndarray[float64_t] values, ndarray[int64_t] start,
218218

219219
output[i] = calc_sum(minp, nobs, sum_x)
220220

221+
if not is_monotonic_bounds:
222+
for j in range(s, e):
223+
remove_sum(values[j], &nobs, &sum_x)
224+
221225
return output
222226

223227

@@ -327,7 +331,8 @@ def roll_mean_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
327331

328332

329333
def roll_mean_variable(ndarray[float64_t] values, ndarray[int64_t] start,
330-
ndarray[int64_t] end, int64_t minp):
334+
ndarray[int64_t] end, int64_t minp,
335+
bint is_monotonic_bounds=True):
331336
cdef:
332337
float64_t val, sum_x = 0
333338
int64_t s, e
@@ -342,11 +347,9 @@ def roll_mean_variable(ndarray[float64_t] values, ndarray[int64_t] start,
342347
s = start[i]
343348
e = end[i]
344349

345-
if i == 0:
350+
if i == 0 or not is_monotonic_bounds:
346351

347352
# setup
348-
sum_x = 0.0
349-
nobs = 0
350353
for j in range(s, e):
351354
val = values[j]
352355
add_mean(val, &nobs, &sum_x, &neg_ct)
@@ -365,6 +368,10 @@ def roll_mean_variable(ndarray[float64_t] values, ndarray[int64_t] start,
365368

366369
output[i] = calc_mean(minp, nobs, neg_ct, sum_x)
367370

371+
if not is_monotonic_bounds:
372+
for j in range(s, e):
373+
val = values[j]
374+
remove_mean(val, &nobs, &sum_x, &neg_ct)
368375
return output
369376

370377
# ----------------------------------------------------------------------
@@ -486,7 +493,8 @@ def roll_var_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
486493

487494

488495
def roll_var_variable(ndarray[float64_t] values, ndarray[int64_t] start,
489-
ndarray[int64_t] end, int64_t minp, int ddof=1):
496+
ndarray[int64_t] end, int64_t minp, int ddof=1,
497+
bint is_monotonic_bounds=True):
490498
"""
491499
Numerically stable implementation using Welford's method.
492500
"""
@@ -508,7 +516,7 @@ def roll_var_variable(ndarray[float64_t] values, ndarray[int64_t] start,
508516

509517
# Over the first window, observations can only be added
510518
# never removed
511-
if i == 0:
519+
if i == 0 or not is_monotonic_bounds:
512520

513521
for j in range(s, e):
514522
add_var(values[j], &nobs, &mean_x, &ssqdm_x)
@@ -528,6 +536,10 @@ def roll_var_variable(ndarray[float64_t] values, ndarray[int64_t] start,
528536

529537
output[i] = calc_var(minp, ddof, nobs, ssqdm_x)
530538

539+
if not is_monotonic_bounds:
540+
for j in range(s, e):
541+
remove_var(values[j], &nobs, &mean_x, &ssqdm_x)
542+
531543
return output
532544

533545
# ----------------------------------------------------------------------
@@ -629,7 +641,8 @@ def roll_skew_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
629641

630642

631643
def roll_skew_variable(ndarray[float64_t] values, ndarray[int64_t] start,
632-
ndarray[int64_t] end, int64_t minp):
644+
ndarray[int64_t] end, int64_t minp,
645+
bint is_monotonic_bounds=True):
633646
cdef:
634647
float64_t val, prev
635648
float64_t x = 0, xx = 0, xxx = 0
@@ -648,7 +661,7 @@ def roll_skew_variable(ndarray[float64_t] values, ndarray[int64_t] start,
648661

649662
# Over the first window, observations can only be added
650663
# never removed
651-
if i == 0:
664+
if i == 0 or not is_monotonic_bounds:
652665

653666
for j in range(s, e):
654667
val = values[j]
@@ -671,6 +684,11 @@ def roll_skew_variable(ndarray[float64_t] values, ndarray[int64_t] start,
671684

672685
output[i] = calc_skew(minp, nobs, x, xx, xxx)
673686

687+
if not is_monotonic_bounds:
688+
for j in range(s, e):
689+
val = values[j]
690+
remove_skew(val, &nobs, &x, &xx, &xxx)
691+
674692
return output
675693

676694
# ----------------------------------------------------------------------
@@ -776,7 +794,8 @@ def roll_kurt_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
776794

777795

778796
def roll_kurt_variable(ndarray[float64_t] values, ndarray[int64_t] start,
779-
ndarray[int64_t] end, int64_t minp):
797+
ndarray[int64_t] end, int64_t minp,
798+
bint is_monotonic_bounds=True):
780799
cdef:
781800
float64_t val, prev
782801
float64_t x = 0, xx = 0, xxx = 0, xxxx = 0
@@ -794,7 +813,7 @@ def roll_kurt_variable(ndarray[float64_t] values, ndarray[int64_t] start,
794813

795814
# Over the first window, observations can only be added
796815
# never removed
797-
if i == 0:
816+
if i == 0 or not is_monotonic_bounds:
798817

799818
for j in range(s, e):
800819
add_kurt(values[j], &nobs, &x, &xx, &xxx, &xxxx)
@@ -814,6 +833,10 @@ def roll_kurt_variable(ndarray[float64_t] values, ndarray[int64_t] start,
814833

815834
output[i] = calc_kurt(minp, nobs, x, xx, xxx, xxxx)
816835

836+
if not is_monotonic_bounds:
837+
for j in range(s, e):
838+
remove_kurt(values[j], &nobs, &x, &xx, &xxx, &xxxx)
839+
817840
return output
818841

819842

@@ -1007,7 +1030,8 @@ def roll_min_fixed(ndarray[float64_t] values, ndarray[int64_t] start,
10071030

10081031

10091032
def roll_min_variable(ndarray[float64_t] values, ndarray[int64_t] start,
1010-
ndarray[int64_t] end, int64_t minp):
1033+
ndarray[int64_t] end, int64_t minp,
1034+
bint is_monotonic_bounds=True):
10111035
"""
10121036
Moving min of 1d array of any numeric type along axis=0 ignoring NaNs.
10131037
@@ -1400,7 +1424,10 @@ def roll_generic_variable(object obj,
14001424
ndarray[int64_t] start, ndarray[int64_t] end,
14011425
int64_t minp,
14021426
int offset, object func, bint raw,
1403-
object args, object kwargs):
1427+
object args, object kwargs,
1428+
bint is_monotonic_bounds=True):
1429+
# is_monotonic_bounds unused since variable algorithm doesn't calculate
1430+
# adds/subtracts across windows, but matches other *_variable functions
14041431
cdef:
14051432
ndarray[float64_t] output, counts, bufarr
14061433
ndarray[float64_t, cast=True] arr

0 commit comments

Comments
 (0)