Skip to content

Commit aef8368

Browse files
committed
ENH: Add bt.plot(resample=) and auto-downsample large data
Fixes #35
1 parent 1541283 commit aef8368

File tree

4 files changed

+128
-13
lines changed

4 files changed

+128
-13
lines changed

backtesting/_plotting.py

+64-4
Original file line numberDiff line numberDiff line change
@@ -85,14 +85,65 @@ def lightness(color, lightness=.94):
8585
return color.to_rgb()
8686

8787

88+
_MAX_CANDLES = 10_000


def _maybe_resample_data(resample_rule, df, indicators, equity_data, trades):
    """
    Downsample plot data to a coarser time frame when requested or needed.

    `resample_rule` is one of:

    * a falsy value: nothing is resampled;
    * a Pandas offset string: data is always resampled to that frequency,
      regardless of its length;
    * any other truthy value: data is resampled only if `df` holds more
      than `_MAX_CANDLES` rows, using the first frequency from a fixed,
      ascending list of candidates that brings the row count within the
      limit (or the coarsest candidate if none does). A warning naming
      the chosen frequency is issued in this case.

    Returns the 4-tuple `(df, indicators, equity_data, trades)`, either
    the very same objects that were passed in or their aggregated
    counterparts, all sharing the resampled, right-labeled index.
    """
    if not resample_rule:
        return df, indicators, equity_data, trades

    if isinstance(resample_rule, str):
        # An explicit frequency overrides the candle-count limit
        freq = resample_rule
    else:
        if len(df) <= _MAX_CANDLES:
            return df, indicators, equity_data, trades

        # Position in FREQS of the first candidate frequency worth trying
        # for data of the given index resolution
        from_index = dict(day=-2, hour=-6, minute=1, second=0, millisecond=0,
                          microsecond=0, nanosecond=0)[df.index.resolution]
        FREQS = ('1T', '5T', '10T', '15T', '30T', '1H', '2H', '4H', '8H', '1D', '1W', '1M')
        freq = next((f for f in FREQS[from_index:]
                     if len(df.resample(f)) <= _MAX_CANDLES), FREQS[-1])
        warnings.warn("Data contains too many candlesticks to plot; downsampling to {!r}. "
                      "See `Backtest.plot(resample=...)`".format(freq))

    from .lib import OHLCV_AGG, TRADES_AGG, _EQUITY_AGG
    df = df.resample(freq, label='right').agg(OHLCV_AGG).dropna()

    # XXX: copy(True) pandas bug https://github.com/pandas-dev/pandas/issues/31710
    # Bins are labeled on the right, same as for `df` above; otherwise
    # the reindex below would pair each value with the following bar.
    indicators = [_Indicator(i.s.copy(True).resample(freq, label='right').mean()
                             .dropna().reindex(df.index),
                             **dict(i._opts,
                                    # HACK: override `data` for its index
                                    data=pd.Series(np.nan, index=df.index)))
                  for i in indicators]
    assert not indicators or indicators[0].s.index.equals(df.index)

    equity_data = equity_data.resample(freq, label='right').agg(_EQUITY_AGG).dropna(how='all')
    assert equity_data.index.equals(df.index)

    # The helpers below bind the original, un-aggregated `trades` as a
    # default argument because the name is rebound further down.

    def _weighted_returns(s, trades=trades):
        # Size-weighted mean of the returns of the trades in this bin
        group = trades.loc[s.index]
        sizes = group['Size'].abs()
        return ((sizes * group['ReturnPct']) / sizes.sum()).sum()

    def _mean_time_bar(column, trades=trades, index=df.index):
        # Build an aggregator mapping a bin of trades to the position of
        # the resampled bar nearest to the mean of their `column` times
        def nearest_bar(s):
            if not s.size:
                # Empty bin; the row is removed by dropna() below
                return np.nan
            mean_time = trades.loc[s.index, column].mean()
            # `Index.get_loc(method=)` is gone in pandas 2.0
            return index.get_indexer([mean_time], method='nearest')[0]
        return nearest_bar

    trades = trades.assign(count=1).resample(freq, on='ExitTime', label='right').agg(dict(
        TRADES_AGG,
        ReturnPct=_weighted_returns,
        count='sum',
        # XXX: Can this be prettier?
        EntryBar=_mean_time_bar('EntryTime'),
        ExitBar=_mean_time_bar('ExitTime'),
    )).dropna()

    return df, indicators, equity_data, trades
136+
137+
88138
def plot(*, results: pd.Series,
89139
df: pd.DataFrame,
90140
indicators: List[_Indicator],
91141
filename='', plot_width=None,
92142
plot_equity=True, plot_pl=True,
93143
plot_volume=True, plot_drawdown=False,
94144
smooth_equity=False, relative_equity=True,
95-
superimpose=True, show_legend=True, open_browser=True):
145+
superimpose=True, resample=True,
146+
show_legend=True, open_browser=True):
96147
"""
97148
Like much of GUI code everywhere, this is a mess.
98149
"""
@@ -111,15 +162,19 @@ def plot(*, results: pd.Series,
111162
trades = results['_trades']
112163

113164
plot_volume = plot_volume and not df.Volume.isnull().all()
114-
time_resolution = getattr(df.index, 'resolution', None)
115165
is_datetime_index = df.index.is_all_dates
116166

117167
from .lib import OHLCV_AGG
118168
# ohlc df may contain many columns. We're only interested in, and pass on to Bokeh, these
119169
df = df[list(OHLCV_AGG.keys())].copy(deep=False)
170+
171+
# Limit data to max_candles
172+
if is_datetime_index:
173+
df, indicators, equity_data, trades = _maybe_resample_data(
174+
resample, df, indicators, equity_data, trades)
175+
120176
df.index.name = None # Provides source name @index
121177
df['datetime'] = df.index # Save original, maybe datetime index
122-
123178
df = df.reset_index(drop=True)
124179
equity_data = equity_data.reset_index(drop=True)
125180
index = df.index
@@ -222,6 +277,10 @@ def _plot_equity_section():
222277
x1, x2 = dd_end - 1, dd_end
223278
y, y1, y2 = equity[dd_start], equity[x1], equity[x2]
224279
dd_end -= (1 - (y - y1) / (y2 - y1)) * (dd_end - x1) # y = a x + b
280+
# If _maybe_resample_data() was applied,
281+
# the agg'd equity might have "stretched" the calculation
282+
# XXX: test this?
283+
dd_end = min(dd_end, equity.index[-1])
225284

226285
if smooth_equity:
227286
interest_points = pd.Index([
@@ -339,8 +398,9 @@ def _plot_volume_section():
339398

340399
def _plot_superimposed_ohlc():
341400
"""Superimposed, downsampled vbars"""
401+
time_resolution = pd.DatetimeIndex(df['datetime']).resolution
342402
resample_rule = (superimpose if isinstance(superimpose, str) else
343-
dict(day='W',
403+
dict(day='M',
344404
hour='D',
345405
minute='H',
346406
second='T',

backtesting/backtesting.py

+27-9
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ def init():
141141

142142
value = _Indicator(value, name=name, plot=plot, overlay=overlay,
143143
color=color, scatter=scatter,
144-
# lib.resample_apply() uses this:
144+
# _Indicator.s Series accessor uses this:
145145
data=self.data)
146146
self._indicators.append(value)
147147
return value
@@ -952,7 +952,8 @@ def __init__(self,
952952
stacklevel=2)
953953
data = data.sort_index()
954954
if not data.index.is_all_dates:
955-
warnings.warn('Data index is not datetime. Assuming simple periods.',
955+
warnings.warn('Data index is not datetime. Assuming simple periods, '
956+
'but `pd.DateTimeIndex` is advised.',
956957
stacklevel=2)
957958

958959
self._data = data # type: pd.DataFrame
@@ -1250,6 +1251,7 @@ def plot(self, *, results: pd.Series = None, filename=None, plot_width=None,
12501251
plot_volume=True, plot_drawdown=False,
12511252
smooth_equity=False, relative_equity=True,
12521253
superimpose: Union[bool, str] = True,
1254+
resample=True,
12531255
show_legend=True, open_browser=True):
12541256
"""
12551257
Plot the progression of the last backtest run.
@@ -1287,16 +1289,31 @@ def plot(self, *, results: pd.Series = None, filename=None, plot_width=None,
12871289
If `relative_equity` is `True`, scale and label equity graph axis
12881290
with return percent, not absolute cash-equivalent values.
12891291
1290-
If `superimpose` is `True`, superimpose downsampled candlesticks
1291-
over the original candlestick chart. Default downsampling is:
1292-
weekly for daily data, daily for hourly data, hourly for minute data,
1293-
and minute for second and sub-second data.
1294-
`superimpose` can also be a string, in which case it is a valid
1295-
[Pandas offset string], such as `'5T'` or `'5min'`.
1292+
If `superimpose` is `True`, superimpose larger-timeframe candlesticks
1293+
over the original candlestick chart. Default downsampling rule is:
1294+
monthly for daily data, daily for hourly data, hourly for minute data,
1295+
and minute for (sub-)second data.
1296+
`superimpose` can also be a valid [Pandas offset string],
1297+
such as `'5T'` or `'5min'`, in which case this frequency will be
1298+
used to superimpose.
12961299
Note, this only works for data with a datetime index.
12971300
1301+
If `resample` is `True`, the OHLC data is resampled in a way that
1302+
limits the number of candles for Bokeh to plot to at most 10_000.
1303+
This may, in situations of overabundant data,
1304+
improve plot's interactive performance and avoid browser's
1305+
`Javascript Error: Maximum call stack size exceeded` or similar.
1306+
Equity & drawdown curves and individual trades data are,
1307+
likewise, [reasonably _aggregated_][TRADES_AGG].
1308+
`resample` can also be a [Pandas offset string],
1309+
such as `'5T'` or `'5min'`, in which case this frequency will be
1310+
used to resample, overriding the above numeric limitation.
1311+
Note, all this only works for data with a datetime index.
1312+
12981313
[Pandas offset string]: \
1299-
http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
1314+
https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects
1315+
1316+
[TRADES_AGG]: lib.html#backtesting.lib.TRADES_AGG
13001317
13011318
If `show_legend` is `True`, the resulting plot graphs will contain
13021319
labeled legends.
@@ -1322,5 +1339,6 @@ def plot(self, *, results: pd.Series = None, filename=None, plot_width=None,
13221339
smooth_equity=smooth_equity,
13231340
relative_equity=relative_equity,
13241341
superimpose=superimpose,
1342+
resample=resample,
13251343
show_legend=show_legend,
13261344
open_browser=open_browser)

backtesting/lib.py

+25
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,31 @@
4040
df.resample('4H', label='right').agg(OHLCV_AGG)
4141
"""
4242

43+
TRADES_AGG = OrderedDict((
    ('Size', 'sum'),
    ('EntryBar', 'first'),
    ('ExitBar', 'last'),
    ('EntryPrice', 'mean'),
    ('ExitPrice', 'mean'),
    ('PnL', 'sum'),
    ('ReturnPct', 'mean'),
    ('EntryTime', 'first'),
    ('ExitTime', 'last'),
    ('Duration', 'sum'),
))
"""Dictionary of rules for aggregating resampled trades data,
e.g.

    stats['_trades'].resample('1D', on='ExitTime',
                              label='right').agg(TRADES_AGG)
"""

# Rules for aggregating the resampled equity curve. Equity takes the
# last value of each bin, so the final resampled point equals the actual
# final equity rather than an in-bin average; drawdown figures keep the
# worst (largest) value seen within the bin.
_EQUITY_AGG = {
    'Equity': 'last',
    'DrawdownPct': 'max',
    'DrawdownDuration': 'max',
}
67+
4368

4469
def barssince(condition: Sequence[bool], default=np.inf) -> int:
4570
"""

backtesting/test/_test.py

+12
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from runpy import run_path
99
from tempfile import NamedTemporaryFile, gettempdir
1010
from unittest import TestCase
11+
from unittest.mock import patch
1112

1213
import numpy as np
1314
import pandas as pd
@@ -500,6 +501,17 @@ def next(self):
500501
# Give browser time to open before tempfile is removed
501502
time.sleep(1)
502503

504+
def test_resample(self):
    """Plotting with a tiny candle limit must warn and still produce a plot."""
    import backtesting._plotting as plotting_module

    backtest = Backtest(GOOG, SmaCross)
    backtest.run()

    with _tempfile() as path:
        # Force the auto-downsampling path by shrinking the limit
        with patch.object(plotting_module, '_MAX_CANDLES', 10):
            with self.assertWarns(UserWarning):
                backtest.plot(filename=path, resample=True)
                # Give browser time to open before tempfile is removed
                time.sleep(1)
514+
503515
def test_indicator_color(self):
504516
class S(Strategy):
505517
def init(self):

0 commit comments

Comments
 (0)