Skip to content

Commit 81f16ef

Browse files
committed
ENH: Add bt.plot(resample=) and auto-downsample large data
Fixes #35
1 parent ad4c48c commit 81f16ef

File tree

5 files changed

+157
-59
lines changed

5 files changed

+157
-59
lines changed

backtesting/_plotting.py

+71-4
Original file line numberDiff line numberDiff line change
@@ -85,14 +85,72 @@ def lightness(color, lightness=.94):
8585
return color.to_rgb()
8686

8787

88+
_MAX_CANDLES = 10000
89+
90+
91+
def _maybe_resample_data(resample_rule, df, indicators, equity_data, trades):
    """
    Downsample OHLC data, indicators, equity curve and trades for plotting.

    `resample_rule` is either a Pandas offset string, in which case it is
    used as the target frequency unconditionally, or a boolean.
    If it is `False`, nothing is resampled. Otherwise the data is resampled
    only when `df` has more than `_MAX_CANDLES` rows; the target frequency
    is then the first entry of a fixed frequency ladder (starting at a
    position chosen from `df.index.resolution`) that yields at most
    `_MAX_CANDLES` candles, and a warning naming that frequency is issued.

    `df` must have a datetime index (`df.index.resolution` and
    `.resample()` are used on it).

    Returns the tuple `(df, indicators, equity_data, trades)`, either
    unchanged or with every member resampled to the chosen frequency.
    """
    if isinstance(resample_rule, str):
        freq = resample_rule
    else:
        # `resample=False` disables downsampling altogether. Data that
        # already fits into `_MAX_CANDLES` needs no resampling either.
        if resample_rule is False or len(df) <= _MAX_CANDLES:
            return df, indicators, equity_data, trades

        # Where in FREQS to start searching, given the data's own resolution
        from_index = dict(day=-2, hour=-6, minute=1, second=0, millisecond=0,
                          microsecond=0, nanosecond=0)[df.index.resolution]
        FREQS = ('1T', '5T', '10T', '15T', '30T', '1H', '2H', '4H', '8H', '1D', '1W', '1M')
        # First frequency that fits the limit; fall back to the coarsest one
        freq = next((f for f in FREQS[from_index:]
                     if len(df.resample(f)) <= _MAX_CANDLES), FREQS[-1])
        warnings.warn("Data contains too many candlesticks to plot; downsampling to {!r}. "
                      "See `Backtest.plot(resample=...)`".format(freq))

    from .lib import OHLCV_AGG, TRADES_AGG, _EQUITY_AGG
    df = df.resample(freq, label='right').agg(OHLCV_AGG).dropna()

    # Indicators are averaged per bin and aligned onto the resampled OHLC index
    indicators = [_Indicator(i.df.resample(freq, label='right').mean()
                             .dropna().reindex(df.index).values.T,
                             **dict(i._opts, name=i.name,
                                    # HACK: override `data` for its index
                                    data=pd.Series(np.nan, index=df.index)))
                  for i in indicators]
    assert not indicators or indicators[0].df.index.equals(df.index)

    equity_data = equity_data.resample(freq, label='right').agg(_EQUITY_AGG).dropna(how='all')
    assert equity_data.index.equals(df.index)

    # NOTE: the default arguments below deliberately capture the *original*
    # `trades` frame and the *resampled* `df` at definition time.
    def _weighted_returns(s, trades=trades):
        # Mean of the group's returns, weighted by absolute trade size
        group = trades.loc[s.index]
        weights = group['Size'].abs()
        return ((weights * group['ReturnPct']) / weights.sum()).sum()

    def _group_trades(column):
        def f(s, new_index=pd.Index(df.index.astype(np.int64)), bars=trades[column]):
            # Empty bins fall through and return None; the resulting
            # missing values are removed by the `.dropna()` below.
            if s.size:
                # Average the timestamps as int64 nanoseconds, then map
                # the mean time to the position of the nearest new bar.
                mean_time = int(bars.loc[s.index].astype(np.int64).mean())
                new_bar_idx = new_index.get_indexer([mean_time], method='nearest')[0]
                return new_bar_idx
        return f

    if len(trades):  # Avoid pandas "resampling on Int64 index" error
        trades = trades.assign(count=1).resample(freq, on='ExitTime', label='right').agg(dict(
            TRADES_AGG,
            ReturnPct=_weighted_returns,
            count='sum',
            EntryBar=_group_trades('EntryTime'),
            ExitBar=_group_trades('ExitTime'),
        )).dropna()

    return df, indicators, equity_data, trades
143+
144+
88145
def plot(*, results: pd.Series,
89146
df: pd.DataFrame,
90147
indicators: List[_Indicator],
91148
filename='', plot_width=None,
92149
plot_equity=True, plot_pl=True,
93150
plot_volume=True, plot_drawdown=False,
94151
smooth_equity=False, relative_equity=True,
95-
superimpose=True, show_legend=True, open_browser=True):
152+
superimpose=True, resample=True,
153+
show_legend=True, open_browser=True):
96154
"""
97155
Like much of GUI code everywhere, this is a mess.
98156
"""
@@ -111,15 +169,19 @@ def plot(*, results: pd.Series,
111169
trades = results['_trades']
112170

113171
plot_volume = plot_volume and not df.Volume.isnull().all()
114-
time_resolution = getattr(df.index, 'resolution', None)
115172
is_datetime_index = df.index.is_all_dates
116173

117174
from .lib import OHLCV_AGG
118175
# ohlc df may contain many columns. We're only interested in, and pass on to Bokeh, these
119176
df = df[list(OHLCV_AGG.keys())].copy(deep=False)
177+
178+
# Limit data to max_candles
179+
if is_datetime_index:
180+
df, indicators, equity_data, trades = _maybe_resample_data(
181+
resample, df, indicators, equity_data, trades)
182+
120183
df.index.name = None # Provides source name @index
121184
df['datetime'] = df.index # Save original, maybe datetime index
122-
123185
df = df.reset_index(drop=True)
124186
equity_data = equity_data.reset_index(drop=True)
125187
index = df.index
@@ -319,11 +381,15 @@ def _plot_pl_section():
319381
trade_source.add(returns_long, 'returns_long')
320382
trade_source.add(returns_short, 'returns_short')
321383
trade_source.add(size, 'marker_size')
384+
if 'count' in trades:
385+
trade_source.add(trades['count'], 'count')
322386
r1 = fig.scatter('index', 'returns_long', source=trade_source, fill_color=cmap,
323387
marker='triangle', line_color='black', size='marker_size')
324388
r2 = fig.scatter('index', 'returns_short', source=trade_source, fill_color=cmap,
325389
marker='inverted_triangle', line_color='black', size='marker_size')
326390
tooltips = [("Size", "@size{0,0}")]
391+
if 'count' in trades:
392+
tooltips.append(("Count", "@count{0,0}"))
327393
set_tooltips(fig, tooltips + [("P/L", "@returns_long{+0.[000]%}")],
328394
vline=False, renderers=[r1])
329395
set_tooltips(fig, tooltips + [("P/L", "@returns_short{+0.[000]%}")],
@@ -344,8 +410,9 @@ def _plot_volume_section():
344410

345411
def _plot_superimposed_ohlc():
346412
"""Superimposed, downsampled vbars"""
413+
time_resolution = pd.DatetimeIndex(df['datetime']).resolution
347414
resample_rule = (superimpose if isinstance(superimpose, str) else
348-
dict(day='W',
415+
dict(day='M',
349416
hour='D',
350417
minute='H',
351418
second='T',

backtesting/autoscale_cb.js

+17-46
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,13 @@
1-
if (!window._bt_extremes)
2-
window._bt_extremes = function (arr, initial, agg_func) {
3-
const CHUNK = 32768;
4-
let extreme = initial;
5-
for (let i = 0, len = arr.length; i < len; i += CHUNK) {
6-
const subarr = CHUNK >= len ? arr : arr.slice(i, i + CHUNK);
7-
extreme = agg_func(extreme, agg_func.apply(null, subarr));
8-
}
9-
return extreme;
10-
};
11-
12-
if (!window._bt_bin_search)
13-
window._bt_bin_search = function (index, value) {
14-
let mid,
15-
min = 0,
16-
max = index.length - 1;
17-
18-
while (min < max) {
19-
mid = (min + max) / 2 | 0;
20-
if (index[mid] < value)
21-
min = mid + 1;
22-
else
23-
max = mid - 1;
24-
}
25-
return min;
26-
};
27-
28-
if (!window._bt_scale_range)
29-
window._bt_scale_range = function (range, highs, lows) {
30-
const max = _bt_extremes(highs, -Infinity, Math.max),
31-
min = lows && _bt_extremes(lows, Infinity, Math.min);
1+
if (!window._bt_scale_range) {
2+
window._bt_scale_range = function (range, min, max, pad) {
3+
"use strict";
324
if (min !== Infinity && max !== -Infinity) {
33-
const pad = (max - min) * .03;
5+
pad = pad ? (max - min) * .03 : 0;
346
range.start = min - pad;
357
range.end = max + pad;
36-
}
8+
} else console.error('backtesting: scale range error:', min, max, range);
379
};
10+
}
3811

3912
clearTimeout(window._bt_autoscale_timeout);
4013

@@ -45,20 +18,18 @@ window._bt_autoscale_timeout = setTimeout(function () {
4518
* @variable ohlc_range `fig_ohlc.y_range`.
4619
* @variable volume_range `fig_volume.y_range`.
4720
*/
21+
"use strict";
22+
23+
let i = Math.max(Math.floor(cb_obj.start), 0),
24+
j = Math.min(Math.ceil(cb_obj.end), source.data['ohlc_high'].length);
4825

49-
let index = source.data['index'],
50-
i = Math.max(_bt_bin_search(index, cb_obj.start) - 1, 0),
51-
j = Math.min(_bt_bin_search(index, cb_obj.end) + 1, index.length);
26+
let max = Math.max.apply(null, source.data['ohlc_high'].slice(i, j)),
27+
min = Math.min.apply(null, source.data['ohlc_low'].slice(i, j));
28+
_bt_scale_range(ohlc_range, min, max, true);
5229

53-
_bt_scale_range(
54-
ohlc_range,
55-
source.data['ohlc_high'].slice(i, j),
56-
source.data['ohlc_low'].slice(i, j));
57-
try {
58-
_bt_scale_range(
59-
volume_range,
60-
source.data['Volume'].slice(i, j),
61-
0);
62-
} catch (e) {} // volume_range may be undefined
30+
if (volume_range) {
31+
max = Math.max.apply(null, source.data['Volume'].slice(i, j));
32+
_bt_scale_range(volume_range, 0, max * 1.03, false);
33+
}
6334

6435
}, 50);

backtesting/backtesting.py

+31-9
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ def init():
149149

150150
value = _Indicator(value, name=name, plot=plot, overlay=overlay,
151151
color=color, scatter=scatter,
152-
# lib.resample_apply() uses this:
152+
# _Indicator.s Series accessor uses this:
153153
data=self.data)
154154
self._indicators.append(value)
155155
return value
@@ -987,7 +987,8 @@ def __init__(self,
987987
stacklevel=2)
988988
data = data.sort_index()
989989
if not data.index.is_all_dates:
990-
warnings.warn('Data index is not datetime. Assuming simple periods.',
990+
warnings.warn('Data index is not datetime. Assuming simple periods, '
991+
'but `pd.DateTimeIndex` is advised.',
991992
stacklevel=2)
992993

993994
self._data = data # type: pd.DataFrame
@@ -1041,6 +1042,10 @@ def run(self, **kwargs) -> pd.Series:
10411042
# Next tick, a moment before bar close
10421043
strategy.next()
10431044

1045+
# Set data back to full length
1046+
# for future `indicator._opts['data'].index` calls to work
1047+
data._set_length(len(self._data))
1048+
10441049
self._results = self._compute_stats(broker, strategy)
10451050
return self._results
10461051

@@ -1285,6 +1290,7 @@ def plot(self, *, results: pd.Series = None, filename=None, plot_width=None,
12851290
plot_volume=True, plot_drawdown=False,
12861291
smooth_equity=False, relative_equity=True,
12871292
superimpose: Union[bool, str] = True,
1293+
resample=True,
12881294
show_legend=True, open_browser=True):
12891295
"""
12901296
Plot the progression of the last backtest run.
@@ -1322,16 +1328,31 @@ def plot(self, *, results: pd.Series = None, filename=None, plot_width=None,
13221328
If `relative_equity` is `True`, scale and label equity graph axis
13231329
with return percent, not absolute cash-equivalent values.
13241330
1325-
If `superimpose` is `True`, superimpose downsampled candlesticks
1326-
over the original candlestick chart. Default downsampling is:
1327-
weekly for daily data, daily for hourly data, hourly for minute data,
1328-
and minute for second and sub-second data.
1329-
`superimpose` can also be a string, in which case it is a valid
1330-
[Pandas offset string], such as `'5T'` or `'5min'`.
1331+
If `superimpose` is `True`, superimpose larger-timeframe candlesticks
1332+
over the original candlestick chart. Default downsampling rule is:
1333+
monthly for daily data, daily for hourly data, hourly for minute data,
1334+
and minute for (sub-)second data.
1335+
`superimpose` can also be a valid [Pandas offset string],
1336+
such as `'5T'` or `'5min'`, in which case this frequency will be
1337+
used to superimpose.
13311338
Note, this only works for data with a datetime index.
13321339
1340+
If `resample` is `True`, the OHLC data is resampled in a way that
1341+
caps the number of candles for Bokeh to plot at 10_000.
1342+
This may, in situations of overabundant data,
1343+
improve plot's interactive performance and avoid browser's
1344+
`Javascript Error: Maximum call stack size exceeded` or similar.
1345+
Equity & drawdown curves and individual trades data are,
1346+
likewise, [reasonably _aggregated_][TRADES_AGG].
1347+
`resample` can also be a [Pandas offset string],
1348+
such as `'5T'` or `'5min'`, in which case this frequency will be
1349+
used to resample, overriding above numeric limitation.
1350+
Note, all this only works for data with a datetime index.
1351+
13331352
[Pandas offset string]: \
1334-
http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
1353+
https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects
1354+
1355+
[TRADES_AGG]: lib.html#backtesting.lib.TRADES_AGG
13351356
13361357
If `show_legend` is `True`, the resulting plot graphs will contain
13371358
labeled legends.
@@ -1357,5 +1378,6 @@ def plot(self, *, results: pd.Series = None, filename=None, plot_width=None,
13571378
smooth_equity=smooth_equity,
13581379
relative_equity=relative_equity,
13591380
superimpose=superimpose,
1381+
resample=resample,
13601382
show_legend=show_legend,
13611383
open_browser=open_browser)

backtesting/lib.py

+25
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,31 @@
4040
df.resample('4H', label='right').agg(OHLCV_AGG)
4141
"""
4242

43+
TRADES_AGG = OrderedDict((
44+
('Size', 'sum'),
45+
('EntryBar', 'first'),
46+
('ExitBar', 'last'),
47+
('EntryPrice', 'mean'),
48+
('ExitPrice', 'mean'),
49+
('PnL', 'sum'),
50+
('ReturnPct', 'mean'),
51+
('EntryTime', 'first'),
52+
('ExitTime', 'last'),
53+
('Duration', 'sum'),
54+
))
55+
"""Dictionary of rules for aggregating resampled trades data,
56+
e.g.
57+
58+
stats['_trades'].resample('1D', on='ExitTime',
59+
label='right').agg(TRADES_AGG)
60+
"""
61+
62+
_EQUITY_AGG = {
63+
'Equity': 'mean',
64+
'DrawdownPct': 'max',
65+
'DrawdownDuration': 'max',
66+
}
67+
4368

4469
def barssince(condition: Sequence[bool], default=np.inf) -> int:
4570
"""

backtesting/test/_test.py

+13
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from runpy import run_path
99
from tempfile import NamedTemporaryFile, gettempdir
1010
from unittest import TestCase
11+
from unittest.mock import patch
1112

1213
import numpy as np
1314
import pandas as pd
@@ -422,6 +423,7 @@ def test_params(self):
422423
plot_pl=False,
423424
plot_drawdown=True,
424425
superimpose=False,
426+
resample='1W',
425427
smooth_equity=False,
426428
relative_equity=False,
427429
show_legend=False).items():
@@ -504,6 +506,17 @@ def next(self):
504506
# Give browser time to open before tempfile is removed
505507
time.sleep(1)
506508

509+
def test_resample(self):
510+
bt = Backtest(GOOG, SmaCross)
511+
bt.run()
512+
import backtesting._plotting
513+
with _tempfile() as f,\
514+
patch.object(backtesting._plotting, '_MAX_CANDLES', 10),\
515+
self.assertWarns(UserWarning):
516+
bt.plot(filename=f, resample=True)
517+
# Give browser time to open before tempfile is removed
518+
time.sleep(1)
519+
507520
def test_indicator_color(self):
508521
class S(Strategy):
509522
def init(self):

0 commit comments

Comments
 (0)