Skip to content

Commit 1219913

Browse files
committed
ENH: Add bt.plot(resample=) and auto-downsample large data
Fixes #35
1 parent 7027bfc commit 1219913

File tree

5 files changed

+156
-59
lines changed

5 files changed

+156
-59
lines changed

backtesting/_plotting.py

+71-4
Original file line numberDiff line numberDiff line change
@@ -85,14 +85,72 @@ def lightness(color, lightness=.94):
8585
return color.to_rgb()
8686

8787

88+
_MAX_CANDLES = 10000
89+
90+
91+
def _maybe_resample_data(resample_rule, df, indicators, equity_data, trades):
    """
    Downsample the data to be plotted so it stays within `_MAX_CANDLES` bars.

    Parameters
    ----------
    resample_rule : bool or str
        If a string, it is used as-is as the resampling frequency, regardless
        of how many bars `df` holds. If falsy (e.g. `False`), nothing is
        resampled. Otherwise (e.g. `True`) a frequency is chosen
        automatically, but only when `df` has more than `_MAX_CANDLES` rows.
    df : pd.DataFrame
        OHLCV data; its index must expose `.resolution` and support
        `.resample()` (the visible caller only calls this for datetime
        indexes).
    indicators : list of _Indicator
        Indicators to resample (by mean) onto the new index.
    equity_data : pd.DataFrame
        Equity curve frame, aggregated with `_EQUITY_AGG`.
    trades : pd.DataFrame
        Trades frame, aggregated per resampled bar on its `ExitTime` column.

    Returns
    -------
    tuple
        `(df, indicators, equity_data, trades)`, either the untouched inputs
        or their resampled counterparts. Resampled trades gain a `count`
        column holding the number of trades merged into each row.
    """
    if isinstance(resample_rule, str):
        freq = resample_rule
    else:
        # An explicit opt-out (`resample=False`) must never downsample, and
        # data that already fits the limit (inclusive) needs no resampling.
        if not resample_rule or len(df) <= _MAX_CANDLES:
            return df, indicators, equity_data, trades

        # Where in FREQS to start looking, so that the candidate frequency is
        # always coarser than the resolution of the data itself.
        from_index = dict(day=-2, hour=-6, minute=1, second=0, millisecond=0,
                          microsecond=0, nanosecond=0)[df.index.resolution]
        FREQS = ('1T', '5T', '10T', '15T', '30T', '1H', '2H', '4H', '8H', '1D', '1W', '1M')
        # First frequency that brings the bar count within the limit;
        # fall back to the coarsest one if none does.
        freq = next((f for f in FREQS[from_index:]
                     if len(df.resample(f)) <= _MAX_CANDLES), FREQS[-1])
        warnings.warn("Data contains too many candlesticks to plot; downsampling to {!r}. "
                      "See `Backtest.plot(resample=...)`".format(freq))

    from .lib import OHLCV_AGG, TRADES_AGG, _EQUITY_AGG
    df = df.resample(freq, label='right').agg(OHLCV_AGG).dropna()

    indicators = [_Indicator(i.df.resample(freq, label='right').mean()
                             .dropna().reindex(df.index).values.T,
                             **dict(i._opts, name=i.name,
                                    # HACK: override `data` for its index
                                    data=pd.Series(np.nan, index=df.index)))
                  for i in indicators]
    assert not indicators or indicators[0].df.index.equals(df.index)

    equity_data = equity_data.resample(freq, label='right').agg(_EQUITY_AGG).dropna(how='all')
    assert equity_data.index.equals(df.index)

    def _weighted_returns(s, trades=trades):
        # Size-weighted mean of the returns of the trades grouped into one bar
        group = trades.loc[s.index]
        sizes = group['Size'].abs()
        return ((sizes * group['ReturnPct']) / sizes.sum()).sum()

    def _group_trades(column):
        def f(s, new_index=df.index.astype(np.int64), bars=trades[column]):
            if s.size:
                # Via int64 because on pandas recently broken datetime
                mean_time = int(bars.loc[s.index].view('i8').mean())
                # Position of the resampled bar closest to the mean time.
                # `get_indexer` is used because `get_loc(method=...)` is not
                # available in newer pandas versions.
                new_bar_idx = new_index.get_indexer([mean_time], method='nearest')[0]
                return new_bar_idx
        return f

    if len(trades):  # Avoid pandas "resampling on Int64 index" error
        trades = trades.assign(count=1).resample(freq, on='ExitTime', label='right').agg(dict(
            TRADES_AGG,
            ReturnPct=_weighted_returns,
            count='sum',
            EntryBar=_group_trades('EntryTime'),
            ExitBar=_group_trades('ExitTime'),
        )).dropna()

    return df, indicators, equity_data, trades
143+
144+
88145
def plot(*, results: pd.Series,
89146
df: pd.DataFrame,
90147
indicators: List[_Indicator],
91148
filename='', plot_width=None,
92149
plot_equity=True, plot_pl=True,
93150
plot_volume=True, plot_drawdown=False,
94151
smooth_equity=False, relative_equity=True,
95-
superimpose=True, show_legend=True, open_browser=True):
152+
superimpose=True, resample=True,
153+
show_legend=True, open_browser=True):
96154
"""
97155
Like much of GUI code everywhere, this is a mess.
98156
"""
@@ -111,15 +169,19 @@ def plot(*, results: pd.Series,
111169
trades = results['_trades']
112170

113171
plot_volume = plot_volume and not df.Volume.isnull().all()
114-
time_resolution = getattr(df.index, 'resolution', None)
115172
is_datetime_index = df.index.is_all_dates
116173

117174
from .lib import OHLCV_AGG
118175
# ohlc df may contain many columns. We're only interested in, and pass on to Bokeh, these
119176
df = df[list(OHLCV_AGG.keys())].copy(deep=False)
177+
178+
# Limit data to max_candles
179+
if is_datetime_index:
180+
df, indicators, equity_data, trades = _maybe_resample_data(
181+
resample, df, indicators, equity_data, trades)
182+
120183
df.index.name = None # Provides source name @index
121184
df['datetime'] = df.index # Save original, maybe datetime index
122-
123185
df = df.reset_index(drop=True)
124186
equity_data = equity_data.reset_index(drop=True)
125187
index = df.index
@@ -319,11 +381,15 @@ def _plot_pl_section():
319381
trade_source.add(returns_long, 'returns_long')
320382
trade_source.add(returns_short, 'returns_short')
321383
trade_source.add(size, 'marker_size')
384+
if 'count' in trades:
385+
trade_source.add(trades['count'], 'count')
322386
r1 = fig.scatter('index', 'returns_long', source=trade_source, fill_color=cmap,
323387
marker='triangle', line_color='black', size='marker_size')
324388
r2 = fig.scatter('index', 'returns_short', source=trade_source, fill_color=cmap,
325389
marker='inverted_triangle', line_color='black', size='marker_size')
326390
tooltips = [("Size", "@size{0,0}")]
391+
if 'count' in trades:
392+
tooltips.append(("Count", "@count{0,0}"))
327393
set_tooltips(fig, tooltips + [("P/L", "@returns_long{+0.[000]%}")],
328394
vline=False, renderers=[r1])
329395
set_tooltips(fig, tooltips + [("P/L", "@returns_short{+0.[000]%}")],
@@ -344,8 +410,9 @@ def _plot_volume_section():
344410

345411
def _plot_superimposed_ohlc():
346412
"""Superimposed, downsampled vbars"""
413+
time_resolution = pd.DatetimeIndex(df['datetime']).resolution
347414
resample_rule = (superimpose if isinstance(superimpose, str) else
348-
dict(day='W',
415+
dict(day='M',
349416
hour='D',
350417
minute='H',
351418
second='T',

backtesting/autoscale_cb.js

+17-46
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,13 @@
1-
if (!window._bt_extremes)
2-
window._bt_extremes = function (arr, initial, agg_func) {
3-
const CHUNK = 32768;
4-
let extreme = initial;
5-
for (let i = 0, len = arr.length; i < len; i += CHUNK) {
6-
const subarr = CHUNK >= len ? arr : arr.slice(i, i + CHUNK);
7-
extreme = agg_func(extreme, agg_func.apply(null, subarr));
8-
}
9-
return extreme;
10-
};
11-
12-
if (!window._bt_bin_search)
13-
window._bt_bin_search = function (index, value) {
14-
let mid,
15-
min = 0,
16-
max = index.length - 1;
17-
18-
while (min < max) {
19-
mid = (min + max) / 2 | 0;
20-
if (index[mid] < value)
21-
min = mid + 1;
22-
else
23-
max = mid - 1;
24-
}
25-
return min;
26-
};
27-
28-
if (!window._bt_scale_range)
29-
window._bt_scale_range = function (range, highs, lows) {
30-
const max = _bt_extremes(highs, -Infinity, Math.max),
31-
min = lows && _bt_extremes(lows, Infinity, Math.min);
1+
if (!window._bt_scale_range) {
2+
window._bt_scale_range = function (range, min, max, pad) {
3+
"use strict";
324
if (min !== Infinity && max !== -Infinity) {
33-
const pad = (max - min) * .03;
5+
pad = pad ? (max - min) * .03 : 0;
346
range.start = min - pad;
357
range.end = max + pad;
36-
}
8+
} else console.error('backtesting: scale range error:', min, max, range);
379
};
10+
}
3811

3912
clearTimeout(window._bt_autoscale_timeout);
4013

@@ -45,20 +18,18 @@ window._bt_autoscale_timeout = setTimeout(function () {
4518
* @variable ohlc_range `fig_ohlc.y_range`.
4619
* @variable volume_range `fig_volume.y_range`.
4720
*/
21+
"use strict";
22+
23+
let i = Math.max(Math.floor(cb_obj.start), 0),
24+
j = Math.min(Math.ceil(cb_obj.end), source.data['ohlc_high'].length);
4825

49-
let index = source.data['index'],
50-
i = Math.max(_bt_bin_search(index, cb_obj.start) - 1, 0),
51-
j = Math.min(_bt_bin_search(index, cb_obj.end) + 1, index.length);
26+
let max = Math.max.apply(null, source.data['ohlc_high'].slice(i, j)),
27+
min = Math.min.apply(null, source.data['ohlc_low'].slice(i, j));
28+
_bt_scale_range(ohlc_range, min, max, true);
5229

53-
_bt_scale_range(
54-
ohlc_range,
55-
source.data['ohlc_high'].slice(i, j),
56-
source.data['ohlc_low'].slice(i, j));
57-
try {
58-
_bt_scale_range(
59-
volume_range,
60-
source.data['Volume'].slice(i, j),
61-
0);
62-
} catch (e) {} // volume_range may be undefined
30+
if (volume_range) {
31+
max = Math.max.apply(null, source.data['Volume'].slice(i, j));
32+
_bt_scale_range(volume_range, 0, max * 1.03, false);
33+
}
6334

6435
}, 50);

backtesting/backtesting.py

+31-9
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ def init():
150150

151151
value = _Indicator(value, name=name, plot=plot, overlay=overlay,
152152
color=color, scatter=scatter,
153-
# lib.resample_apply() uses this:
153+
# _Indicator.s Series accessor uses this:
154154
data=self.data)
155155
self._indicators.append(value)
156156
return value
@@ -992,7 +992,8 @@ def __init__(self,
992992
stacklevel=2)
993993
data = data.sort_index()
994994
if not data.index.is_all_dates:
995-
warnings.warn('Data index is not datetime. Assuming simple periods.',
995+
warnings.warn('Data index is not datetime. Assuming simple periods, '
996+
'but `pd.DateTimeIndex` is advised.',
996997
stacklevel=2)
997998

998999
self._data = data # type: pd.DataFrame
@@ -1046,6 +1047,10 @@ def run(self, **kwargs) -> pd.Series:
10461047
# Next tick, a moment before bar close
10471048
strategy.next()
10481049

1050+
# Set data back to full length
1051+
# for future `indicator._opts['data'].index` calls to work
1052+
data._set_length(len(self._data))
1053+
10491054
self._results = self._compute_stats(broker, strategy)
10501055
return self._results
10511056

@@ -1310,6 +1315,7 @@ def plot(self, *, results: pd.Series = None, filename=None, plot_width=None,
13101315
plot_volume=True, plot_drawdown=False,
13111316
smooth_equity=False, relative_equity=True,
13121317
superimpose: Union[bool, str] = True,
1318+
resample=True,
13131319
show_legend=True, open_browser=True):
13141320
"""
13151321
Plot the progression of the last backtest run.
@@ -1347,16 +1353,31 @@ def plot(self, *, results: pd.Series = None, filename=None, plot_width=None,
13471353
If `relative_equity` is `True`, scale and label equity graph axis
13481354
with return percent, not absolute cash-equivalent values.
13491355
1350-
If `superimpose` is `True`, superimpose downsampled candlesticks
1351-
over the original candlestick chart. Default downsampling is:
1352-
weekly for daily data, daily for hourly data, hourly for minute data,
1353-
and minute for second and sub-second data.
1354-
`superimpose` can also be a string, in which case it is a valid
1355-
[Pandas offset string], such as `'5T'` or `'5min'`.
1356+
If `superimpose` is `True`, superimpose larger-timeframe candlesticks
1357+
over the original candlestick chart. Default downsampling rule is:
1358+
monthly for daily data, daily for hourly data, hourly for minute data,
1359+
and minute for (sub-)second data.
1360+
`superimpose` can also be a valid [Pandas offset string],
1361+
such as `'5T'` or `'5min'`, in which case this frequency will be
1362+
used to superimpose.
13561363
Note, this only works for data with a datetime index.
13571364
1365+
If `resample` is `True`, the OHLC data is resampled in a way that
1366+
makes the upper number of candles for Bokeh to plot limited to 10_000.
1367+
This may, in situations of overabundant data,
1368+
improve plot's interactive performance and avoid browser's
1369+
`Javascript Error: Maximum call stack size exceeded` or similar.
1370+
Equity & drawdown curves and individual trades data are,
1371+
likewise, [reasonably _aggregated_][TRADES_AGG].
1372+
`resample` can also be a [Pandas offset string],
1373+
such as `'5T'` or `'5min'`, in which case this frequency will be
1374+
used to resample, overriding above numeric limitation.
1375+
Note, all this only works for data with a datetime index.
1376+
13581377
[Pandas offset string]: \
1359-
http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
1378+
https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects
1379+
1380+
[TRADES_AGG]: lib.html#backtesting.lib.TRADES_AGG
13601381
13611382
If `show_legend` is `True`, the resulting plot graphs will contain
13621383
labeled legends.
@@ -1382,5 +1403,6 @@ def plot(self, *, results: pd.Series = None, filename=None, plot_width=None,
13821403
smooth_equity=smooth_equity,
13831404
relative_equity=relative_equity,
13841405
superimpose=superimpose,
1406+
resample=resample,
13851407
show_legend=show_legend,
13861408
open_browser=open_browser)

backtesting/lib.py

+25
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,31 @@
4040
df.resample('4H', label='right').agg(OHLCV_AGG)
4141
"""
4242

43+
TRADES_AGG = OrderedDict([
    ('Size', 'sum'),
    ('EntryBar', 'first'),
    ('ExitBar', 'last'),
    ('EntryPrice', 'mean'),
    ('ExitPrice', 'mean'),
    ('PnL', 'sum'),
    ('ReturnPct', 'mean'),
    ('EntryTime', 'first'),
    ('ExitTime', 'last'),
    ('Duration', 'sum'),
])
"""Dictionary of rules for aggregating resampled trades data,
e.g.

    stats['_trades'].resample('1D', on='ExitTime',
                              label='right').agg(TRADES_AGG)
"""

# Per-column aggregation rules for the equity curve frame (non-public).
_EQUITY_AGG = dict(
    Equity='mean',
    DrawdownPct='max',
    DrawdownDuration='max',
)
67+
4368

4469
def barssince(condition: Sequence[bool], default=np.inf) -> int:
4570
"""

backtesting/test/_test.py

+12
Original file line numberDiff line numberDiff line change
@@ -452,6 +452,7 @@ def test_params(self):
452452
plot_pl=False,
453453
plot_drawdown=True,
454454
superimpose=False,
455+
resample='1W',
455456
smooth_equity=False,
456457
relative_equity=False,
457458
show_legend=False).items():
@@ -534,6 +535,17 @@ def next(self):
534535
# Give browser time to open before tempfile is removed
535536
time.sleep(1)
536537

538+
def test_resample(self):
    """Plotting with a tiny `_MAX_CANDLES` limit must emit a `UserWarning`."""
    bt = Backtest(GOOG, SmaCross)
    bt.run()
    import backtesting._plotting as plotting_module

    # Shrink the candle limit so the sample data exceeds it
    tiny_limit = patch.object(plotting_module, '_MAX_CANDLES', 10)
    with _tempfile() as f:
        with tiny_limit:
            with self.assertWarns(UserWarning):
                bt.plot(filename=f, resample=True)
                # Give browser time to open before tempfile is removed
                time.sleep(1)
548+
537549
def test_indicator_color(self):
538550
class S(Strategy):
539551
def init(self):

0 commit comments

Comments
 (0)