Skip to content

Commit 70bdf5c

Browse files
committed
ENH: Add bt.plot(resample=) and auto-downsample large data
Fixes #35
1 parent 2223b22 commit 70bdf5c

File tree

4 files changed

+136
-13
lines changed

4 files changed

+136
-13
lines changed

backtesting/_plotting.py

+68-4
Original file line numberDiff line numberDiff line change
@@ -85,14 +85,65 @@ def lightness(color, lightness=.94):
8585
return color.to_rgb()
8686

8787

88+
_MAX_CANDLES = 10000


def _maybe_resample_data(resample_rule, df, indicators, equity_data, trades):
    """
    Downsample the data to be plotted to a coarser time frame.

    `resample_rule` is either a non-empty Pandas offset string, in which
    case that frequency is always applied, or a flag. If the flag is
    falsy, or `df` holds no more than `_MAX_CANDLES` rows, all inputs are
    returned unchanged. Otherwise the finest frequency that yields at most
    `_MAX_CANDLES` candles is picked and a warning is issued.

    `df` must have a datetime index. Returns the tuple
    `(df, indicators, equity_data, trades)`, resampled where applicable;
    resampled `trades` gain a `count` column with the number of trades
    aggregated into each row.
    """
    if isinstance(resample_rule, str) and resample_rule:
        # An explicit rule overrides the numeric limit
        freq = resample_rule
    else:
        if not resample_rule or len(df) <= _MAX_CANDLES:
            return df, indicators, equity_data, trades

        # Offset into FREQS from which to start looking for a suitable frequency,
        # based on the resolution of the data
        from_index = dict(day=-2, hour=-6, minute=1, second=0, millisecond=0,
                          microsecond=0, nanosecond=0)[df.index.resolution]
        FREQS = ('1T', '5T', '10T', '15T', '30T', '1H', '2H', '4H', '8H', '1D', '1W', '1M')
        # First (finest) candidate that fits the limit; fall back to the coarsest
        freq = next((f for f in FREQS[from_index:]
                     if len(df.resample(f)) <= _MAX_CANDLES), FREQS[-1])
        warnings.warn("Data contains too many candlesticks to plot; downsampling to {!r}. "
                      "See `Backtest.plot(resample=...)`".format(freq))

    from .lib import OHLCV_AGG, TRADES_AGG, _EQUITY_AGG
    df = df.resample(freq, label='right').agg(OHLCV_AGG).dropna()

    # Indicators must be binned with the same `label='right'` as OHLC data,
    # otherwise their values end up shifted against the bars after `reindex()`.
    # XXX: copy(True) pandas bug https://github.com/pandas-dev/pandas/issues/31710
    indicators = [
        _Indicator(
            i.s.copy(True).resample(freq, label='right').mean().dropna().reindex(df.index),
            **dict(i._opts,
                   # HACK: override `data` for its index
                   data=pd.Series(np.nan, index=df.index)))
        for i in indicators]
    assert not indicators or indicators[0].s.index.equals(df.index)

    equity_data = equity_data.resample(freq, label='right').agg(_EQUITY_AGG).dropna(how='all')
    assert equity_data.index.equals(df.index)

    # The helpers below bind the *original* `trades` (and the resampled
    # `df.index`) as defaults, before `trades` is reassigned further down.

    def _weighted_returns(s, trades=trades):
        # Mean return of the group's trades, weighted by absolute trade size
        group = trades.loc[s.index]
        weights = group['Size'].abs()
        return ((weights * group['ReturnPct']) / weights.sum()).sum()

    def _nearest_bar(column, trades=trades, index=df.index):
        # Aggregator mapping the group's mean `column` time onto the position
        # of the nearest bar in the resampled index
        def f(s):
            if not s.size:
                # Empty bin; the row is removed by the final `dropna()`
                return np.nan
            mean_time = trades.loc[s.index, column].mean()
            # `Index.get_loc(..., method='nearest')` is gone in pandas 2.0
            return index.get_indexer([mean_time], method='nearest')[0]
        return f

    trades = trades.assign(count=1).resample(freq, on='ExitTime', label='right').agg(dict(
        TRADES_AGG,
        ReturnPct=_weighted_returns,
        count='sum',
        EntryBar=_nearest_bar('EntryTime'),
        ExitBar=_nearest_bar('ExitTime'),
    )).dropna()

    return df, indicators, equity_data, trades
136+
137+
88138
def plot(*, results: pd.Series,
89139
df: pd.DataFrame,
90140
indicators: List[_Indicator],
91141
filename='', plot_width=None,
92142
plot_equity=True, plot_pl=True,
93143
plot_volume=True, plot_drawdown=False,
94144
smooth_equity=False, relative_equity=True,
95-
superimpose=True, show_legend=True, open_browser=True):
145+
superimpose=True, resample=True,
146+
show_legend=True, open_browser=True):
96147
"""
97148
Like much of GUI code everywhere, this is a mess.
98149
"""
@@ -111,15 +162,19 @@ def plot(*, results: pd.Series,
111162
trades = results['_trades']
112163

113164
plot_volume = plot_volume and not df.Volume.isnull().all()
114-
time_resolution = getattr(df.index, 'resolution', None)
115165
is_datetime_index = df.index.is_all_dates
116166

117167
from .lib import OHLCV_AGG
118168
# ohlc df may contain many columns. We're only interested in, and pass on to Bokeh, these
119169
df = df[list(OHLCV_AGG.keys())].copy(deep=False)
170+
171+
# Limit data to max_candles
172+
if is_datetime_index:
173+
df, indicators, equity_data, trades = _maybe_resample_data(
174+
resample, df, indicators, equity_data, trades)
175+
120176
df.index.name = None # Provides source name @index
121177
df['datetime'] = df.index # Save original, maybe datetime index
122-
123178
df = df.reset_index(drop=True)
124179
equity_data = equity_data.reset_index(drop=True)
125180
index = df.index
@@ -224,6 +279,10 @@ def _plot_equity_section():
224279
x1, x2 = dd_end - 1, dd_end
225280
y, y1, y2 = equity[dd_start], equity[x1], equity[x2]
226281
dd_end -= (1 - (y - y1) / (y2 - y1)) * (dd_end - x1) # y = a x + b
282+
# If _maybe_resample_data() was applied,
283+
# the agg'd equity might have "stretched" the calculation
284+
# XXX: test this?
285+
dd_end = min(dd_end, equity.index[-1])
227286

228287
if smooth_equity:
229288
interest_points = pd.Index([
@@ -321,11 +380,15 @@ def _plot_pl_section():
321380
trade_source.add(returns_long, 'returns_long')
322381
trade_source.add(returns_short, 'returns_short')
323382
trade_source.add(size, 'marker_size')
383+
if 'count' in trades:
384+
trade_source.add(trades['count'], 'count')
324385
r1 = fig.scatter('index', 'returns_long', source=trade_source, fill_color=cmap,
325386
marker='triangle', line_color='black', size='marker_size')
326387
r2 = fig.scatter('index', 'returns_short', source=trade_source, fill_color=cmap,
327388
marker='inverted_triangle', line_color='black', size='marker_size')
328389
tooltips = [("Size", "@size{0,0}")]
390+
if 'count' in trades:
391+
tooltips.append(("Count", "@count{0,0}"))
329392
set_tooltips(fig, tooltips + [("P/L", "@returns_long{+0.[000]%}")],
330393
vline=False, renderers=[r1])
331394
set_tooltips(fig, tooltips + [("P/L", "@returns_short{+0.[000]%}")],
@@ -346,8 +409,9 @@ def _plot_volume_section():
346409

347410
def _plot_superimposed_ohlc():
348411
"""Superimposed, downsampled vbars"""
412+
time_resolution = pd.DatetimeIndex(df['datetime']).resolution
349413
resample_rule = (superimpose if isinstance(superimpose, str) else
350-
dict(day='W',
414+
dict(day='M',
351415
hour='D',
352416
minute='H',
353417
second='T',

backtesting/backtesting.py

+31-9
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ def init():
141141

142142
value = _Indicator(value, name=name, plot=plot, overlay=overlay,
143143
color=color, scatter=scatter,
144-
# lib.resample_apply() uses this:
144+
# _Indicator.s Series accessor uses this:
145145
data=self.data)
146146
self._indicators.append(value)
147147
return value
@@ -952,7 +952,8 @@ def __init__(self,
952952
stacklevel=2)
953953
data = data.sort_index()
954954
if not data.index.is_all_dates:
955-
warnings.warn('Data index is not datetime. Assuming simple periods.',
955+
warnings.warn('Data index is not datetime. Assuming simple periods, '
956+
'but `pd.DateTimeIndex` is advised.',
956957
stacklevel=2)
957958

958959
self._data = data # type: pd.DataFrame
@@ -1006,6 +1007,10 @@ def run(self, **kwargs) -> pd.Series:
10061007
# Next tick, a moment before bar close
10071008
strategy.next()
10081009

1010+
# Set data back to full length
1011+
# for future `indicator._opts['data'].index` calls to work
1012+
data._set_length(len(self._data))
1013+
10091014
self._results = self._compute_stats(broker, strategy)
10101015
return self._results
10111016

@@ -1250,6 +1255,7 @@ def plot(self, *, results: pd.Series = None, filename=None, plot_width=None,
12501255
plot_volume=True, plot_drawdown=False,
12511256
smooth_equity=False, relative_equity=True,
12521257
superimpose: Union[bool, str] = True,
1258+
resample=True,
12531259
show_legend=True, open_browser=True):
12541260
"""
12551261
Plot the progression of the last backtest run.
@@ -1287,16 +1293,31 @@ def plot(self, *, results: pd.Series = None, filename=None, plot_width=None,
12871293
If `relative_equity` is `True`, scale and label equity graph axis
12881294
with return percent, not absolute cash-equivalent values.
12891295
1290-
If `superimpose` is `True`, superimpose downsampled candlesticks
1291-
over the original candlestick chart. Default downsampling is:
1292-
weekly for daily data, daily for hourly data, hourly for minute data,
1293-
and minute for second and sub-second data.
1294-
`superimpose` can also be a string, in which case it is a valid
1295-
[Pandas offset string], such as `'5T'` or `'5min'`.
1296+
If `superimpose` is `True`, superimpose larger-timeframe candlesticks
1297+
over the original candlestick chart. Default downsampling rule is:
1298+
monthly for daily data, daily for hourly data, hourly for minute data,
1299+
and minute for (sub-)second data.
1300+
`superimpose` can also be a valid [Pandas offset string],
1301+
such as `'5T'` or `'5min'`, in which case this frequency will be
1302+
used to superimpose.
12961303
Note, this only works for data with a datetime index.
12971304
1305+
If `resample` is `True`, the OHLC data is resampled in a way that
1306+
limits the number of candles Bokeh has to plot to at most 10_000.
1307+
This may, in situations of overabundant data,
1308+
improve plot's interactive performance and avoid browser's
1309+
`Javascript Error: Maximum call stack size exceeded` or similar.
1310+
Equity & drawdown curves and individual trades data are,
1311+
likewise, [reasonably _aggregated_][TRADES_AGG].
1312+
`resample` can also be a [Pandas offset string],
1313+
such as `'5T'` or `'5min'`, in which case this frequency will be
1314+
used to resample, overriding above numeric limitation.
1315+
Note, all this only works for data with a datetime index.
1316+
12981317
[Pandas offset string]: \
1299-
http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases
1318+
https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#dateoffset-objects
1319+
1320+
[TRADES_AGG]: lib.html#backtesting.lib.TRADES_AGG
13001321
13011322
If `show_legend` is `True`, the resulting plot graphs will contain
13021323
labeled legends.
@@ -1322,5 +1343,6 @@ def plot(self, *, results: pd.Series = None, filename=None, plot_width=None,
13221343
smooth_equity=smooth_equity,
13231344
relative_equity=relative_equity,
13241345
superimpose=superimpose,
1346+
resample=resample,
13251347
show_legend=show_legend,
13261348
open_browser=open_browser)

backtesting/lib.py

+25
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,31 @@
4040
df.resample('4H', label='right').agg(OHLCV_AGG)
4141
"""
4242

43+
TRADES_AGG = OrderedDict((
    ('Size', 'sum'),
    ('EntryBar', 'first'),
    ('ExitBar', 'last'),
    ('EntryPrice', 'mean'),
    ('ExitPrice', 'mean'),
    ('PnL', 'sum'),
    ('ReturnPct', 'mean'),
    ('EntryTime', 'first'),
    ('ExitTime', 'last'),
    ('Duration', 'sum'),
))
"""Dictionary of rules for aggregating resampled trades data,
e.g.

    stats['_trades'].resample('1D', on='ExitTime',
                              label='right').agg(TRADES_AGG)
"""

# Rules for aggregating the resampled equity curve data.
# Bins are labelled with their right edge, so a bar's equity is the value
# at the end of its bin (like 'Close' in OHLC data); averaging would make
# the plotted equity, including the final value, differ from the actual one.
_EQUITY_AGG = {
    'Equity': 'last',
    'DrawdownPct': 'max',
    'DrawdownDuration': 'max',
}
67+
4368

4469
def barssince(condition: Sequence[bool], default=np.inf) -> int:
4570
"""

backtesting/test/_test.py

+12
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from runpy import run_path
99
from tempfile import NamedTemporaryFile, gettempdir
1010
from unittest import TestCase
11+
from unittest.mock import patch
1112

1213
import numpy as np
1314
import pandas as pd
@@ -500,6 +501,17 @@ def next(self):
500501
# Give browser time to open before tempfile is removed
501502
time.sleep(1)
502503

504+
def test_resample(self):
    """Plotting with a tiny candle limit must downsample and emit a warning."""
    from backtesting import _plotting

    backtest = Backtest(GOOG, SmaCross)
    backtest.run()
    with _tempfile() as path:
        # Force auto-downsampling by lowering the candle limit
        with patch.object(_plotting, '_MAX_CANDLES', 10):
            with self.assertWarns(UserWarning):
                backtest.plot(filename=path, resample=True)
                # Give browser time to open before tempfile is removed
                time.sleep(1)
514+
503515
def test_indicator_color(self):
504516
class S(Strategy):
505517
def init(self):

0 commit comments

Comments
 (0)