
Commit 77f4c46

crazy25000 and kernc authored
ENH: Extract strategy performance method compute_stats (#281)
* ENH: Extract methods _compute_stats and _compute_drawdown_duration_peaks from Backtest
* ENH: Move compute stats methods to new file, _stats.py
* TST: Update unit tests for compute_drawdown_duration_peaks
* TST: Remove ignore type for CI test failure
* REF: Remove broker dependency from compute_stats, update Sharpe ratio to use risk-free rate
* REF: Update self._results to account for compute_stats change, fix typo
* Update backtesting/backtesting.py (Co-authored-by: kernc <[email protected]>)
* Update backtesting/backtesting.py (Co-authored-by: kernc <[email protected]>)
* REF: Add risk_free_rate to Sortino ratio
* ENH: Add compute_stats to lib, provide public method
* REF: Extract params to reduce line length
* REF: Use strategy broker to calculate equity
* REF: Use example from test
* Update, make more idempotent, add doc, test

Co-authored-by: kernc <[email protected]>
1 parent a0c6589 commit 77f4c46

File tree

4 files changed: 214 additions & 132 deletions


backtesting/_stats.py

Lines changed: 153 additions & 0 deletions
@@ -0,0 +1,153 @@
+from typing import List, TYPE_CHECKING, Union
+
+import numpy as np
+import pandas as pd
+
+from ._util import _data_period
+
+if TYPE_CHECKING:
+    from .backtesting import Strategy, Trade
+
+
+def compute_drawdown_duration_peaks(dd: pd.Series):
+    iloc = np.unique(np.r_[(dd == 0).values.nonzero()[0], len(dd) - 1])
+    iloc = pd.Series(iloc, index=dd.index[iloc])
+    df = iloc.to_frame('iloc').assign(prev=iloc.shift())
+    df = df[df['iloc'] > df['prev'] + 1].astype(int)
+
+    # If no drawdown since no trade, avoid below for pandas sake and return nan series
+    if not len(df):
+        return (dd.replace(0, np.nan),) * 2
+
+    df['duration'] = df['iloc'].map(dd.index.__getitem__) - df['prev'].map(dd.index.__getitem__)
+    df['peak_dd'] = df.apply(lambda row: dd.iloc[row['prev']:row['iloc'] + 1].max(), axis=1)
+    df = df.reindex(dd.index)
+    return df['duration'], df['peak_dd']
+
+
+def geometric_mean(returns: pd.Series) -> float:
+    returns = returns.fillna(0) + 1
+    if np.any(returns <= 0):
+        return 0
+    return np.exp(np.log(returns).sum() / (len(returns) or np.nan)) - 1
+
+
+def compute_stats(
+        trades: Union[List['Trade'], pd.DataFrame],
+        equity: np.ndarray,
+        ohlc_data: pd.DataFrame,
+        strategy_instance: 'Strategy',
+        risk_free_rate: float = 0,
+) -> pd.Series:
+    assert -1 < risk_free_rate < 1
+
+    index = ohlc_data.index
+    dd = 1 - equity / np.maximum.accumulate(equity)
+    dd_dur, dd_peaks = compute_drawdown_duration_peaks(pd.Series(dd, index=index))
+
+    equity_df = pd.DataFrame({
+        'Equity': equity,
+        'DrawdownPct': dd,
+        'DrawdownDuration': dd_dur},
+        index=index)
+
+    if isinstance(trades, pd.DataFrame):
+        trades_df = trades
+    else:
+        # Came straight from Backtest.run()
+        trades_df = pd.DataFrame({
+            'Size': [t.size for t in trades],
+            'EntryBar': [t.entry_bar for t in trades],
+            'ExitBar': [t.exit_bar for t in trades],
+            'EntryPrice': [t.entry_price for t in trades],
+            'ExitPrice': [t.exit_price for t in trades],
+            'PnL': [t.pl for t in trades],
+            'ReturnPct': [t.pl_pct for t in trades],
+            'EntryTime': [t.entry_time for t in trades],
+            'ExitTime': [t.exit_time for t in trades],
+        })
+        trades_df['Duration'] = trades_df['ExitTime'] - trades_df['EntryTime']
+    del trades
+
+    pl = trades_df['PnL']
+    returns = trades_df['ReturnPct']
+    durations = trades_df['Duration']
+
+    def _round_timedelta(value, _period=_data_period(index)):
+        if not isinstance(value, pd.Timedelta):
+            return value
+        resolution = getattr(_period, 'resolution_string', None) or _period.resolution
+        return value.ceil(resolution)
+
+    s = pd.Series(dtype=object)
+    s.loc['Start'] = index[0]
+    s.loc['End'] = index[-1]
+    s.loc['Duration'] = s.End - s.Start
+
+    have_position = np.repeat(0, len(index))
+    for t in trades_df.itertuples(index=False):
+        have_position[t.EntryBar:t.ExitBar + 1] = 1
+
+    s.loc['Exposure Time [%]'] = have_position.mean() * 100  # In "n bars" time, not index time
+    s.loc['Equity Final [$]'] = equity[-1]
+    s.loc['Equity Peak [$]'] = equity.max()
+    s.loc['Return [%]'] = (equity[-1] - equity[0]) / equity[0] * 100
+    c = ohlc_data.Close.values
+    s.loc['Buy & Hold Return [%]'] = (c[-1] - c[0]) / c[0] * 100  # long-only return
+
+    gmean_day_return: float = 0
+    day_returns = np.array(np.nan)
+    annual_trading_days = np.nan
+    if isinstance(index, pd.DatetimeIndex):
+        day_returns = equity_df['Equity'].resample('D').last().dropna().pct_change()
+        gmean_day_return = geometric_mean(day_returns)
+        annual_trading_days = float(
+            365 if index.dayofweek.to_series().between(5, 6).mean() > 2/7 * .6 else
+            252)
+
+    # Annualized return and risk metrics are computed based on the (mostly correct)
+    # assumption that the returns are compounded. See: https://dx.doi.org/10.2139/ssrn.3054517
+    # Our annualized return matches `empyrical.annual_return(day_returns)` whereas
+    # our risk doesn't; they use the simpler approach below.
+    annualized_return = (1 + gmean_day_return)**annual_trading_days - 1
+    s.loc['Return (Ann.) [%]'] = annualized_return * 100
+    s.loc['Volatility (Ann.) [%]'] = np.sqrt((day_returns.var(ddof=int(bool(day_returns.shape))) + (1 + gmean_day_return)**2)**annual_trading_days - (1 + gmean_day_return)**(2*annual_trading_days)) * 100  # noqa: E501
+    # s.loc['Return (Ann.) [%]'] = gmean_day_return * annual_trading_days * 100
+    # s.loc['Risk (Ann.) [%]'] = day_returns.std(ddof=1) * np.sqrt(annual_trading_days) * 100
+
+    # Our Sharpe mismatches `empyrical.sharpe_ratio()` because they use arithmetic mean return
+    # and simple standard deviation
+    s.loc['Sharpe Ratio'] = np.clip((s.loc['Return (Ann.) [%]'] - risk_free_rate) / (s.loc['Volatility (Ann.) [%]'] or np.nan), 0, np.inf)  # noqa: E501
+    # Our Sortino mismatches `empyrical.sortino_ratio()` because they use arithmetic mean return
+    s.loc['Sortino Ratio'] = np.clip((annualized_return - risk_free_rate) / (np.sqrt(np.mean(day_returns.clip(-np.inf, 0)**2)) * np.sqrt(annual_trading_days)), 0, np.inf)  # noqa: E501
+    max_dd = -np.nan_to_num(dd.max())
+    s.loc['Calmar Ratio'] = np.clip(annualized_return / (-max_dd or np.nan), 0, np.inf)
+    s.loc['Max. Drawdown [%]'] = max_dd * 100
+    s.loc['Avg. Drawdown [%]'] = -dd_peaks.mean() * 100
+    s.loc['Max. Drawdown Duration'] = _round_timedelta(dd_dur.max())
+    s.loc['Avg. Drawdown Duration'] = _round_timedelta(dd_dur.mean())
+    s.loc['# Trades'] = n_trades = len(trades_df)
+    s.loc['Win Rate [%]'] = np.nan if not n_trades else (pl > 0).sum() / n_trades * 100  # noqa: E501
+    s.loc['Best Trade [%]'] = returns.max() * 100
+    s.loc['Worst Trade [%]'] = returns.min() * 100
+    mean_return = geometric_mean(returns)
+    s.loc['Avg. Trade [%]'] = mean_return * 100
+    s.loc['Max. Trade Duration'] = _round_timedelta(durations.max())
+    s.loc['Avg. Trade Duration'] = _round_timedelta(durations.mean())
+    s.loc['Profit Factor'] = returns[returns > 0].sum() / (abs(returns[returns < 0].sum()) or np.nan)  # noqa: E501
+    s.loc['Expectancy [%]'] = returns.mean() * 100
+    s.loc['SQN'] = np.sqrt(n_trades) * pl.mean() / (pl.std() or np.nan)
+
+    s.loc['_strategy'] = strategy_instance
+    s.loc['_equity_curve'] = equity_df
+    s.loc['_trades'] = trades_df
+
+    s = _Stats(s)
+    return s
+
+
+class _Stats(pd.Series):
+    def __repr__(self):
+        # Prevent expansion due to _equity and _trades dfs
+        with pd.option_context('max_colwidth', 20):
+            return super().__repr__()
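For orientation, below is a minimal, hypothetical smoke test of the two extracted helpers. It is not part of this commit or the project's test suite, and _stats is a private module, so the import is for illustration only:

import numpy as np
import pandas as pd

from backtesting._stats import compute_drawdown_duration_peaks, geometric_mean

# Toy equity curve with a single drawdown episode: peak 100, trough 90, recovery
equity = np.array([100., 95., 90., 100., 101.])
dd = 1 - equity / np.maximum.accumulate(equity)  # same drawdown formula compute_stats() uses
index = pd.date_range('2021-01-01', periods=len(equity), freq='D')

dd_dur, dd_peaks = compute_drawdown_duration_peaks(pd.Series(dd, index=index))
print(dd_dur.dropna())    # one episode lasting Timedelta('3 days')
print(dd_peaks.dropna())  # its maximum depth, 0.10 (i.e. 10%)

# Geometric mean of per-period returns; defined as 0 if any period loses 100% or more
print(geometric_mean(pd.Series([.10, -.05, .02])))  # ~0.0215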

backtesting/backtesting.py

Lines changed: 12 additions & 131 deletions
@@ -29,7 +29,8 @@ def _tqdm(seq, **_):
         return seq
 
 from ._plotting import plot
-from ._util import _as_str, _Indicator, _Data, _data_period, try_
+from ._stats import compute_stats
+from ._util import _as_str, _Indicator, _Data, try_
 
 __pdoc__ = {
     'Strategy.__init__': False,

@@ -1089,7 +1090,7 @@ def __init__(self,
             exclusive_orders=exclusive_orders, index=data.index,
         )
         self._strategy = strategy
-        self._results = None
+        self._results: Optional[pd.Series] = None
 
     def run(self, **kwargs) -> pd.Series:
         """

@@ -1180,7 +1181,15 @@ def run(self, **kwargs) -> pd.Series:
             # for future `indicator._opts['data'].index` calls to work
             data._set_length(len(self._data))
 
-            self._results = self._compute_stats(broker, strategy)
+            equity = pd.Series(broker._equity).bfill().fillna(broker._cash).values
+            self._results = compute_stats(
+                trades=broker.closed_trades,
+                equity=equity,
+                ohlc_data=self._data,
+                risk_free_rate=0.0,
+                strategy_instance=strategy,
+            )
 
         return self._results
 
     def optimize(self, *,

@@ -1491,134 +1500,6 @@ def _mp_task(backtest_uuid, batch_index):
 
     _mp_backtests: Dict[float, Tuple['Backtest', List, Callable]] = {}
 
-    @staticmethod
-    def _compute_drawdown_duration_peaks(dd: pd.Series):
-        iloc = np.unique(np.r_[(dd == 0).values.nonzero()[0], len(dd) - 1])
-        iloc = pd.Series(iloc, index=dd.index[iloc])
-        df = iloc.to_frame('iloc').assign(prev=iloc.shift())
-        df = df[df['iloc'] > df['prev'] + 1].astype(int)
-        # If no drawdown since no trade, avoid below for pandas sake and return nan series
-        if not len(df):
-            return (dd.replace(0, np.nan),) * 2
-        df['duration'] = df['iloc'].map(dd.index.__getitem__) - df['prev'].map(dd.index.__getitem__)
-        df['peak_dd'] = df.apply(lambda row: dd.iloc[row['prev']:row['iloc'] + 1].max(), axis=1)
-        df = df.reindex(dd.index)
-        return df['duration'], df['peak_dd']
-
-    def _compute_stats(self, broker: _Broker, strategy: Strategy) -> pd.Series:
-        data = self._data
-        index = data.index
-
-        equity = pd.Series(broker._equity).bfill().fillna(broker._cash).values
-        dd = 1 - equity / np.maximum.accumulate(equity)
-        dd_dur, dd_peaks = self._compute_drawdown_duration_peaks(pd.Series(dd, index=index))
-
-        equity_df = pd.DataFrame({
-            'Equity': equity,
-            'DrawdownPct': dd,
-            'DrawdownDuration': dd_dur},
-            index=index)
-
-        trades = broker.closed_trades
-        trades_df = pd.DataFrame({
-            'Size': [t.size for t in trades],
-            'EntryBar': [t.entry_bar for t in trades],
-            'ExitBar': [t.exit_bar for t in trades],
-            'EntryPrice': [t.entry_price for t in trades],
-            'ExitPrice': [t.exit_price for t in trades],
-            'PnL': [t.pl for t in trades],
-            'ReturnPct': [t.pl_pct for t in trades],
-            'EntryTime': [t.entry_time for t in trades],
-            'ExitTime': [t.exit_time for t in trades],
-        })
-        trades_df['Duration'] = trades_df['ExitTime'] - trades_df['EntryTime']
-
-        pl = trades_df['PnL']
-        returns = trades_df['ReturnPct']
-        durations = trades_df['Duration']
-
-        def _round_timedelta(value, _period=_data_period(index)):
-            if not isinstance(value, pd.Timedelta):
-                return value
-            resolution = getattr(_period, 'resolution_string', None) or _period.resolution
-            return value.ceil(resolution)
-
-        s = pd.Series(dtype=object)
-        s.loc['Start'] = index[0]
-        s.loc['End'] = index[-1]
-        s.loc['Duration'] = s.End - s.Start
-
-        have_position = np.repeat(0, len(index))
-        for t in trades:
-            have_position[t.entry_bar:t.exit_bar + 1] = 1  # type: ignore
-
-        s.loc['Exposure Time [%]'] = have_position.mean() * 100  # In "n bars" time, not index time
-        s.loc['Equity Final [$]'] = equity[-1]
-        s.loc['Equity Peak [$]'] = equity.max()
-        s.loc['Return [%]'] = (equity[-1] - equity[0]) / equity[0] * 100
-        c = data.Close.values
-        s.loc['Buy & Hold Return [%]'] = (c[-1] - c[0]) / c[0] * 100  # long-only return
-
-        def geometric_mean(returns):
-            returns = returns.fillna(0) + 1
-            return (0 if np.any(returns <= 0) else
-                    np.exp(np.log(returns).sum() / (len(returns) or np.nan)) - 1)
-
-        day_returns = gmean_day_return = np.array(np.nan)
-        annual_trading_days = np.nan
-        if isinstance(index, pd.DatetimeIndex):
-            day_returns = equity_df['Equity'].resample('D').last().dropna().pct_change()
-            gmean_day_return = geometric_mean(day_returns)
-            annual_trading_days = float(
-                365 if index.dayofweek.to_series().between(5, 6).mean() > 2/7 * .6 else
-                252)
-
-        # Annualized return and risk metrics are computed based on the (mostly correct)
-        # assumption that the returns are compounded. See: https://dx.doi.org/10.2139/ssrn.3054517
-        # Our annualized return matches `empyrical.annual_return(day_returns)` whereas
-        # our risk doesn't; they use the simpler approach below.
-        annualized_return = (1 + gmean_day_return)**annual_trading_days - 1
-        s.loc['Return (Ann.) [%]'] = annualized_return * 100
-        s.loc['Volatility (Ann.) [%]'] = np.sqrt((day_returns.var(ddof=int(bool(day_returns.shape))) + (1 + gmean_day_return)**2)**annual_trading_days - (1 + gmean_day_return)**(2*annual_trading_days)) * 100  # noqa: E501
-        # s.loc['Return (Ann.) [%]'] = gmean_day_return * annual_trading_days * 100
-        # s.loc['Risk (Ann.) [%]'] = day_returns.std(ddof=1) * np.sqrt(annual_trading_days) * 100
-
-        # Our Sharpe mismatches `empyrical.sharpe_ratio()` because they use arithmetic mean return
-        # and simple standard deviation
-        s.loc['Sharpe Ratio'] = np.clip(s.loc['Return (Ann.) [%]'] / (s.loc['Volatility (Ann.) [%]'] or np.nan), 0, np.inf)  # noqa: E501
-        # Our Sortino mismatches `empyrical.sortino_ratio()` because they use arithmetic mean return
-        s.loc['Sortino Ratio'] = np.clip(annualized_return / (np.sqrt(np.mean(day_returns.clip(-np.inf, 0)**2)) * np.sqrt(annual_trading_days)), 0, np.inf)  # noqa: E501
-        max_dd = -np.nan_to_num(dd.max())
-        s.loc['Calmar Ratio'] = np.clip(annualized_return / (-max_dd or np.nan), 0, np.inf)
-        s.loc['Max. Drawdown [%]'] = max_dd * 100
-        s.loc['Avg. Drawdown [%]'] = -dd_peaks.mean() * 100
-        s.loc['Max. Drawdown Duration'] = _round_timedelta(dd_dur.max())
-        s.loc['Avg. Drawdown Duration'] = _round_timedelta(dd_dur.mean())
-        s.loc['# Trades'] = n_trades = len(trades)
-        s.loc['Win Rate [%]'] = np.nan if not n_trades else (pl > 0).sum() / n_trades * 100  # noqa: E501
-        s.loc['Best Trade [%]'] = returns.max() * 100
-        s.loc['Worst Trade [%]'] = returns.min() * 100
-        mean_return = geometric_mean(returns)
-        s.loc['Avg. Trade [%]'] = mean_return * 100
-        s.loc['Max. Trade Duration'] = _round_timedelta(durations.max())
-        s.loc['Avg. Trade Duration'] = _round_timedelta(durations.mean())
-        s.loc['Profit Factor'] = returns[returns > 0].sum() / (abs(returns[returns < 0].sum()) or np.nan)  # noqa: E501
-        s.loc['Expectancy [%]'] = returns.mean() * 100
-        s.loc['SQN'] = np.sqrt(n_trades) * pl.mean() / (pl.std() or np.nan)
-
-        s.loc['_strategy'] = strategy
-        s.loc['_equity_curve'] = equity_df
-        s.loc['_trades'] = trades_df
-
-        s = Backtest._Stats(s)
-        return s
-
-    class _Stats(pd.Series):
-        def __repr__(self):
-            # Prevent expansion due to _equity and _trades dfs
-            with pd.option_context('max_colwidth', 20):
-                return super().__repr__()
-
     def plot(self, *, results: pd.Series = None, filename=None, plot_width=None,
              plot_equity=True, plot_return=False, plot_pl=True,
              plot_volume=True, plot_drawdown=False,
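With this refactoring, Backtest.run() only assembles the equity array (back-filling pre-trade NaNs with the starting cash) and delegates everything else to _stats.compute_stats(). A quick end-to-end sanity check of the new path, using the GOOG data and SmaCross strategy bundled in backtesting.test:

from backtesting import Backtest
from backtesting.test import GOOG, SmaCross

bt = Backtest(GOOG, SmaCross, cash=10_000, commission=.002)
stats = bt.run()  # internally: compute_stats(trades=broker.closed_trades, equity=..., ...)

print(stats['Sharpe Ratio'], stats['Max. Drawdown [%]'])
print(stats._trades.head())        # per-trade dataframe built by compute_stats()
print(stats._equity_curve.head())  # Equity / DrawdownPct / DrawdownDuration columns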

backtesting/lib.py

Lines changed: 33 additions & 0 deletions
@@ -22,6 +22,7 @@
 
 from .backtesting import Strategy
 from ._plotting import plot_heatmaps as _plot_heatmaps
+from ._stats import compute_stats as _compute_stats
 from ._util import _Array, _as_str
 
 __pdoc__ = {}

@@ -164,6 +165,38 @@ def quantile(series: Sequence, quantile: Union[None, float] = None):
     return np.nanpercentile(series, quantile * 100)
 
 
+def compute_stats(
+        *,
+        stats: pd.Series,
+        data: pd.DataFrame,
+        trades: pd.DataFrame = None,
+        risk_free_rate: float = 0.) -> pd.Series:
+    """
+    (Re-)compute strategy performance metrics.
+
+    `stats` is the statistics series as returned by `Backtest.run()`.
+    `data` is OHLC data as passed to the `Backtest` the `stats` were obtained in.
+    `trades` can be a dataframe subset of `stats._trades` (e.g. only long trades).
+    You can also tune `risk_free_rate`, used in calculation of Sharpe and Sortino ratios.
+
+    >>> stats = Backtest(GOOG, MyStrategy).run()
+    >>> only_long_trades = stats._trades[stats._trades.Size > 0]
+    >>> long_stats = compute_stats(stats=stats, trades=only_long_trades,
+    ...                            data=GOOG, risk_free_rate=.02)
+    """
+    equity = stats._equity_curve.Equity
+    if trades is None:
+        trades = stats._trades
+    else:
+        # XXX: Is this buggy?
+        equity = equity.copy()
+        equity[:] = stats._equity_curve.Equity.iloc[0]
+        for t in trades.itertuples(index=False):
+            equity.iloc[t.EntryBar:] += t.PnL
+    return _compute_stats(trades=trades, equity=equity, ohlc_data=data,
+                          risk_free_rate=risk_free_rate, strategy_instance=stats._strategy)
+
+
 def resample_apply(rule: str,
                    func: Optional[Callable[..., Sequence]],
                    series: Union[pd.Series, pd.DataFrame, _Array],
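Besides re-scoring a subset of trades as in the docstring example above, the new public wrapper also lets you re-evaluate an existing run under a nonzero risk-free rate without re-running the backtest. A sketch, again assuming the bundled GOOG/SmaCross example; only the Sharpe and Sortino ratios should change:

from backtesting import Backtest
from backtesting.lib import compute_stats
from backtesting.test import GOOG, SmaCross

stats = Backtest(GOOG, SmaCross).run()

# Same trades and equity curve; _stats.compute_stats() asserts -1 < risk_free_rate < 1
adjusted = compute_stats(stats=stats, data=GOOG, risk_free_rate=.02)
print(stats['Sharpe Ratio'], '->', adjusted['Sharpe Ratio'])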
