-
-
Notifications
You must be signed in to change notification settings - Fork 1.2k
/
Copy path_stats.py
156 lines (129 loc) · 6.63 KB
/
_stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
from typing import List, TYPE_CHECKING, Union
import numpy as np
import pandas as pd
from ._util import _data_period
if TYPE_CHECKING:
from .backtesting import Strategy, Trade
def compute_drawdown_duration_peaks(dd: pd.Series):
iloc = np.unique(np.r_[(dd == 0).values.nonzero()[0], len(dd) - 1])
iloc = pd.Series(iloc, index=dd.index[iloc])
df = iloc.to_frame('iloc').assign(prev=iloc.shift())
df = df[df['iloc'] > df['prev'] + 1].astype(int)
# If no drawdown since no trade, avoid below for pandas sake and return nan series
if not len(df):
return (dd.replace(0, np.nan),) * 2
df['duration'] = df['iloc'].map(dd.index.__getitem__) - df['prev'].map(dd.index.__getitem__)
df['peak_dd'] = df.apply(lambda row: dd.iloc[row['prev']:row['iloc'] + 1].max(), axis=1)
df = df.reindex(dd.index)
return df['duration'], df['peak_dd']
def geometric_mean(returns: pd.Series) -> float:
returns = returns.fillna(0) + 1
if np.any(returns <= 0):
return 0
return np.exp(np.log(returns).sum() / (len(returns) or np.nan)) - 1
def compute_stats(
trades: Union[List[Trade], pd.DataFrame],
equity: np.ndarray,
ohlc_data: dict[str, pd.DataFrame],
index: pd.Index,
strategy_instance: Strategy,
risk_free_rate: float = 0,
) -> pd.Series:
assert -1 < risk_free_rate < 1
is_single_instrument = len(ohlc_data) == 1
dd = 1 - equity / np.maximum.accumulate(equity)
dd_dur, dd_peaks = compute_drawdown_duration_peaks(pd.Series(dd, index=index))
equity_df = pd.DataFrame({
'Equity': equity,
'DrawdownPct': dd,
'DrawdownDuration': dd_dur},
index=index)
if isinstance(trades, pd.DataFrame):
trades_df = trades
else:
# Came straight from Backtest.run()
trades_df = pd.DataFrame({
'Size': [t.size for t in trades],
'EntryBar': [t.entry_bar for t in trades],
'ExitBar': [t.exit_bar for t in trades],
'EntryPrice': [t.entry_price for t in trades],
'ExitPrice': [t.exit_price for t in trades],
'PnL': [t.pl for t in trades],
'ReturnPct': [t.pl_pct for t in trades],
'EntryTime': [t.entry_time for t in trades],
'ExitTime': [t.exit_time for t in trades],
})
trades_df['Duration'] = trades_df['ExitTime'] - trades_df['EntryTime']
del trades
pl = trades_df['PnL']
returns = trades_df['ReturnPct']
durations = trades_df['Duration']
def _round_timedelta(value, _period=_data_period(index)):
if not isinstance(value, pd.Timedelta):
return value
resolution = getattr(_period, 'resolution_string', None) or _period.resolution
return value.ceil(resolution)
s = pd.Series(dtype=object)
s.loc['Start'] = index[0]
s.loc['End'] = index[-1]
s.loc['Duration'] = s.End - s.Start
have_position = np.repeat(0, len(index))
for t in trades_df.itertuples(index=False):
have_position[t.EntryBar:t.ExitBar + 1] = 1
s.loc['Exposure Time [%]'] = have_position.mean() * 100 # In "n bars" time, not index time
s.loc['Equity Final [$]'] = equity[-1]
s.loc['Equity Peak [$]'] = equity.max()
s.loc['Return [%]'] = (equity[-1] - equity[0]) / equity[0] * 100
if is_single_instrument:
c = ohlc_data['default_instrument'].Close.values
s.loc['Buy & Hold Return [%]'] = (c[-1] - c[0]) / c[0] * 100 # long-only return
gmean_day_return: float = 0
day_returns = np.array(np.nan)
annual_trading_days = np.nan
if isinstance(index, pd.DatetimeIndex):
day_returns = equity_df['Equity'].resample('D').last().dropna().pct_change()
gmean_day_return = geometric_mean(day_returns)
annual_trading_days = float(
365 if index.dayofweek.to_series().between(5, 6).mean() > 2/7 * .6 else
252)
# Annualized return and risk metrics are computed based on the (mostly correct)
# assumption that the returns are compounded. See: https://dx.doi.org/10.2139/ssrn.3054517
# Our annualized return matches `empyrical.annual_return(day_returns)` whereas
# our risk doesn't; they use the simpler approach below.
annualized_return = (1 + gmean_day_return)**annual_trading_days - 1
s.loc['Return (Ann.) [%]'] = annualized_return * 100
s.loc['Volatility (Ann.) [%]'] = np.sqrt((day_returns.var(ddof=int(bool(day_returns.shape))) + (1 + gmean_day_return)**2)**annual_trading_days - (1 + gmean_day_return)**(2*annual_trading_days)) * 100 # noqa: E501
# s.loc['Return (Ann.) [%]'] = gmean_day_return * annual_trading_days * 100
# s.loc['Risk (Ann.) [%]'] = day_returns.std(ddof=1) * np.sqrt(annual_trading_days) * 100
# Our Sharpe mismatches `empyrical.sharpe_ratio()` because they use arithmetic mean return
# and simple standard deviation
s.loc['Sharpe Ratio'] = np.clip((s.loc['Return (Ann.) [%]'] - risk_free_rate) / (s.loc['Volatility (Ann.) [%]'] or np.nan), 0, np.inf) # noqa: E501
# Our Sortino mismatches `empyrical.sortino_ratio()` because they use arithmetic mean return
s.loc['Sortino Ratio'] = np.clip((annualized_return - risk_free_rate) / (np.sqrt(np.mean(day_returns.clip(-np.inf, 0)**2)) * np.sqrt(annual_trading_days)), 0, np.inf) # noqa: E501
max_dd = -np.nan_to_num(dd.max())
s.loc['Calmar Ratio'] = np.clip(annualized_return / (-max_dd or np.nan), 0, np.inf)
s.loc['Max. Drawdown [%]'] = max_dd * 100
s.loc['Avg. Drawdown [%]'] = -dd_peaks.mean() * 100
s.loc['Max. Drawdown Duration'] = _round_timedelta(dd_dur.max())
s.loc['Avg. Drawdown Duration'] = _round_timedelta(dd_dur.mean())
s.loc['# Trades'] = n_trades = len(trades_df)
s.loc['Win Rate [%]'] = np.nan if not n_trades else (pl > 0).sum() / n_trades * 100 # noqa: E501
s.loc['Best Trade [%]'] = returns.max() * 100
s.loc['Worst Trade [%]'] = returns.min() * 100
mean_return = geometric_mean(returns)
s.loc['Avg. Trade [%]'] = mean_return * 100
s.loc['Max. Trade Duration'] = _round_timedelta(durations.max())
s.loc['Avg. Trade Duration'] = _round_timedelta(durations.mean())
s.loc['Profit Factor'] = returns[returns > 0].sum() / (abs(returns[returns < 0].sum()) or np.nan) # noqa: E501
s.loc['Expectancy [%]'] = returns.mean() * 100
s.loc['SQN'] = np.sqrt(n_trades) * pl.mean() / (pl.std() or np.nan)
s.loc['_strategy'] = strategy_instance
s.loc['_equity_curve'] = equity_df
s.loc['_trades'] = trades_df
s = _Stats(s)
return s
class _Stats(pd.Series):
def __repr__(self):
# Prevent expansion due to _equity and _trades dfs
with pd.option_context('max_colwidth', 20):
return super().__repr__()