Skip to content

ENH: Extract strategy performance method compute_stats #281

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Aug 3, 2021
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 142 additions & 0 deletions backtesting/_stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
from typing import List

import numpy as np
import pandas as pd

from ._util import _data_period


def compute_drawdown_duration_peaks(dd: pd.Series):
    """
    Given a drawdown series `dd` (fraction below running equity peak, 0 when
    at a new peak), return two series aligned to `dd.index`:
    drawdown durations and peak drawdown depths, each valued only at the
    bars where a drawdown period ends (NaN elsewhere).
    """
    # Positions where equity is at a peak (dd == 0), plus the final bar so
    # an ongoing drawdown is closed out at the end of the data.
    recovery = np.unique(np.r_[(dd == 0).values.nonzero()[0], len(dd) - 1])
    recovery = pd.Series(recovery, index=dd.index[recovery])
    spans = recovery.to_frame('iloc')
    spans['prev'] = recovery.shift()
    # Keep only consecutive recovery points with at least one bar of
    # drawdown in between (NaN `prev` of the first row is dropped here too).
    spans = spans[spans['iloc'] > spans['prev'] + 1].astype(int)

    # No drawdown at all (e.g. no trades): return NaN series of matching shape
    if spans.empty:
        return (dd.replace(0, np.nan),) * 2

    index_at = dd.index.__getitem__
    spans['duration'] = spans['iloc'].map(index_at) - spans['prev'].map(index_at)
    spans['peak_dd'] = [dd.iloc[start:end + 1].max()
                        for start, end in zip(spans['prev'], spans['iloc'])]
    spans = spans.reindex(dd.index)
    return spans['duration'], spans['peak_dd']


def geometric_mean(returns: pd.Series) -> float:
    """
    Geometric mean of a series of period returns (fractions, NaNs treated
    as 0). Returns 0 if any compounded value is non-positive, since the
    geometric mean is undefined there.
    """
    compounded = returns.fillna(0) + 1
    if np.any(compounded <= 0):
        return 0
    # exp(mean(log(x))) - 1; `or np.nan` guards the empty-series case
    log_sum = np.log(compounded).sum()
    return np.exp(log_sum / (len(compounded) or np.nan)) - 1


def compute_stats(
        trades: List[pd.DataFrame],
        equity: np.ndarray,
        ohlc_data: pd.DataFrame,
        risk_free_rate: float = 0) -> pd.Series:
    """
    Compute strategy performance metrics.

    `trades` is a sequence of closed trade objects (each exposing `size`,
    `entry_bar`, `exit_bar`, `entry_price`, `exit_price`, `pl`, `pl_pct`,
    `entry_time`, `exit_time`); `equity` holds one equity value per bar of
    `ohlc_data`, whose index supplies the time axis; `risk_free_rate` is an
    annual rate expressed as a fraction (e.g. ``0.02`` for 2%).

    Returns a `pd.Series` of metrics, with the full `_equity_curve` and
    `_trades` frames attached for further inspection.
    """
    index = ohlc_data.index

    # Drawdown: fractional distance below the running equity peak
    dd = 1 - equity / np.maximum.accumulate(equity)
    dd_dur, dd_peaks = compute_drawdown_duration_peaks(pd.Series(dd, index=index))

    equity_df = pd.DataFrame({
        'Equity': equity,
        'DrawdownPct': dd,
        'DrawdownDuration': dd_dur},
        index=index)

    trades_df = pd.DataFrame({
        'Size': [t.size for t in trades],
        'EntryBar': [t.entry_bar for t in trades],
        'ExitBar': [t.exit_bar for t in trades],
        'EntryPrice': [t.entry_price for t in trades],
        'ExitPrice': [t.exit_price for t in trades],
        'PnL': [t.pl for t in trades],
        'ReturnPct': [t.pl_pct for t in trades],
        'EntryTime': [t.entry_time for t in trades],
        'ExitTime': [t.exit_time for t in trades],
    })
    trades_df['Duration'] = trades_df['ExitTime'] - trades_df['EntryTime']

    pl = trades_df['PnL']
    returns = trades_df['ReturnPct']
    durations = trades_df['Duration']

    def _round_timedelta(value, _period=_data_period(index)):
        # Round timedeltas up to the data's native resolution for display
        if not isinstance(value, pd.Timedelta):
            return value
        resolution = getattr(_period, 'resolution_string', None) or _period.resolution
        return value.ceil(resolution)

    s = pd.Series(dtype=object)
    s.loc['Start'] = index[0]
    s.loc['End'] = index[-1]
    s.loc['Duration'] = s.End - s.Start

    have_position = np.repeat(0, len(index))
    for t in trades:
        have_position[t.entry_bar:t.exit_bar + 1] = 1

    s.loc['Exposure Time [%]'] = have_position.mean() * 100  # In "n bars" time, not index time
    s.loc['Equity Final [$]'] = equity[-1]
    s.loc['Equity Peak [$]'] = equity.max()
    s.loc['Return [%]'] = (equity[-1] - equity[0]) / equity[0] * 100
    c = ohlc_data.Close.values
    s.loc['Buy & Hold Return [%]'] = (c[-1] - c[0]) / c[0] * 100  # long-only return

    gmean_day_return: float = 0
    day_returns = np.array(np.nan)
    annual_trading_days = np.nan
    if isinstance(index, pd.DatetimeIndex):
        day_returns = equity_df['Equity'].resample('D').last().dropna().pct_change()
        gmean_day_return = geometric_mean(day_returns)
        # Heuristic: if weekend bars are well represented, assume a
        # 365-day (e.g. crypto) calendar, otherwise 252 trading days.
        annual_trading_days = float(
            365 if index.dayofweek.to_series().between(5, 6).mean() > 2/7 * .6 else
            252)

    # Annualized return and risk metrics are computed based on the (mostly correct)
    # assumption that the returns are compounded. See: https://dx.doi.org/10.2139/ssrn.3054517
    # Our annualized return matches `empyrical.annual_return(day_returns)` whereas
    # our risk doesn't; they use the simpler approach below.
    annualized_return = (1 + gmean_day_return)**annual_trading_days - 1
    s.loc['Return (Ann.) [%]'] = annualized_return * 100
    s.loc['Volatility (Ann.) [%]'] = np.sqrt((day_returns.var(ddof=int(bool(day_returns.shape))) + (1 + gmean_day_return)**2)**annual_trading_days - (1 + gmean_day_return)**(2*annual_trading_days)) * 100  # noqa: E501
    # s.loc['Return (Ann.) [%]'] = gmean_day_return * annual_trading_days * 100
    # s.loc['Risk (Ann.) [%]'] = day_returns.std(ddof=1) * np.sqrt(annual_trading_days) * 100

    # Our Sharpe mismatches `empyrical.sharpe_ratio()` because they use arithmetic mean return
    # and simple standard deviation.
    # NOTE: `Return (Ann.) [%]` is in percent while `risk_free_rate` is a
    # fraction, so the rate is scaled by 100 to keep the numerator's units
    # consistent (Sortino below already works entirely in fractions).
    s.loc['Sharpe Ratio'] = np.clip((s.loc['Return (Ann.) [%]'] - risk_free_rate * 100) / (s.loc['Volatility (Ann.) [%]'] or np.nan), 0, np.inf)  # noqa: E501
    # Our Sortino mismatches `empyrical.sortino_ratio()` because they use arithmetic mean return
    s.loc['Sortino Ratio'] = np.clip((annualized_return - risk_free_rate) / (np.sqrt(np.mean(day_returns.clip(-np.inf, 0)**2)) * np.sqrt(annual_trading_days)), 0, np.inf)  # noqa: E501
    max_dd = -np.nan_to_num(dd.max())
    s.loc['Calmar Ratio'] = np.clip(annualized_return / (-max_dd or np.nan), 0, np.inf)
    s.loc['Max. Drawdown [%]'] = max_dd * 100
    s.loc['Avg. Drawdown [%]'] = -dd_peaks.mean() * 100
    s.loc['Max. Drawdown Duration'] = _round_timedelta(dd_dur.max())
    s.loc['Avg. Drawdown Duration'] = _round_timedelta(dd_dur.mean())
    s.loc['# Trades'] = n_trades = len(trades)
    s.loc['Win Rate [%]'] = np.nan if not n_trades else (pl > 0).sum() / n_trades * 100  # noqa: E501
    s.loc['Best Trade [%]'] = returns.max() * 100
    s.loc['Worst Trade [%]'] = returns.min() * 100
    mean_return = geometric_mean(returns)
    s.loc['Avg. Trade [%]'] = mean_return * 100
    s.loc['Max. Trade Duration'] = _round_timedelta(durations.max())
    s.loc['Avg. Trade Duration'] = _round_timedelta(durations.mean())
    s.loc['Profit Factor'] = returns[returns > 0].sum() / (abs(returns[returns < 0].sum()) or np.nan)  # noqa: E501
    s.loc['Expectancy [%]'] = returns.mean() * 100
    s.loc['SQN'] = np.sqrt(n_trades) * pl.mean() / (pl.std() or np.nan)

    s.loc['_equity_curve'] = equity_df
    s.loc['_trades'] = trades_df

    s = _Stats(s)
    return s


class _Stats(pd.Series):
def __repr__(self):
# Prevent expansion due to _equity and _trades dfs
with pd.option_context('max_colwidth', 20):
return super().__repr__()
148 changes: 16 additions & 132 deletions backtesting/backtesting.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import numpy as np
import pandas as pd


try:
from tqdm.auto import tqdm as _tqdm
_tqdm = partial(_tqdm, leave=False)
Expand All @@ -29,7 +30,8 @@ def _tqdm(seq, **_):
return seq

from ._plotting import plot
from ._util import _as_str, _Indicator, _Data, _data_period, try_
from ._stats import compute_stats
from ._util import _as_str, _Indicator, _Data, try_

__pdoc__ = {
'Strategy.__init__': False,
Expand Down Expand Up @@ -1086,7 +1088,7 @@ def __init__(self,
exclusive_orders=exclusive_orders, index=data.index,
)
self._strategy = strategy
self._results = None
self._results: Optional[pd.Series] = None

def run(self, **kwargs) -> pd.Series:
"""
Expand Down Expand Up @@ -1177,7 +1179,17 @@ def run(self, **kwargs) -> pd.Series:
# for future `indicator._opts['data'].index` calls to work
data._set_length(len(self._data))

self._results = self._compute_stats(broker, strategy)
equity = pd.Series(broker._equity).bfill().fillna(broker._cash).values

stats = compute_stats(
trades=broker.closed_trades,
equity=equity,
ohlc_data=self._data,
risk_free_rate=0.0,
)
stats.loc['_strategy'] = strategy
self._results = stats

return self._results

def optimize(self, *,
Expand Down Expand Up @@ -1253,7 +1265,7 @@ def optimize(self, *,
constraint=lambda p: p.sma1 < p.sma2)

.. TODO::
Improve multiprocessing/parallel execution on Windos with start method 'spawn'.
Improve multiprocessing/parallel execution on Windows with start method 'spawn'.
"""
if not kwargs:
raise ValueError('Need some strategy parameters to optimize')
Expand Down Expand Up @@ -1485,134 +1497,6 @@ def _mp_task(backtest_uuid, batch_index):

_mp_backtests: Dict[float, Tuple['Backtest', List, Callable]] = {}

@staticmethod
def _compute_drawdown_duration_peaks(dd: pd.Series):
iloc = np.unique(np.r_[(dd == 0).values.nonzero()[0], len(dd) - 1])
iloc = pd.Series(iloc, index=dd.index[iloc])
df = iloc.to_frame('iloc').assign(prev=iloc.shift())
df = df[df['iloc'] > df['prev'] + 1].astype(int)
# If no drawdown since no trade, avoid below for pandas sake and return nan series
if not len(df):
return (dd.replace(0, np.nan),) * 2
df['duration'] = df['iloc'].map(dd.index.__getitem__) - df['prev'].map(dd.index.__getitem__)
df['peak_dd'] = df.apply(lambda row: dd.iloc[row['prev']:row['iloc'] + 1].max(), axis=1)
df = df.reindex(dd.index)
return df['duration'], df['peak_dd']

def _compute_stats(self, broker: _Broker, strategy: Strategy) -> pd.Series:
data = self._data
index = data.index

equity = pd.Series(broker._equity).bfill().fillna(broker._cash).values
dd = 1 - equity / np.maximum.accumulate(equity)
dd_dur, dd_peaks = self._compute_drawdown_duration_peaks(pd.Series(dd, index=index))

equity_df = pd.DataFrame({
'Equity': equity,
'DrawdownPct': dd,
'DrawdownDuration': dd_dur},
index=index)

trades = broker.closed_trades
trades_df = pd.DataFrame({
'Size': [t.size for t in trades],
'EntryBar': [t.entry_bar for t in trades],
'ExitBar': [t.exit_bar for t in trades],
'EntryPrice': [t.entry_price for t in trades],
'ExitPrice': [t.exit_price for t in trades],
'PnL': [t.pl for t in trades],
'ReturnPct': [t.pl_pct for t in trades],
'EntryTime': [t.entry_time for t in trades],
'ExitTime': [t.exit_time for t in trades],
})
trades_df['Duration'] = trades_df['ExitTime'] - trades_df['EntryTime']

pl = trades_df['PnL']
returns = trades_df['ReturnPct']
durations = trades_df['Duration']

def _round_timedelta(value, _period=_data_period(index)):
if not isinstance(value, pd.Timedelta):
return value
resolution = getattr(_period, 'resolution_string', None) or _period.resolution
return value.ceil(resolution)

s = pd.Series(dtype=object)
s.loc['Start'] = index[0]
s.loc['End'] = index[-1]
s.loc['Duration'] = s.End - s.Start

have_position = np.repeat(0, len(index))
for t in trades:
have_position[t.entry_bar:t.exit_bar + 1] = 1 # type: ignore

s.loc['Exposure Time [%]'] = have_position.mean() * 100 # In "n bars" time, not index time
s.loc['Equity Final [$]'] = equity[-1]
s.loc['Equity Peak [$]'] = equity.max()
s.loc['Return [%]'] = (equity[-1] - equity[0]) / equity[0] * 100
c = data.Close.values
s.loc['Buy & Hold Return [%]'] = (c[-1] - c[0]) / c[0] * 100 # long-only return

def geometric_mean(returns):
returns = returns.fillna(0) + 1
return (0 if np.any(returns <= 0) else
np.exp(np.log(returns).sum() / (len(returns) or np.nan)) - 1)

day_returns = gmean_day_return = np.array(np.nan)
annual_trading_days = np.nan
if isinstance(index, pd.DatetimeIndex):
day_returns = equity_df['Equity'].resample('D').last().dropna().pct_change()
gmean_day_return = geometric_mean(day_returns)
annual_trading_days = float(
365 if index.dayofweek.to_series().between(5, 6).mean() > 2/7 * .6 else
252)

# Annualized return and risk metrics are computed based on the (mostly correct)
# assumption that the returns are compounded. See: https://dx.doi.org/10.2139/ssrn.3054517
# Our annualized return matches `empyrical.annual_return(day_returns)` whereas
# our risk doesn't; they use the simpler approach below.
annualized_return = (1 + gmean_day_return)**annual_trading_days - 1
s.loc['Return (Ann.) [%]'] = annualized_return * 100
s.loc['Volatility (Ann.) [%]'] = np.sqrt((day_returns.var(ddof=int(bool(day_returns.shape))) + (1 + gmean_day_return)**2)**annual_trading_days - (1 + gmean_day_return)**(2*annual_trading_days)) * 100 # noqa: E501
# s.loc['Return (Ann.) [%]'] = gmean_day_return * annual_trading_days * 100
# s.loc['Risk (Ann.) [%]'] = day_returns.std(ddof=1) * np.sqrt(annual_trading_days) * 100

# Our Sharpe mismatches `empyrical.sharpe_ratio()` because they use arithmetic mean return
# and simple standard deviation
s.loc['Sharpe Ratio'] = np.clip(s.loc['Return (Ann.) [%]'] / (s.loc['Volatility (Ann.) [%]'] or np.nan), 0, np.inf) # noqa: E501
# Our Sortino mismatches `empyrical.sortino_ratio()` because they use arithmetic mean return
s.loc['Sortino Ratio'] = np.clip(annualized_return / (np.sqrt(np.mean(day_returns.clip(-np.inf, 0)**2)) * np.sqrt(annual_trading_days)), 0, np.inf) # noqa: E501
max_dd = -np.nan_to_num(dd.max())
s.loc['Calmar Ratio'] = np.clip(annualized_return / (-max_dd or np.nan), 0, np.inf)
s.loc['Max. Drawdown [%]'] = max_dd * 100
s.loc['Avg. Drawdown [%]'] = -dd_peaks.mean() * 100
s.loc['Max. Drawdown Duration'] = _round_timedelta(dd_dur.max())
s.loc['Avg. Drawdown Duration'] = _round_timedelta(dd_dur.mean())
s.loc['# Trades'] = n_trades = len(trades)
s.loc['Win Rate [%]'] = np.nan if not n_trades else (pl > 0).sum() / n_trades * 100 # noqa: E501
s.loc['Best Trade [%]'] = returns.max() * 100
s.loc['Worst Trade [%]'] = returns.min() * 100
mean_return = geometric_mean(returns)
s.loc['Avg. Trade [%]'] = mean_return * 100
s.loc['Max. Trade Duration'] = _round_timedelta(durations.max())
s.loc['Avg. Trade Duration'] = _round_timedelta(durations.mean())
s.loc['Profit Factor'] = returns[returns > 0].sum() / (abs(returns[returns < 0].sum()) or np.nan) # noqa: E501
s.loc['Expectancy [%]'] = returns.mean() * 100
s.loc['SQN'] = np.sqrt(n_trades) * pl.mean() / (pl.std() or np.nan)

s.loc['_strategy'] = strategy
s.loc['_equity_curve'] = equity_df
s.loc['_trades'] = trades_df

s = Backtest._Stats(s)
return s

class _Stats(pd.Series):
def __repr__(self):
# Prevent expansion due to _equity and _trades dfs
with pd.option_context('max_colwidth', 20):
return super().__repr__()

def plot(self, *, results: pd.Series = None, filename=None, plot_width=None,
plot_equity=True, plot_return=False, plot_pl=True,
plot_volume=True, plot_drawdown=False,
Expand Down
20 changes: 19 additions & 1 deletion backtesting/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,14 @@
from itertools import compress
from numbers import Number
from inspect import currentframe
from typing import Sequence, Optional, Union, Callable
from typing import Sequence, Optional, Union, Callable, List

import numpy as np
import pandas as pd

from .backtesting import Strategy
from ._plotting import plot_heatmaps as _plot_heatmaps
from ._stats import compute_stats as _compute_stats
from ._util import _Array, _as_str

__pdoc__ = {}
Expand Down Expand Up @@ -77,6 +78,23 @@ def barssince(condition: Sequence[bool], default=np.inf) -> int:
return next(compress(range(len(condition)), reversed(condition)), default)


def compute_stats(
        trades: List[pd.DataFrame],
        equity: np.ndarray,
        ohlc_data: pd.DataFrame,
        risk_free_rate: float = 0) -> pd.Series:
    """
    Compute strategy performance metrics.

    `trades` is the list of closed trades (as kept by the strategy's
    broker), `equity` the per-bar equity values matching `ohlc_data`'s
    index, and `risk_free_rate` an annual rate expressed as a fraction.
    Returns a `pd.Series` of performance metrics.

    Example::

        broker = stats._strategy._broker
        equity = pd.Series(broker._equity).bfill().fillna(broker._cash).values
        performance = compute_stats(
            trades=broker.closed_trades, equity=equity, ohlc_data=GOOG)
    """
    return _compute_stats(trades, equity, ohlc_data, risk_free_rate)


def cross(series1: Sequence, series2: Sequence) -> bool:
"""
Return `True` if `series1` and `series2` just crossed (either
Expand Down
3 changes: 2 additions & 1 deletion backtesting/test/_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import pandas as pd

from backtesting import Backtest, Strategy
from backtesting._stats import compute_drawdown_duration_peaks
from backtesting.lib import (
OHLCV_AGG,
barssince,
Expand Down Expand Up @@ -242,7 +243,7 @@ def test_strategy_str(self):

def test_compute_drawdown(self):
dd = pd.Series([0, 1, 7, 0, 4, 0, 0])
durations, peaks = Backtest._compute_drawdown_duration_peaks(dd)
durations, peaks = compute_drawdown_duration_peaks(dd)
np.testing.assert_array_equal(durations, pd.Series([3, 2], index=[3, 5]).reindex(dd.index))
np.testing.assert_array_equal(peaks, pd.Series([7, 4], index=[3, 5]).reindex(dd.index))

Expand Down