ENH: Add pandas accessors .df and .s

kernc · kernc · commit 506ebc6ca6da · 2020-03-23T23:48:57.000+01:00
diff --git a/backtesting/_util.py b/backtesting/_util.py
@@ -1,3 +1,4 @@
+import warnings
 from typing import Sequence
 from numbers import Number
 
@@ -44,12 +45,10 @@ class _Array(np.ndarray):
     ndarray extended to supply .name and other arbitrary properties
     in ._opts dict.
     """
-    def __new__(cls, array, *, name=None, write=False, **kwargs):
+    def __new__(cls, array, *, name=None, **kwargs):
         obj = np.asarray(array).view(cls)
         obj.name = name or array.name
         obj._opts = kwargs
-        if not write:
-            obj.setflags(write=False)
         return obj
 
     def __array_finalize__(self, obj):
@@ -70,7 +69,20 @@ def __float__(self):
             return super().__float__()
 
     def to_series(self):
-        return pd.Series(self, index=self._opts['data'].index, name=self.name)
+        warnings.warn("`.to_series()` is deprecated. For pd.Series conversion, use accessor `.s`",
+                      FutureWarning, stacklevel=2)  # shown by default; points at the caller
+        return self.s
+
+    @property
+    def s(self) -> pd.Series:
+        values = np.atleast_2d(self)
+        return pd.Series(values[0], index=self._opts['data'].index, name=self.name)
+
+    @property
+    def df(self) -> pd.DataFrame:
+        values = np.atleast_2d(np.asarray(self))
+        return pd.DataFrame(values.T, index=self._opts['data'].index,
+                            columns=[self.name] * len(values))
 
 
 class _Indicator(_Array):
@@ -84,15 +96,13 @@ class _Data:
     and the returned "series" are _not_ `pd.Series` but `np.ndarray`
     for performance reasons.
     """
-    def __init__(self, df):
+    def __init__(self, df: pd.DataFrame):
+        self.__df = df
         self.__i = len(df)
         self.__pip = None
         self.__cache = {}
-
-        self.__arrays = {col: _Array(arr, data=self)
-                         for col, arr in df.items()}
-        # Leave index as Series because pd.Timestamp nicer API to work with
-        self.__arrays['__index'] = df.index.copy()
+        self.__arrays = None
+        self._update()
 
     def __getitem__(self, item):
         return self.__get_array(item)
@@ -107,17 +117,35 @@ def _set_length(self, i):
         self.__i = i
         self.__cache.clear()
 
+    def _update(self):
+        self.__arrays = {col: _Array(arr, data=self)
+                         for col, arr in self.__df.items()}
+        # Keep index as pd.Index (not _Array); pd.Timestamp is a nicer API to work with
+        self.__arrays['__index'] = self.__df.index.copy()
+
+    def __repr__(self):
+        i = min(self.__i, len(self.__df) - 1)
+        return '<Data i={} ({}) {}>'.format(i, self.__arrays['__index'][i],
+                                            ', '.join('{}={}'.format(k, v)
+                                                      for k, v in self.__df.iloc[i].items()))
+
     def __len__(self):
         return self.__i
 
+    @property
+    def df(self) -> pd.DataFrame:
+        return (self.__df.iloc[:self.__i]
+                if self.__i < len(self.__df)
+                else self.__df)
+
     @property
     def pip(self):
         if self.__pip is None:
             self.__pip = 10**-np.median([len(s.partition('.')[-1])
                                          for s in self.__arrays['Close'].astype(str)])
         return self.__pip
 
-    def __get_array(self, key):
+    def __get_array(self, key) -> _Array:
         arr = self.__cache.get(key)
         if arr is None:
             arr = self.__cache[key] = self.__arrays[key][:self.__i]
@@ -144,8 +172,8 @@ def Volume(self):
         return self.__get_array('Volume')
 
     @property
-    def index(self):
-        return self.__get_array('__index')
+    def index(self) -> pd.DatetimeIndex:
+        return self.__get_array('__index')  # type: ignore
 
     # Make pickling in Backtest.optimize() work with our catch-all __getattr__
     def __getstate__(self):
diff --git a/backtesting/backtesting.py b/backtesting/backtesting.py
@@ -243,8 +243,8 @@ def data(self) -> _Data:
           the last array value (e.g. `data.Close[-1]`)
           is always the _most recent_ value.
         * If you need data arrays (e.g. `data.Close`) to be indexed
-          Pandas series, you can call their `.to_series()` method
-          (e.g. `data.Close.to_series()`).
+          Pandas series, you can use their `.s` accessor
+          (e.g. `data.Close.s`).
         """
         return self._data
 
@@ -994,11 +994,12 @@ def run(self, **kwargs) -> pd.Series:
 
         Keyword arguments are interpreted as strategy parameters.
         """
-        data = _Data(self._data)
+        data = _Data(self._data.copy(deep=False))
         broker = self._broker(data=data)  # type: _Broker
         strategy = self._strategy(broker, data, kwargs)  # type: Strategy
 
         strategy.init()
+        data._update()  # Strategy.init might have changed/added to data.df
 
         # Indicators used in Strategy.next()
         indicator_attrs = {attr: indicator
diff --git a/backtesting/lib.py b/backtesting/lib.py
@@ -183,7 +183,7 @@ class System(Strategy):
             def init(self):
                 # Strategy exposes `self.data` as raw NumPy arrays.
                 # Let's convert closing prices back to pandas Series.
-                close = self.data.Close.to_series()
+                close = self.data.Close.s
 
                 # Resample to daily resolution. Aggregate groups
                 # using their last value (i.e. closing price at the end
@@ -213,9 +213,8 @@ def func(x, *_, **__):
         assert isinstance(series, _Array), \
             'resample_apply() takes either a `pd.Series`, `pd.DataFrame`, ' \
             'or a `Strategy.data.*` array'
-        series = series.to_series()
+        series = series.s
 
-    series = series.copy()  # XXX: pandas 1.0.1 bug https://github.com/pandas-dev/pandas/issues/31710  # noqa: E501
     resampled = series.resample(rule, label='right').agg(agg).dropna()
     resampled.name = _as_str(series) + '[' + rule + ']'
 
diff --git a/backtesting/test/_test.py b/backtesting/test/_test.py
@@ -560,7 +560,7 @@ def test_plot_heatmaps(self):
     def test_SignalStrategy(self):
         class S(SignalStrategy):
             def init(self):
-                sma = self.data.Close.to_series().rolling(10).mean()
+                sma = self.data.Close.s.rolling(10).mean()
                 self.set_signal(self.data.Close > sma,
                                 self.data.Close < sma)
 
@@ -573,7 +573,7 @@ def init(self):
                 super().init()
                 self.set_atr_periods(40)
                 self.set_trailing_sl(3)
-                self.sma = self.I(lambda: self.data.Close.to_series().rolling(10).mean())
+                self.sma = self.I(lambda: self.data.Close.s.rolling(10).mean())
 
             def next(self):
                 super().next()
@@ -603,6 +603,22 @@ class Class:
         for s in ('Open', 'High', 'Low', 'Close', 'Volume'):
             self.assertEqual(_as_str(_Array([1], name=s)), s[0])
 
+    def test_pandas_accessors(self):
+        class S(Strategy):
+            def init(self):
+                close, index = self.data.Close, self.data.index
+                assert close.s.equals(pd.Series(close, index=index))
+                assert self.data.df['Close'].equals(pd.Series(close, index=index))
+                self.data.df['new_key'] = 2 * close
+
+            def next(self):
+                close, index = self.data.Close, self.data.index
+                assert close.s.equals(pd.Series(close, index=index))
+                assert self.data.df['Close'].equals(pd.Series(close, index=index))
+                assert self.data.df['new_key'].equals(pd.Series(self.data.new_key, index=index))
+
+        Backtest(GOOG.iloc[:20], S).run()
+
 
 @unittest.skipUnless(
     os.path.isdir(os.path.join(os.path.dirname(__file__),