From af060ba9bed9014e471faaf66a315d7f96e153ec Mon Sep 17 00:00:00 2001
From: Jasmine Sandhu <jsandhu@continuum.io>
Date: Mon, 16 Nov 2015 15:03:49 -0800
Subject: [PATCH 1/3] ENH: GH4964 Separated array_to_roll in roll_generic

In fixing GH4964, added an array_to_roll argument to roll_generic().
This will be used by rolling_apply() to work with non-float dtypes.
array_to_roll defaults to None, in which case the function rolls over
the input array as it did previously.
---
 pandas/algos.pyx | 33 ++++++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/pandas/algos.pyx b/pandas/algos.pyx
index 8569209f2e946..45dc64d81c338 100644
--- a/pandas/algos.pyx
+++ b/pandas/algos.pyx
@@ -1820,9 +1820,11 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int win,
 
     return output
 
+
 def roll_generic(ndarray[float64_t, cast=True] input,
                  int win, int minp, int offset,
-                 object func, object args, object kwargs):
+                 object func, object args, object kwargs,
+                 object array_to_roll=None):
     cdef ndarray[double_t] output, counts, bufarr
     cdef Py_ssize_t i, n
     cdef float64_t *buf
@@ -1837,32 +1839,41 @@ def roll_generic(ndarray[float64_t, cast=True] input,
 
     minp = _check_minp(win, minp, n, floor=0)
     output = np.empty(n, dtype=float)
-    counts = roll_sum(np.concatenate((np.isfinite(input).astype(float), np.array([0.] * offset))), win, minp)[offset:]
+    counts = roll_sum(np.concatenate((np.isfinite(input).astype(float),
+                                      np.array([0.] * offset))),
+                      win, minp)[offset:]
+
+    # default behavior is to roll over input array
+    if array_to_roll is None:
+        array_to_roll = input
 
     # truncated windows at the beginning, through first full-length window
     for i from 0 <= i < (int_min(win, n) - offset):
         if counts[i] >= minp:
-            output[i] = func(input[0 : (i + offset + 1)], *args, **kwargs)
+            output[i] = func(array_to_roll[0:(i + offset + 1)],
+                             *args,
+                             **kwargs)
         else:
             output[i] = NaN
 
     # remaining full-length windows
-    buf = <float64_t*> input.data
-    bufarr = np.empty(win, dtype=float)
-    oldbuf = <float64_t*> bufarr.data
+    # array_to_roll is a numpy array and doing a slice of contiguous data does
+    # not make a copy
     for i from (win - offset) <= i < (n - offset):
-        buf = buf + 1
-        bufarr.data = <char*> buf
         if counts[i] >= minp:
-            output[i] = func(bufarr, *args, **kwargs)
+            # full length windows will start at index 1 and be of length win
+            output[i] = \
+                func(array_to_roll[i - (win - offset) + 1:i + offset + 1],
+                     *args, **kwargs)
         else:
             output[i] = NaN
-    bufarr.data = <char*> oldbuf
 
     # truncated windows at the end
     for i from int_max(n - offset, 0) <= i < n:
         if counts[i] >= minp:
-            output[i] = func(input[int_max(i + offset - win + 1, 0) : n], *args, **kwargs)
+            output[i] = func(array_to_roll[int_max(i + offset - win + 1, 0):n],
+                             *args,
+                             **kwargs)
         else:
             output[i] = NaN
 

From da418efbceda1b77927d9c60647034d0f257a3a2 Mon Sep 17 00:00:00 2001
From: Jasmine Sandhu <jsandhu@continuum.io>
Date: Mon, 16 Nov 2015 15:09:01 -0800
Subject: [PATCH 2/3] ENH: GH4964 Add coercion kwarg to rolling_apply

Added a coercion bool to rolling_apply(), _rolling_moment() and
_process_data_structure(). It defaults to True, in which case
_process_data_structure() converts arg to float and things work as they
did before this change (backwards compatible). If the user wishes to use
rolling_apply() with a string array, they should set coercion=False.

The default of coercion=True prioritizes performance.
---
 pandas/stats/moments.py | 54 +++++++++++++++++++++++++++++------------
 1 file changed, 38 insertions(+), 16 deletions(-)

diff --git a/pandas/stats/moments.py b/pandas/stats/moments.py
index 3cddae45e7516..c4183d748fc9c 100644
--- a/pandas/stats/moments.py
+++ b/pandas/stats/moments.py
@@ -355,7 +355,7 @@ def rolling_corr_pairwise(df1, df2=None, window=None, min_periods=None,
 
 
 def _rolling_moment(arg, window, func, minp, axis=0, freq=None, center=False,
-                    how=None, args=(), kwargs={}, **kwds):
+                    how=None, coercion=True, args=(), kwargs={}, **kwds):
     """
     Rolling statistical measure using supplied function. Designed to be
     used with passed-in Cython array-based functions.
@@ -374,6 +374,10 @@ def _rolling_moment(arg, window, func, minp, axis=0, freq=None, center=False,
         Whether the label should correspond with center of window
     how : string, default 'mean'
         Method for down- or re-sampling
+    coercion : boolean, default True
+        If True, coerce arg to float before rolling (the fast path); input
+        that cannot be converted to float raises a ValueError. Set
+        coercion=False to roll over such data without converting it.
     args : tuple
         Passed on to func
     kwargs : dict
@@ -385,7 +389,7 @@ def _rolling_moment(arg, window, func, minp, axis=0, freq=None, center=False,
     """
     arg = _conv_timerule(arg, freq, how)
 
-    return_hook, values = _process_data_structure(arg)
+    return_hook, values = _process_data_structure(arg, coercion=coercion)
 
     if values.size == 0:
         result = values.copy()
@@ -393,9 +397,18 @@ def _rolling_moment(arg, window, func, minp, axis=0, freq=None, center=False,
         # actually calculate the moment. Faster way to do this?
         offset = int((window - 1) / 2.) if center else 0
         additional_nans = np.array([np.NaN] * offset)
-        calc = lambda x: func(np.concatenate((x, additional_nans)) if center else x,
-                              window, minp=minp, args=args, kwargs=kwargs,
-                              **kwds)
+
+        if coercion:
+            calc = lambda x: func(np.concatenate((x, additional_nans)) if
+                                  center else x, window, minp=minp, args=args,
+                                  kwargs=kwargs, **kwds)
+        else:
+            p0 = np.arange(0, len(values), dtype=float)
+            calc = lambda x: func(np.concatenate((p0, additional_nans))
+                                      if center else p0, window, minp=minp,
+                                      args=args, kwargs=kwargs,
+                                      array_to_roll=x, **kwds)
+
         if values.ndim > 1:
             result = np.apply_along_axis(calc, axis, values)
         else:
@@ -423,7 +436,7 @@ def _center_window(rs, window, axis):
     return rs
 
 
-def _process_data_structure(arg, kill_inf=True):
+def _process_data_structure(arg, kill_inf=True, coercion=True):
     if isinstance(arg, DataFrame):
         return_hook = lambda v: type(arg)(v, index=arg.index,
                                           columns=arg.columns)
@@ -435,12 +448,13 @@ def _process_data_structure(arg, kill_inf=True):
         return_hook = lambda v: v
         values = arg
 
-    if not issubclass(values.dtype.type, float):
-        values = values.astype(float)
+    if coercion:
+        if not issubclass(values.dtype.type, float):
+            values = values.astype(float)
 
-    if kill_inf:
-        values = values.copy()
-        values[np.isinf(values)] = np.NaN
+        if kill_inf:
+            values = values.copy()
+            values[np.isinf(values)] = np.NaN
 
     return return_hook, values
 
@@ -712,7 +726,7 @@ def call_cython(arg, window, minp, args=(), kwargs={}):
 
 
 def rolling_apply(arg, window, func, min_periods=None, freq=None,
-                  center=False, args=(), kwargs={}):
+                  center=False, coercion=True, args=(), kwargs={}):
     """Generic moving function application.
 
     Parameters
@@ -731,6 +745,10 @@ def rolling_apply(arg, window, func, min_periods=None, freq=None,
         as a frequency string or DateOffset object.
     center : boolean, default False
         Whether the label should correspond with center of window
+    coercion : boolean, default True
+        If True, coerce arg to float before rolling (the fast path); input
+        that cannot be converted to float raises a ValueError. Set
+        coercion=False to roll over such data without converting it.
     args : tuple
         Passed on to func
     kwargs : dict
@@ -750,11 +768,15 @@ def rolling_apply(arg, window, func, min_periods=None, freq=None,
     of :meth:`~pandas.Series.resample` (i.e. using the `mean`).
     """
     offset = int((window - 1) / 2.) if center else 0
-    def call_cython(arg, window, minp, args, kwargs):
+
+    def call_cython(arg, window, minp, args, kwargs, array_to_roll=None):
         minp = _use_window(minp, window)
-        return algos.roll_generic(arg, window, minp, offset, func, args, kwargs)
-    return _rolling_moment(arg, window, call_cython, min_periods, freq=freq,
-                           center=False, args=args, kwargs=kwargs)
+        return algos.roll_generic(arg, window, minp, offset, func, args,
+                                  kwargs, array_to_roll)
+
+    return _rolling_moment(arg, window, call_cython, min_periods,
+                           freq=freq, center=False, coercion=coercion,
+                           args=args, kwargs=kwargs)
 
 
 def rolling_window(arg, window=None, win_type=None, min_periods=None,

From ba73145695d64ef6672685fcf7c81ddc0be5d496 Mon Sep 17 00:00:00 2001
From: Jasmine Sandhu <jsandhu@continuum.io>
Date: Mon, 16 Nov 2015 15:15:08 -0800
Subject: [PATCH 3/3] ENH: GH4964 Add rolling_apply on strings as test

---
 pandas/stats/tests/test_moments.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py
index e2ed27156d2b5..69a436798e354 100644
--- a/pandas/stats/tests/test_moments.py
+++ b/pandas/stats/tests/test_moments.py
@@ -363,6 +363,27 @@ def roll_mean(x, window, min_periods=None, freq=None, center=False):
         expected = Series([1., 2., 2.])
         assert_series_equal(result, expected)
 
+    def test_rolling_apply_nonfloat(self):
+        '''
+        test rolling_apply now also works for non-float data types if coercion
+        is set to False. The return type is still float but the 'roll'
+        is applied to arg which no longer has to be a float
+        '''
+        # check rolling_apply with coercion set to False
+        orig = Series([ord('a'), ord('b'), ord('c')], dtype=float)
+        s = Series(['a', 'b', 'c'])
+
+        for min_p in (None, 0):
+            s_res = mom.rolling_apply(s, 2, lambda x: ord(x[-1]),
+                                      coercion=False, min_periods=min_p)
+            o_res = mom.rolling_apply(orig, 2, lambda x: x[-1],
+                                      coercion=False, min_periods=min_p)
+
+            # assert that NaN values appear at same place since min_periods
+            # defines the NaN values. Also assert that valid answers match
+            assert all(np.isfinite(s_res) == np.isfinite(o_res))
+            assert all(s_res[np.isfinite(s_res)] == o_res[np.isfinite(o_res)])
+
     def test_rolling_apply_out_of_bounds(self):
         # #1850
         arr = np.arange(4)