From 4a577a9f71352b1dadac5158eb49ed2e414743b0 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Sat, 24 Oct 2020 13:48:51 -0500 Subject: [PATCH 1/9] PERF: release gil for ewma_time --- pandas/_libs/window/aggregations.pyx | 48 ++++++++++++++++------------ 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index b50eaf800533a..c0d329e462423 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1,14 +1,13 @@ # cython: boundscheck=False, wraparound=False, cdivision=True import cython -from cython import Py_ssize_t from libcpp.deque cimport deque import numpy as np cimport numpy as cnp -from numpy cimport float32_t, float64_t, int64_t, ndarray, uint8_t +from numpy cimport float32_t, float64_t, int64_t, ndarray cnp.import_array() @@ -1369,7 +1368,7 @@ def roll_weighted_var(float64_t[:] values, float64_t[:] weights, # ---------------------------------------------------------------------- # Exponentially weighted moving average -def ewma_time(ndarray[float64_t] vals, int minp, ndarray[int64_t] times, +def ewma_time(float64_t[:] vals, int minp, ndarray[int64_t] times, int64_t halflife): """ Compute exponentially-weighted moving average using halflife and time @@ -1387,30 +1386,39 @@ def ewma_time(ndarray[float64_t] vals, int minp, ndarray[int64_t] times, ndarray """ cdef: - Py_ssize_t i, num_not_nan = 0, N = len(vals) + Py_ssize_t i, j, num_not_nan = 0, N = len(vals) bint is_not_nan - float64_t last_result - ndarray[uint8_t] mask = np.zeros(N, dtype=np.uint8) - ndarray[float64_t] weights, observations, output = np.empty(N, dtype=np.float64) + float64_t last_result, weights_dot, weights_sum, weight + float64_t[:] times_float + float64_t[:] observations = np.zeros(N, dtype=float) + float64_t[:] times_masked = np.zeros(N, dtype=float) + ndarray[float64_t] output = np.empty(N, dtype=float) + float64_t[:] output_view = output if N == 0: return output + times_float = times.astype(float) last_result = vals[0] - for i in range(N): - is_not_nan = vals[i] == vals[i] - num_not_nan += is_not_nan - if is_not_nan: - mask[i] = 1 - weights = 0.5 ** ((times[i] - times[mask.view(np.bool_)]) / halflife) - observations = vals[mask.view(np.bool_)] - last_result = np.sum(weights * observations) / np.sum(weights) - - if num_not_nan >= minp: - output[i] = last_result - else: - output[i] = NaN + with nogil: + for i in range(N): + is_not_nan = vals[i] == vals[i] + num_not_nan += is_not_nan + if is_not_nan: + times_masked[num_not_nan-1] = times_float[i] + observations[num_not_nan-1] = vals[i] + + weights_sum = 0 + weights_dot = 0 + for j in range(num_not_nan): + weight = 0.5 ** ((times_float[i] - times_masked[j]) / halflife) + weights_sum += weight + weights_dot += weight * observations[j] + + last_result = weights_dot / weights_sum + + output_view[i] = last_result if num_not_nan >= minp else NaN return output From 386e666529785e4ef0a2b3bed1e5c571ff2c3f31 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Sat, 24 Oct 2020 22:53:00 -0500 Subject: [PATCH 2/9] use const --- pandas/_libs/window/aggregations.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index c0d329e462423..9602ddd82dd98 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1368,7 +1368,7 @@ def roll_weighted_var(float64_t[:] values, float64_t[:] weights, # ---------------------------------------------------------------------- # Exponentially weighted moving average -def ewma_time(float64_t[:] vals, int minp, ndarray[int64_t] times, +def ewma_time(const float64_t[:] vals, int minp, ndarray[int64_t] times, int64_t halflife): """ Compute exponentially-weighted moving average using halflife and time From e2acdddf894c5493e6ede7fd348aafad5f0faf25 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Mon, 26 Oct 2020 14:18:03 -0500 Subject: [PATCH 3/9] remove view --- pandas/_libs/window/aggregations.pyx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 038d0be6ec83e..b7266527c7ad1 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1410,7 +1410,6 @@ def ewma_time(const float64_t[:] vals, int minp, ndarray[int64_t] times, float64_t[:] observations = np.zeros(N, dtype=float) float64_t[:] times_masked = np.zeros(N, dtype=float) ndarray[float64_t] output = np.empty(N, dtype=float) - float64_t[:] output_view = output if N == 0: return output @@ -1435,7 +1434,7 @@ def ewma_time(const float64_t[:] vals, int minp, ndarray[int64_t] times, last_result = weights_dot / weights_sum - output_view[i] = last_result if num_not_nan >= minp else NaN + output[i] = last_result if num_not_nan >= minp else NaN return output From 10c321da2f7ea6d81ff39b30e020cd791cc9eb05 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Sat, 31 Oct 2020 09:41:54 -0500 Subject: [PATCH 4/9] cast halflife to float --- pandas/_libs/window/aggregations.pyx | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index bbb53c31044fa..a822dc7527320 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1417,7 +1417,7 @@ def ewma_time(const float64_t[:] vals, int minp, ndarray[int64_t] times, cdef: Py_ssize_t i, j, num_not_nan = 0, N = len(vals) bint is_not_nan - float64_t last_result, weights_dot, weights_sum, weight + float64_t last_result, weights_dot, weights_sum, weight, halflife_flot float64_t[:] times_float float64_t[:] observations = np.zeros(N, dtype=float) float64_t[:] times_masked = np.zeros(N, dtype=float) @@ -1426,6 +1426,7 @@ def ewma_time(const float64_t[:] vals, int minp, ndarray[int64_t] times, if N == 0: return output + halflife_flot = halflife times_float = times.astype(float) last_result = vals[0] @@ -1440,7 +1441,7 @@ def ewma_time(const float64_t[:] vals, int minp, ndarray[int64_t] times, weights_sum = 0 weights_dot = 0 for j in range(num_not_nan): - weight = 0.5 ** ((times_float[i] - times_masked[j]) / halflife) + weight = 0.5 ** ((times_float[i] - times_masked[j]) / halflife_flot) weights_sum += weight weights_dot += weight * observations[j] From 2d51fe946f4f33ed5c6f5ef58f8e3a423c2398f6 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Sat, 31 Oct 2020 09:42:27 -0500 Subject: [PATCH 5/9] cast halflife to float --- pandas/_libs/window/aggregations.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index a822dc7527320..c05b90bf4986a 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1426,7 +1426,7 @@ def ewma_time(const float64_t[:] vals, int minp, ndarray[int64_t] times, if N == 0: return output - halflife_flot = halflife + halflife_float = halflife times_float = times.astype(float) last_result = vals[0] @@ -1441,7 +1441,7 @@ def ewma_time(const float64_t[:] vals, int minp, ndarray[int64_t] times, weights_sum = 0 weights_dot = 0 for j in range(num_not_nan): - weight = 0.5 ** ((times_float[i] - times_masked[j]) / halflife_flot) + weight = 0.5 ** ((times_float[i] - times_masked[j]) / halflife_float) weights_sum += weight weights_dot += weight * observations[j] From c254a211744fdf9e54710876166d8946a7c954f2 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Sat, 31 Oct 2020 09:52:45 -0500 Subject: [PATCH 6/9] cast halflife to float --- pandas/_libs/window/aggregations.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index c05b90bf4986a..2401f137bf209 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1417,7 +1417,7 @@ def ewma_time(const float64_t[:] vals, int minp, ndarray[int64_t] times, cdef: Py_ssize_t i, j, num_not_nan = 0, N = len(vals) bint is_not_nan - float64_t last_result, weights_dot, weights_sum, weight, halflife_flot + float64_t last_result, weights_dot, weights_sum, weight, halflife_float float64_t[:] times_float float64_t[:] observations = np.zeros(N, dtype=float) float64_t[:] times_masked = np.zeros(N, dtype=float) @@ -1426,7 +1426,7 @@ def ewma_time(const float64_t[:] vals, int minp, ndarray[int64_t] times, if N == 0: return output - halflife_float = halflife + halflife_float = halflife times_float = times.astype(float) last_result = vals[0] From 1c53a4db5a337986e33c83ff36bfecfce24eef4f Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Sat, 31 Oct 2020 15:21:51 -0500 Subject: [PATCH 7/9] fix double indent --- pandas/_libs/window/aggregations.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 2401f137bf209..731cdd99c719f 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1441,9 +1441,9 @@ def ewma_time(const float64_t[:] vals, int minp, ndarray[int64_t] times, weights_sum = 0 weights_dot = 0 for j in range(num_not_nan): - weight = 0.5 ** ((times_float[i] - times_masked[j]) / halflife_float) - weights_sum += weight - weights_dot += weight * observations[j] + weight = 0.5 ** ((times_float[i] - times_masked[j]) / halflife_float) + weights_sum += weight + weights_dot += weight * observations[j] last_result = weights_dot / weights_sum From a1f223c97577c378d74d8cf4cc13c566dd20089b Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Sat, 31 Oct 2020 15:34:26 -0500 Subject: [PATCH 8/9] fix long line --- pandas/_libs/window/aggregations.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index 731cdd99c719f..dc27410b29680 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1441,7 +1441,8 @@ def ewma_time(const float64_t[:] vals, int minp, ndarray[int64_t] times, weights_sum = 0 weights_dot = 0 for j in range(num_not_nan): - weight = 0.5 ** ((times_float[i] - times_masked[j]) / halflife_float) + weight = 0.5 ** ( + (times_float[i] - times_masked[j]) / halflife_float) weights_sum += weight weights_dot += weight * observations[j] From 27402ecec495bf8205715e293c8a39e6254b2692 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Sat, 31 Oct 2020 15:47:08 -0500 Subject: [PATCH 9/9] fix long line --- pandas/_libs/window/aggregations.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index dc27410b29680..3556085bb300b 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1442,7 +1442,7 @@ def ewma_time(const float64_t[:] vals, int minp, ndarray[int64_t] times, weights_dot = 0 for j in range(num_not_nan): weight = 0.5 ** ( - (times_float[i] - times_masked[j]) / halflife_float) + (times_float[i] - times_masked[j]) / halflife_float) weights_sum += weight weights_dot += weight * observations[j]