From f55a827a059aa7be4ff743a85ceb76197eeb1c50 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 16 Oct 2018 08:44:11 -0500 Subject: [PATCH 01/13] Try removing feather --- ci/azure-windows-36.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/ci/azure-windows-36.yaml b/ci/azure-windows-36.yaml index 6230e9b6a1885..db4d429e9d730 100644 --- a/ci/azure-windows-36.yaml +++ b/ci/azure-windows-36.yaml @@ -6,7 +6,6 @@ dependencies: - blosc - bottleneck - fastparquet - - feather-format - matplotlib - numexpr - numpy=1.14* @@ -17,7 +16,6 @@ dependencies: - python=3.6.* - pytz - scipy - - thrift=0.10* - xlrd - xlsxwriter - xlwt From 4097bdc7871b73ec9eba886a2ae3584409d03920 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 16 Oct 2018 08:45:27 -0500 Subject: [PATCH 02/13] pin pyarrow --- ci/azure-windows-36.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/azure-windows-36.yaml b/ci/azure-windows-36.yaml index db4d429e9d730..cfb4df075e0dc 100644 --- a/ci/azure-windows-36.yaml +++ b/ci/azure-windows-36.yaml @@ -10,7 +10,7 @@ dependencies: - numexpr - numpy=1.14* - openpyxl=2.5.5 - - pyarrow + - pyarrow=0.9.0 - pytables - python-dateutil - python=3.6.* From 40dafbde37d674ece5303ebea4c2a9141f2366cd Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 16 Oct 2018 09:19:20 -0500 Subject: [PATCH 03/13] Revert "Try removing feather" This reverts commit f55a827a059aa7be4ff743a85ceb76197eeb1c50. --- ci/azure-windows-36.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci/azure-windows-36.yaml b/ci/azure-windows-36.yaml index cfb4df075e0dc..4dbb4bf0fe023 100644 --- a/ci/azure-windows-36.yaml +++ b/ci/azure-windows-36.yaml @@ -6,6 +6,7 @@ dependencies: - blosc - bottleneck - fastparquet + - feather-format - matplotlib - numexpr - numpy=1.14* @@ -16,6 +17,7 @@ dependencies: - python=3.6.* - pytz - scipy + - thrift=0.10* - xlrd - xlsxwriter - xlwt From 0c1e34e172905b5e1fa90f46dc6a9a70fb4f371d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 16 Oct 2018 09:19:50 -0500 Subject: [PATCH 04/13] pin boost-cpp --- ci/azure-windows-36.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/azure-windows-36.yaml b/ci/azure-windows-36.yaml index 4dbb4bf0fe023..59c6bea65a9f8 100644 --- a/ci/azure-windows-36.yaml +++ b/ci/azure-windows-36.yaml @@ -5,6 +5,7 @@ channels: dependencies: - blosc - bottleneck + - boost-cpp<1.67 - fastparquet - feather-format - matplotlib From 7ef69bdb5a9948c3d3ae801ffc583dcb1cce4505 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 16 Oct 2018 09:20:11 -0500 Subject: [PATCH 05/13] remove arrow pin --- ci/azure-windows-36.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/azure-windows-36.yaml b/ci/azure-windows-36.yaml index 59c6bea65a9f8..c57bd66a2c82d 100644 --- a/ci/azure-windows-36.yaml +++ b/ci/azure-windows-36.yaml @@ -12,7 +12,7 @@ dependencies: - numexpr - numpy=1.14* - openpyxl=2.5.5 - - pyarrow=0.9.0 + - pyarrow - pytables - python-dateutil - python=3.6.* From c512f4200be5147e9e8a11a46a45d2cb0a406031 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 16 Oct 2018 14:17:09 -0500 Subject: [PATCH 06/13] test isnan --- pandas/_libs/window.pyx | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index d4b61b8611b68..77d6af7d4cc1a 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -381,7 +381,7 @@ def roll_count(ndarray[double_t] input, int64_t win, int64_t minp, count_x = 0.0 for j in range(s, e): val = input[j] - if val == val: + if not isnan(val): count_x += 1.0 else: @@ -389,13 +389,13 @@ def roll_count(ndarray[double_t] input, int64_t win, int64_t minp, # calculate deletes for j in range(start[i - 1], s): val = input[j] - if val == val: + if not isnan(val): count_x -= 1.0 # calculate adds for j in range(end[i - 1], e): val = input[j] - if val == val: + if not isnan(val): count_x += 1.0 if count_x >= minp: @@ -424,7 +424,7 @@ cdef inline void add_sum(double val, int64_t *nobs, double *sum_x) nogil: """ add a value from the sum calc """ # Not NaN - if val == val: + if not isnan(val): nobs[0] = nobs[0] + 1 sum_x[0] = sum_x[0] + val @@ -432,7 +432,7 @@ cdef inline void add_sum(double val, int64_t *nobs, double *sum_x) nogil: cdef inline void remove_sum(double val, int64_t *nobs, double *sum_x) nogil: """ remove a value from the sum calc """ - if val == val: + if not isnan(val): nobs[0] = nobs[0] - 1 sum_x[0] = sum_x[0] - val @@ -538,7 +538,7 @@ cdef inline void add_mean(double val, Py_ssize_t *nobs, double *sum_x, """ add a value from the mean calc """ # Not NaN - if val == val: + if not isnan(val): nobs[0] = nobs[0] + 1 sum_x[0] = sum_x[0] + val if signbit(val): @@ -549,7 +549,7 @@ cdef inline void remove_mean(double val, Py_ssize_t *nobs, double *sum_x, Py_ssize_t *neg_ct) nogil: """ remove a value from the mean calc """ - if val == val: + if not isnan(val): nobs[0] = nobs[0] - 1 sum_x[0] = sum_x[0] - val if signbit(val): @@ -671,8 +671,7 @@ cdef inline void remove_var(double val, double *nobs, double *mean_x, """ remove a value from the var calc """ cdef double delta - # Not NaN - if val == val: + if not isnan(val): nobs[0] = nobs[0] - 1 if nobs[0]: # a part of Welford's method for the online variance-calculation @@ -760,7 +759,7 @@ def roll_var(ndarray[double_t] input, int64_t win, int64_t minp, val = input[i] prev = input[i - win] - if val == val: + if not isnan(val): if prev == prev: # Adding one observation and removing another one @@ -822,7 +821,7 @@ cdef inline void add_skew(double val, int64_t *nobs, double *x, double *xx, """ add a value from the skew calc """ # Not NaN - if val == val: + if not isnan(val): nobs[0] = nobs[0] + 1 # seriously don't ask me why this is faster @@ -836,7 +835,7 @@ cdef inline void remove_skew(double val, int64_t *nobs, double *x, double *xx, """ remove a value from the skew calc """ # Not NaN - if val == val: + if not isnan(val): nobs[0] = nobs[0] - 1 # seriously don't ask me why this is faster @@ -959,7 +958,7 @@ cdef inline void add_kurt(double val, int64_t *nobs, double *x, double *xx, """ add a value from the kurotic calc """ # Not NaN - if val == val: + if not isnan(val): nobs[0] = nobs[0] + 1 # seriously don't ask me why this is faster @@ -974,7 +973,7 @@ cdef inline void remove_kurt(double val, int64_t *nobs, double *x, double *xx, """ remove a value from the kurotic calc """ # Not NaN - if val == val: + if not isnan(val): nobs[0] = nobs[0] - 1 # seriously don't ask me why this is faster @@ -1089,7 +1088,7 @@ def roll_median_c(ndarray[float64_t] input, int64_t win, int64_t minp, # setup val = input[i] - if val == val: + if not isnan(val): nobs += 1 err = skiplist_insert(sl, val) != 1 if err: @@ -1100,14 +1099,14 @@ def roll_median_c(ndarray[float64_t] input, int64_t win, int64_t minp, # calculate deletes for j in range(start[i - 1], s): val = input[j] - if val == val: + if not isnan(val): skiplist_remove(sl, val) nobs -= 1 # calculate adds for j in range(end[i - 1], e): val = input[j] - if val == val: + if not isnan(val): nobs += 1 err = skiplist_insert(sl, val) != 1 if err: @@ -1472,7 +1471,7 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, # setup val = input[i] - if val == val: + if not isnan(val): nobs += 1 skiplist_insert(skiplist, val) @@ -1481,14 +1480,14 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, # calculate deletes for j in range(start[i - 1], s): val = input[j] - if val == val: + if not isnan(val): skiplist_remove(skiplist, val) nobs -= 1 # calculate adds for j in range(end[i - 1], e): val = input[j] - if val == val: + if not isnan(val): nobs += 1 skiplist_insert(skiplist, val) From b5313f923c829388d043f9c7b7bc2adf62c2dd89 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 16 Oct 2018 15:01:18 -0500 Subject: [PATCH 07/13] added parquet-cpp --- ci/azure-windows-36.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/azure-windows-36.yaml b/ci/azure-windows-36.yaml index c57bd66a2c82d..656a6a31d92b4 100644 --- a/ci/azure-windows-36.yaml +++ b/ci/azure-windows-36.yaml @@ -12,6 +12,7 @@ dependencies: - numexpr - numpy=1.14* - openpyxl=2.5.5 + - parquet-cpp - pyarrow - pytables - python-dateutil From 29f392c4fdfa23ab6d16f31d824eab143e657cb4 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 17 Oct 2018 05:46:12 -0500 Subject: [PATCH 08/13] Try defining notnan --- pandas/_libs/src/headers/cmath | 7 +++++- pandas/_libs/window.pyx | 39 +++++++++++++++++----------------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/pandas/_libs/src/headers/cmath b/pandas/_libs/src/headers/cmath index 2bccf9bb13d77..11c68cc22aa51 100644 --- a/pandas/_libs/src/headers/cmath +++ b/pandas/_libs/src/headers/cmath @@ -8,9 +8,14 @@ namespace std { __inline int isnan(double x) { return _isnan(x); } __inline int signbit(double num) { return _copysign(1.0, num) < 0; } + __inline int notnan(double x) { return not isnan(x); } } #else #include -#endif +namespace std { + __inline int notnan(double x) { return x == x; } +} + +#endif #endif diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 77d6af7d4cc1a..989dc4dd17a37 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -15,6 +15,7 @@ cnp.import_array() cdef extern from "src/headers/cmath" namespace "std": bint isnan(double) nogil + bint notnan(double) nogil int signbit(double) nogil double sqrt(double x) nogil @@ -381,7 +382,7 @@ def roll_count(ndarray[double_t] input, int64_t win, int64_t minp, count_x = 0.0 for j in range(s, e): val = input[j] - if not isnan(val): + if notnan(val): count_x += 1.0 else: @@ -389,13 +390,13 @@ def roll_count(ndarray[double_t] input, int64_t win, int64_t minp, # calculate deletes for j in range(start[i - 1], s): val = input[j] - if not isnan(val): + if notnan(val): count_x -= 1.0 # calculate adds for j in range(end[i - 1], e): val = input[j] - if not isnan(val): + if notnan(val): count_x += 1.0 if count_x >= minp: @@ -424,7 +425,7 @@ cdef inline void add_sum(double val, int64_t *nobs, double *sum_x) nogil: """ add a value from the sum calc """ # Not NaN - if not isnan(val): + if notnan(val): nobs[0] = nobs[0] + 1 sum_x[0] = sum_x[0] + val @@ -432,7 +433,7 @@ cdef inline void add_sum(double val, int64_t *nobs, double *sum_x) nogil: cdef inline void remove_sum(double val, int64_t *nobs, double *sum_x) nogil: """ remove a value from the sum calc """ - if not isnan(val): + if notnan(val): nobs[0] = nobs[0] - 1 sum_x[0] = sum_x[0] - val @@ -538,7 +539,7 @@ cdef inline void add_mean(double val, Py_ssize_t *nobs, double *sum_x, """ add a value from the mean calc """ # Not NaN - if not isnan(val): + if notnan(val): nobs[0] = nobs[0] + 1 sum_x[0] = sum_x[0] + val if signbit(val): @@ -549,7 +550,7 @@ cdef inline void remove_mean(double val, Py_ssize_t *nobs, double *sum_x, Py_ssize_t *neg_ct) nogil: """ remove a value from the mean calc """ - if not isnan(val): + if notnan(val): nobs[0] = nobs[0] - 1 sum_x[0] = sum_x[0] - val if signbit(val): @@ -671,7 +672,7 @@ cdef inline void remove_var(double val, double *nobs, double *mean_x, """ remove a value from the var calc """ cdef double delta - if not isnan(val): + if notnan(val): nobs[0] = nobs[0] - 1 if nobs[0]: # a part of Welford's method for the online variance-calculation @@ -759,7 +760,7 @@ def roll_var(ndarray[double_t] input, int64_t win, int64_t minp, val = input[i] prev = input[i - win] - if not isnan(val): + if notnan(val): if prev == prev: # Adding one observation and removing another one @@ -821,7 +822,7 @@ cdef inline void add_skew(double val, int64_t *nobs, double *x, double *xx, """ add a value from the skew calc """ # Not NaN - if not isnan(val): + if notnan(val): nobs[0] = nobs[0] + 1 # seriously don't ask me why this is faster @@ -835,7 +836,7 @@ cdef inline void remove_skew(double val, int64_t *nobs, double *x, double *xx, """ remove a value from the skew calc """ # Not NaN - if not isnan(val): + if notnan(val): nobs[0] = nobs[0] - 1 # seriously don't ask me why this is faster @@ -958,7 +959,7 @@ cdef inline void add_kurt(double val, int64_t *nobs, double *x, double *xx, """ add a value from the kurotic calc """ # Not NaN - if not isnan(val): + if notnan(val): nobs[0] = nobs[0] + 1 # seriously don't ask me why this is faster @@ -973,7 +974,7 @@ cdef inline void remove_kurt(double val, int64_t *nobs, double *x, double *xx, """ remove a value from the kurotic calc """ # Not NaN - if not isnan(val): + if notnan(val): nobs[0] = nobs[0] - 1 # seriously don't ask me why this is faster @@ -1088,7 +1089,7 @@ def roll_median_c(ndarray[float64_t] input, int64_t win, int64_t minp, # setup val = input[i] - if not isnan(val): + if notnan(val): nobs += 1 err = skiplist_insert(sl, val) != 1 if err: @@ -1099,14 +1100,14 @@ def roll_median_c(ndarray[float64_t] input, int64_t win, int64_t minp, # calculate deletes for j in range(start[i - 1], s): val = input[j] - if not isnan(val): + if notnan(val): skiplist_remove(sl, val) nobs -= 1 # calculate adds for j in range(end[i - 1], e): val = input[j] - if not isnan(val): + if notnan(val): nobs += 1 err = skiplist_insert(sl, val) != 1 if err: @@ -1471,7 +1472,7 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, # setup val = input[i] - if not isnan(val): + if notnan(val): nobs += 1 skiplist_insert(skiplist, val) @@ -1480,14 +1481,14 @@ def roll_quantile(ndarray[float64_t, cast=True] input, int64_t win, # calculate deletes for j in range(start[i - 1], s): val = input[j] - if not isnan(val): + if notnan(val): skiplist_remove(skiplist, val) nobs -= 1 # calculate adds for j in range(end[i - 1], e): val = input[j] - if not isnan(val): + if notnan(val): nobs += 1 skiplist_insert(skiplist, val) From d4c23f7027c15e7585cb8ff023dd96940bb386df Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 17 Oct 2018 06:36:37 -0500 Subject: [PATCH 09/13] fixup --- pandas/_libs/src/headers/cmath | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/src/headers/cmath b/pandas/_libs/src/headers/cmath index 11c68cc22aa51..2a07aa5900777 100644 --- a/pandas/_libs/src/headers/cmath +++ b/pandas/_libs/src/headers/cmath @@ -8,7 +8,7 @@ namespace std { __inline int isnan(double x) { return _isnan(x); } __inline int signbit(double num) { return _copysign(1.0, num) < 0; } - __inline int notnan(double x) { return not isnan(x); } + __inline int notnan(double x) { return !isnan(x); } } #else #include From 0b7dc84e97245d719f8796509cb2fe388a72d3b3 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 17 Oct 2018 07:21:06 -0500 Subject: [PATCH 10/13] Define notnan, apply change to just MSVC 2017 --- pandas/_libs/src/headers/cmath | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/src/headers/cmath b/pandas/_libs/src/headers/cmath index 2a07aa5900777..f2fdf959c0aa7 100644 --- a/pandas/_libs/src/headers/cmath +++ b/pandas/_libs/src/headers/cmath @@ -1,14 +1,13 @@ #ifndef _PANDAS_MATH_H_ #define _PANDAS_MATH_H_ -// In older versions of Visual Studio there wasn't a std::signbit defined -// This defines it using _copysign -#if defined(_MSC_VER) && (_MSC_VER < 1800) +// MSVC 2017 has a bug where `x == x` can be true for NaNs. +// MSC_VER from https://stackoverflow.com/a/70630/1889400 +// Place upper bound on this check once a fixed MSVC is released. +#if defined(_MSC_VER) && (_MSC_VER >= 1900) #include namespace std { - __inline int isnan(double x) { return _isnan(x); } - __inline int signbit(double num) { return _copysign(1.0, num) < 0; } - __inline int notnan(double x) { return !isnan(x); } + __inline int notnan(double x) { return not isnan(x); } } #else #include From 76c4d06ae1a4d0471830b623028582bac2583671 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 17 Oct 2018 07:23:17 -0500 Subject: [PATCH 11/13] not to ! --- pandas/_libs/src/headers/cmath | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/src/headers/cmath b/pandas/_libs/src/headers/cmath index f2fdf959c0aa7..1d1ece0d77b10 100644 --- a/pandas/_libs/src/headers/cmath +++ b/pandas/_libs/src/headers/cmath @@ -7,7 +7,7 @@ #if defined(_MSC_VER) && (_MSC_VER >= 1900) #include namespace std { - __inline int notnan(double x) { return not isnan(x); } + __inline int notnan(double x) { return !isnan(x); } } #else #include From e6fd3119061abda67ba3ccf17068142d1ad909ec Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 17 Oct 2018 08:00:31 -0500 Subject: [PATCH 12/13] define isnan, signbit --- pandas/_libs/src/headers/cmath | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/_libs/src/headers/cmath b/pandas/_libs/src/headers/cmath index 1d1ece0d77b10..d74a1c679cdde 100644 --- a/pandas/_libs/src/headers/cmath +++ b/pandas/_libs/src/headers/cmath @@ -7,8 +7,17 @@ #if defined(_MSC_VER) && (_MSC_VER >= 1900) #include namespace std { + __inline int isnan(double x) { return _isnan(x); } + __inline int signbit(double num) { return _copysign(1.0, num) < 0; } __inline int notnan(double x) { return !isnan(x); } } +#else if defined(_MSC_VER) +#include +namespace std { + __inline int isnan(double x) { return _isnan(x); } + __inline int signbit(double num) { return _copysign(1.0, num) < 0; } + __inline int notnan(double x) { return x == x; } +} #else #include From 6af34616f105e9ef2a6cef468e47d4c563185a0c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 17 Oct 2018 08:24:04 -0500 Subject: [PATCH 13/13] try again --- pandas/_libs/src/headers/cmath | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/src/headers/cmath b/pandas/_libs/src/headers/cmath index d74a1c679cdde..632e1fc2390d0 100644 --- a/pandas/_libs/src/headers/cmath +++ b/pandas/_libs/src/headers/cmath @@ -4,18 +4,25 @@ // MSVC 2017 has a bug where `x == x` can be true for NaNs. // MSC_VER from https://stackoverflow.com/a/70630/1889400 // Place upper bound on this check once a fixed MSVC is released. -#if defined(_MSC_VER) && (_MSC_VER >= 1900) +#if defined(_MSC_VER) && (_MSC_VER < 1800) +#include +// In older versions of Visual Studio there wasn't a std::signbit defined +// This defines it using _copysign +namespace std { + __inline int isnan(double x) { return _isnan(x); } + __inline int signbit(double num) { return _copysign(1.0, num) < 0; } + __inline int notnan(double x) { return !isnan(x); } +} +#elif defined(_MSC_VER) && (_MSC_VER >= 1900) #include namespace std { __inline int isnan(double x) { return _isnan(x); } - __inline int signbit(double num) { return _copysign(1.0, num) < 0; } __inline int notnan(double x) { return !isnan(x); } } -#else if defined(_MSC_VER) +#elif defined(_MSC_VER) #include namespace std { __inline int isnan(double x) { return _isnan(x); } - __inline int signbit(double num) { return _copysign(1.0, num) < 0; } __inline int notnan(double x) { return x == x; } } #else