From 87475d7d776a2fc22d088ff37e09650eea607217 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 27 May 2020 13:48:58 -0500 Subject: [PATCH 1/5] Fixup 32-bit build --- pandas/_libs/hashtable_func_helper.pxi.in | 10 ++++++---- pandas/_libs/src/parser/tokenizer.c | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index c63f368dfae43..c3f1b1cfa9486 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -84,7 +84,7 @@ cpdef value_count_{{dtype}}({{c_type}}[:] values, bint dropna): int64_t[:] result_counts {{endif}} - Py_ssize_t k + khiter_t k table = kh_init_{{ttype}}() {{if dtype == 'object'}} @@ -132,7 +132,8 @@ def duplicated_{{dtype}}(const {{c_type}}[:] values, object keep='first'): {{if dtype != 'object'}} {{dtype}}_t value {{endif}} - Py_ssize_t k, i, n = len(values) + Py_ssize_t i, n = len(values) + khiter_t k kh_{{ttype}}_t *table = kh_init_{{ttype}}() ndarray[uint8_t, ndim=1, cast=True] out = np.empty(n, dtype='bool') @@ -222,7 +223,8 @@ def ismember_{{dtype}}(const {{c_type}}[:] arr, {{c_type}}[:] values): boolean ndarry len of (arr) """ cdef: - Py_ssize_t i, n, k + Py_ssize_t i, n + khiter_t k int ret = 0 ndarray[uint8_t] result {{c_type}} val @@ -295,7 +297,7 @@ def mode_{{dtype}}({{ctype}}[:] values, bint dropna): cdef: int count, max_count = 1 int j = -1 # so you can do += - Py_ssize_t k + khiter_t k kh_{{table_type}}_t *table ndarray[{{ctype}}] modes diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c index 7ba1a6cd398c9..a195c0daf5271 100644 --- a/pandas/_libs/src/parser/tokenizer.c +++ b/pandas/_libs/src/parser/tokenizer.c @@ -709,7 +709,7 @@ int skip_this_line(parser_t *self, int64_t rownum) { } int tokenize_bytes(parser_t *self, - size_t line_limit, int64_t start_lines) { + size_t line_limit, uint64_t start_lines) { int64_t i; uint64_t slen; int should_skip; @@ -1348,7 +1348,7 @@ int parser_trim_buffers(parser_t *self) { int _tokenize_helper(parser_t *self, size_t nrows, int all) { int status = 0; - int64_t start_lines = self->lines; + uint64_t start_lines = self->lines; if (self->state == FINISHED) { return 0; From d3bf920f361adb64f050c033076fedf038c5d748 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 27 May 2020 14:28:00 -0500 Subject: [PATCH 2/5] add job --- ci/deps/azure-38.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 ci/deps/azure-38.yaml diff --git a/ci/deps/azure-38.yaml b/ci/deps/azure-38.yaml new file mode 100644 index 0000000000000..3cc8bf3ceaecc --- /dev/null +++ b/ci/deps/azure-38.yaml @@ -0,0 +1,19 @@ +name: pandas-dev +channels: + - defaults + - conda-forge +dependencies: + - python=3.8.* + + # tools + - cython>=0.29.16 + - pytest>=5.0.1 + - pytest-xdist>=1.21 + - hypothesis>=3.58.0 + + # pandas dependencies + - numpy + - python-dateutil + - nomkl + - pytz + - pip From bf6d1a8a7437aac656f8189ddf6d75aa8bcaa0f8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 27 May 2020 14:58:29 -0500 Subject: [PATCH 3/5] try more --- ci/deps/azure-38.yaml | 19 ------------------- pandas/_libs/tslibs/period.pyx | 4 ++-- 2 files changed, 2 insertions(+), 21 deletions(-) delete mode 100644 ci/deps/azure-38.yaml diff --git a/ci/deps/azure-38.yaml b/ci/deps/azure-38.yaml deleted file mode 100644 index 3cc8bf3ceaecc..0000000000000 --- a/ci/deps/azure-38.yaml +++ /dev/null @@ -1,19 +0,0 @@ -name: pandas-dev -channels: - - defaults - - conda-forge -dependencies: - - python=3.8.* - - # tools - - cython>=0.29.16 - - pytest>=5.0.1 - - pytest-xdist>=1.21 - - hypothesis>=3.58.0 - - # pandas dependencies - - numpy - - python-dateutil - - nomkl - - pytz - - pip diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index ec6f8de159dae..6738b1ddbb7cf 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -73,7 +73,7 @@ from pandas._libs.tslibs.tzconversion cimport tz_convert_utc_to_tzlocal cdef: enum: - INT32_MIN = -2_147_483_648 + INT32_MIN = -2_147_483_648LL ctypedef struct asfreq_info: @@ -107,7 +107,7 @@ cdef extern from *: // must use npy typedef b/c int64_t is aliased in cython-generated c static npy_int64 daytime_conversion_factor_matrix[7][7] = { {1, 24, 1440, 86400, 86400000, 86400000000, 86400000000000}, - {0, 1, 60, 3600, 3600000, 3600000000, 3600000000000}, + {0LL, 1LL, 60LL, 3600LL, 3600000LL, 3600000000LL, 3600000000000LL}, {0, 0, 1, 60, 60000, 60000000, 60000000000}, {0, 0, 0, 1, 1000, 1000000, 1000000000}, {0, 0, 0, 0, 1, 1000, 1000000}, From 580cd7a98f465da41c31a85bda22faa5da6d2fcd Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 28 May 2020 16:04:32 -0500 Subject: [PATCH 4/5] fixups --- pandas/_libs/hashtable_func_helper.pxi.in | 2 ++ pandas/_libs/tslibs/period.pyx | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/hashtable_func_helper.pxi.in b/pandas/_libs/hashtable_func_helper.pxi.in index c3f1b1cfa9486..326ae36c6a12c 100644 --- a/pandas/_libs/hashtable_func_helper.pxi.in +++ b/pandas/_libs/hashtable_func_helper.pxi.in @@ -84,6 +84,7 @@ cpdef value_count_{{dtype}}({{c_type}}[:] values, bint dropna): int64_t[:] result_counts {{endif}} + # Don't use Py_ssize_t, since table.n_buckets is unsigned khiter_t k table = kh_init_{{ttype}}() @@ -297,6 +298,7 @@ def mode_{{dtype}}({{ctype}}[:] values, bint dropna): cdef: int count, max_count = 1 int j = -1 # so you can do += + # Don't use Py_ssize_t, since table.n_buckets is unsigned khiter_t k kh_{{table_type}}_t *table ndarray[{{ctype}}] modes diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 2ed4961282d74..1e44eb6610096 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -110,7 +110,7 @@ cdef extern from *: // must use npy typedef b/c int64_t is aliased in cython-generated c static npy_int64 daytime_conversion_factor_matrix[7][7] = { {1, 24, 1440, 86400, 86400000, 86400000000, 86400000000000}, - {0LL, 1LL, 60LL, 3600LL, 3600000LL, 3600000000LL, 3600000000000LL}, + {0, 1, 60, 3600, 3600000, 3600000000, 3600000000000}, {0, 0, 1, 60, 60000, 60000000, 60000000000}, {0, 0, 0, 1, 1000, 1000000, 1000000000}, {0, 0, 0, 0, 1, 1000, 1000000}, From d64de6e95d2eaace1ef15cf0e0ceb1b589921ef8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 28 May 2020 16:15:33 -0500 Subject: [PATCH 5/5] fixups --- pandas/_libs/tslibs/period.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 1e44eb6610096..aadf2c41f7941 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -108,9 +108,11 @@ cdef extern from *: #define FR_UND -10000 /* Undefined */ // must use npy typedef b/c int64_t is aliased in cython-generated c + // unclear why we need LL for that row. + // see https://github.com/pandas-dev/pandas/pull/34416/ static npy_int64 daytime_conversion_factor_matrix[7][7] = { {1, 24, 1440, 86400, 86400000, 86400000000, 86400000000000}, - {0, 1, 60, 3600, 3600000, 3600000000, 3600000000000}, + {0LL, 1LL, 60LL, 3600LL, 3600000LL, 3600000000LL, 3600000000000LL}, {0, 0, 1, 60, 60000, 60000000, 60000000000}, {0, 0, 0, 1, 1000, 1000000, 1000000000}, {0, 0, 0, 0, 1, 1000, 1000000},