diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in
index ff6570e2106b2..7f4a2eeafeea2 100644
--- a/pandas/_libs/hashtable_class_helper.pxi.in
+++ b/pandas/_libs/hashtable_class_helper.pxi.in
@@ -29,7 +29,7 @@ dtypes = [('Float64', 'float64', 'float64_t'),
 
 ctypedef struct {{name}}VectorData:
     {{arg}} *data
-    size_t n, m
+    Py_ssize_t n, m
 
 {{endif}}
 
@@ -147,7 +147,7 @@ cdef class StringVector:
     cdef resize(self):
         cdef:
             char **orig_data
-            size_t i, m
+            Py_ssize_t i, m
 
         m = self.data.m
         self.data.m = max(self.data.m * 4, _INIT_VEC_CAP)
@@ -172,7 +172,7 @@ cdef class StringVector:
     def to_array(self):
         cdef:
             ndarray ao
-            size_t n
+            Py_ssize_t n
             object val
 
         ao = np.empty(self.data.n, dtype=np.object)
@@ -198,7 +198,7 @@ cdef class ObjectVector:
 
     cdef:
         PyObject **data
-        size_t n, m
+        Py_ssize_t n, m
         ndarray ao
         bint external_view_exists
 
@@ -281,7 +281,7 @@ cdef class {{name}}HashTable(HashTable):
     def sizeof(self, deep=False):
        """ return the size of my table in bytes """
        return self.table.n_buckets * (sizeof({{dtype}}_t) +  # keys
-                                      sizeof(size_t) +  # vals
+                                      sizeof(Py_ssize_t) +  # vals
                                       sizeof(uint32_t))  # flags
 
    cpdef get_item(self, {{dtype}}_t val):
@@ -522,13 +522,13 @@ cdef class StringHashTable(HashTable):
     def sizeof(self, deep=False):
        """ return the size of my table in bytes """
        return self.table.n_buckets * (sizeof(char *) +  # keys
-                                      sizeof(size_t) +  # vals
+                                      sizeof(Py_ssize_t) +  # vals
                                       sizeof(uint32_t))  # flags
 
    cpdef get_item(self, object val):
        cdef:
            khiter_t k
-           char *v
+           const char *v
        v = util.get_c_string(val)
 
        k = kh_get_str(self.table, v)
@@ -541,7 +541,7 @@ cdef class StringHashTable(HashTable):
        cdef:
            khiter_t k
            int ret = 0
-           char *v
+           const char *v
 
        v = util.get_c_string(val)
 
@@ -560,10 +560,10 @@ cdef class StringHashTable(HashTable):
            int64_t *resbuf = <int64_t*>labels.data
            khiter_t k
            kh_str_t *table = self.table
-           char *v
-           char **vecs
+           const char *v
+           const char **vecs
 
-       vecs = <char **>malloc(n * sizeof(char *))
+       vecs = <const char **>malloc(n * sizeof(char *))
        for i in range(n):
            val = values[i]
            v = util.get_c_string(val)
@@ -589,10 +589,10 @@ cdef class StringHashTable(HashTable):
            object val
            ObjectVector uniques
            khiter_t k
-           char *v
-           char **vecs
+           const char *v
+           const char **vecs
 
-       vecs = <char **>malloc(n * sizeof(char *))
+       vecs = <const char **>malloc(n * sizeof(char *))
        uindexer = np.empty(n, dtype=np.int64)
        for i in range(n):
            val = values[i]
@@ -627,7 +627,7 @@ cdef class StringHashTable(HashTable):
            Py_ssize_t i, n = len(values)
            int ret = 0
            object val
-           char *v
+           const char *v
            khiter_t k
            int64_t[:] locs = np.empty(n, dtype=np.int64)
 
@@ -660,12 +660,12 @@ cdef class StringHashTable(HashTable):
            Py_ssize_t i, n = len(values)
            int ret = 0
            object val
-           char *v
-           char **vecs
+           const char *v
+           const char **vecs
            khiter_t k
 
        # these by-definition *must* be strings
-       vecs = <char **>malloc(n * sizeof(char *))
+       vecs = <const char **>malloc(n * sizeof(char *))
        for i in range(n):
            val = values[i]
 
@@ -693,8 +693,8 @@ cdef class StringHashTable(HashTable):
            Py_ssize_t idx, count = count_prior
            int ret = 0
            object val
-           char *v
-           char **vecs
+           const char *v
+           const char **vecs
            khiter_t k
            bint use_na_value
 
@@ -705,7 +705,7 @@ cdef class StringHashTable(HashTable):
 
        # pre-filter out missing
        # and assign pointers
-       vecs = <char **>malloc(n * sizeof(char *))
+       vecs = <const char **>malloc(n * sizeof(char *))
        for i in range(n):
            val = values[i]
 
@@ -769,7 +769,7 @@ cdef class PyObjectHashTable(HashTable):
     def sizeof(self, deep=False):
        """ return the size of my table in bytes """
        return self.table.n_buckets * (sizeof(PyObject *) +  # keys
-                                      sizeof(size_t) +  # vals
+                                      sizeof(Py_ssize_t) +  # vals
                                      sizeof(uint32_t))  # flags
 
    cpdef get_item(self, object val):
diff --git a/pandas/_libs/src/datetime/np_datetime.c b/pandas/_libs/src/datetime/np_datetime.c
index 9e56802b92bf0..663ec66a35db2 100644
--- a/pandas/_libs/src/datetime/np_datetime.c
+++ b/pandas/_libs/src/datetime/np_datetime.c
@@ -329,10 +329,11 @@ int cmp_npy_datetimestruct(const npy_datetimestruct *a,
  * Returns -1 on error, 0 on success, and 1 (with no error set)
  * if obj doesn't have the needed date or datetime attributes.
  */
-int convert_pydatetime_to_datetimestruct(PyDateTime_Date *obj,
+int convert_pydatetime_to_datetimestruct(PyDateTime_Date *dtobj,
                                          npy_datetimestruct *out) {
     // Assumes that obj is a valid datetime object
     PyObject *tmp;
+    PyObject *obj = (PyObject*)dtobj;
 
     /* Initialize the output to all zeros */
     memset(out, 0, sizeof(npy_datetimestruct));
diff --git a/pandas/_libs/src/datetime/np_datetime.h b/pandas/_libs/src/datetime/np_datetime.h
index 4347d0c8c47d4..04009c6581ac0 100644
--- a/pandas/_libs/src/datetime/np_datetime.h
+++ b/pandas/_libs/src/datetime/np_datetime.h
@@ -31,7 +31,7 @@ extern const npy_datetimestruct _NS_MAX_DTS;
 // stuff pandas needs
 // ----------------------------------------------------------------------------
 
-int convert_pydatetime_to_datetimestruct(PyDateTime_Date *obj,
+int convert_pydatetime_to_datetimestruct(PyDateTime_Date *dtobj,
                                          npy_datetimestruct *out);
 
 npy_datetime npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT base,
diff --git a/pandas/_libs/src/parser/tokenizer.c b/pandas/_libs/src/parser/tokenizer.c
index 25eede6c286dc..a18d12616a802 100644
--- a/pandas/_libs/src/parser/tokenizer.c
+++ b/pandas/_libs/src/parser/tokenizer.c
@@ -262,7 +262,7 @@ static int make_stream_space(parser_t *self, size_t nbytes) {
         ("\n\nmake_stream_space: nbytes = %zu. grow_buffer(self->stream...)\n",
          nbytes))
     self->stream = (char *)grow_buffer((void *)self->stream, self->stream_len,
-                                       (size_t*)&self->stream_cap, nbytes * 2,
+                                       (int64_t*)&self->stream_cap, nbytes * 2,
                                        sizeof(char), &status);
     TRACE(
         ("make_stream_space: self->stream=%p, self->stream_len = %zu, "
@@ -289,7 +289,7 @@ static int make_stream_space(parser_t *self, size_t nbytes) {
     cap = self->words_cap;
     self->words =
         (char **)grow_buffer((void *)self->words, self->words_len,
-                             (size_t*)&self->words_cap, nbytes,
+                             (int64_t*)&self->words_cap, nbytes,
                              sizeof(char *), &status);
     TRACE(
         ("make_stream_space: grow_buffer(self->self->words, %zu, %zu, %zu, "
@@ -320,7 +320,7 @@ static int make_stream_space(parser_t *self, size_t nbytes) {
     cap = self->lines_cap;
     self->line_start =
         (int64_t *)grow_buffer((void *)self->line_start, self->lines + 1,
-                               (size_t*)&self->lines_cap, nbytes,
+                               (int64_t*)&self->lines_cap, nbytes,
                                sizeof(int64_t), &status);
     TRACE((
         "make_stream_space: grow_buffer(self->line_start, %zu, %zu, %zu, %d)\n",
diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c
index 4bab32e93ab1e..8c7b92ddeaa81 100644
--- a/pandas/_libs/src/ujson/python/objToJSON.c
+++ b/pandas/_libs/src/ujson/python/objToJSON.c
@@ -427,7 +427,7 @@ static void *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue,
 #if (PY_VERSION_HEX >= 0x03030000)
     if (PyUnicode_IS_COMPACT_ASCII(obj)) {
         Py_ssize_t len;
-        char *data = PyUnicode_AsUTF8AndSize(obj, &len);
+        char *data = (char*)PyUnicode_AsUTF8AndSize(obj, &len);
         *_outLen = len;
         return data;
     }
diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx
index cd3ce5c1a8f09..4054154cd285b 100644
--- a/pandas/_libs/tslibs/period.pyx
+++ b/pandas/_libs/tslibs/period.pyx
@@ -927,7 +927,8 @@ def extract_freq(ndarray[object] values):
 # -----------------------------------------------------------------------
 # period helpers
 
-
+@cython.wraparound(False)
+@cython.boundscheck(False)
 cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps,
                                                  int freq, object tz):
     cdef:
diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd
index 305c4f8f908e0..efdb1570ed878 100644
--- a/pandas/_libs/tslibs/util.pxd
+++ b/pandas/_libs/tslibs/util.pxd
@@ -70,7 +70,7 @@ cdef extern from "../src/numpy_helper.h":
     int assign_value_1d(ndarray, Py_ssize_t, object) except -1
     cnp.int64_t get_nat()
     object get_value_1d(ndarray, Py_ssize_t)
-    char *get_c_string(object) except NULL
+    const char *get_c_string(object) except NULL
     object char_to_string(char*)
 
 ctypedef fused numeric:
diff --git a/pandas/io/msgpack/_unpacker.pyx b/pandas/io/msgpack/_unpacker.pyx
index 04bb330e595dd..427414b80dfe4 100644
--- a/pandas/io/msgpack/_unpacker.pyx
+++ b/pandas/io/msgpack/_unpacker.pyx
@@ -139,7 +139,7 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
     ret = unpack_construct(&ctx, buf, buf_len, &off)
     if ret == 1:
         obj = unpack_data(&ctx)
-        if off < buf_len:
+        if <Py_ssize_t>off < buf_len:
             raise ExtraData(obj, PyBytes_FromStringAndSize(
                 buf + off, buf_len - off))
         return obj
@@ -367,9 +367,11 @@ cdef class Unpacker(object):
         self.buf_tail = tail + _buf_len
 
     cdef read_from_file(self):
+        # Assume self.max_buffer_size - (self.buf_tail - self.buf_head) >= 0
         next_bytes = self.file_like_read(
             min(self.read_size,
-                self.max_buffer_size - (self.buf_tail - self.buf_head)))
+                <Py_ssize_t>(self.max_buffer_size -
+                             (self.buf_tail - self.buf_head))))
         if next_bytes:
             self.append_buffer(PyBytes_AsString(next_bytes),
                                PyBytes_Size(next_bytes))
@@ -417,7 +419,9 @@ cdef class Unpacker(object):
     def read_bytes(self, Py_ssize_t nbytes):
         """Read a specified number of raw bytes from the stream"""
         cdef size_t nread
-        nread = min(self.buf_tail - self.buf_head, nbytes)
+
+        # Assume that self.buf_tail - self.buf_head >= 0
+        nread = min(<Py_ssize_t>(self.buf_tail - self.buf_head), nbytes)
         ret = PyBytes_FromStringAndSize(self.buf + self.buf_head, nread)
         self.buf_head += nread
         if len(ret) < nbytes and self.file_like is not None:
diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx
index e2a1107969990..3d94dc127a1d2 100644
--- a/pandas/io/sas/sas.pyx
+++ b/pandas/io/sas/sas.pyx
@@ -104,7 +104,8 @@ cdef ndarray[uint8_t, ndim=1] rle_decompress(
             raise ValueError("unknown control byte: {byte}"
                              .format(byte=control_byte))
 
-    if len(result) != result_length:
+    # In py37 cython/clang sees `len(outbuff)` as size_t and not Py_ssize_t
+    if <Py_ssize_t>len(result) != result_length:
         raise ValueError("RLE: {got} != {expect}".format(got=len(result),
                                                          expect=result_length))
 
@@ -186,12 +187,14 @@ cdef ndarray[uint8_t, ndim=1] rdc_decompress(
         else:
             raise ValueError("unknown RDC command")
 
-    if len(outbuff) != result_length:
+    # In py37 cython/clang sees `len(outbuff)` as size_t and not Py_ssize_t
+    if <Py_ssize_t>len(outbuff) != result_length:
         raise ValueError("RDC: {got} != {expect}\n"
                          .format(got=len(outbuff), expect=result_length))
 
     return np.asarray(outbuff)
 
+
 cdef enum ColumnTypes:
     column_type_decimal = 1
     column_type_string = 2
@@ -204,6 +207,7 @@ cdef int page_mix_types_1 = const.page_mix_types[1]
 cdef int page_data_type = const.page_data_type
 cdef int subheader_pointers_offset = const.subheader_pointers_offset
 
+
 cdef class Parser(object):
 
     cdef:
diff --git a/setup.py b/setup.py
index 85c5970af018f..d265733738425 100755
--- a/setup.py
+++ b/setup.py
@@ -491,7 +491,6 @@ def srcpath(name=None, suffix='.pyx', subdir='src'):
 if suffix == '.pyx':
     lib_depends = [srcpath(f, suffix='.pyx', subdir='_libs/src')
                    for f in lib_depends]
-    lib_depends.append('pandas/_libs/util.pxd')
 else:
     lib_depends = []
 
@@ -507,7 +506,7 @@ def srcpath(name=None, suffix='.pyx', subdir='src'):
 np_datetime_sources = ['pandas/_libs/src/datetime/np_datetime.c',
                        'pandas/_libs/src/datetime/np_datetime_strings.c']
 
-tseries_depends = np_datetime_headers + ['pandas/_libs/tslibs/np_datetime.pxd']
+tseries_depends = np_datetime_headers
 
 ext_data = {
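
A note on the recurring pattern above, separate from the patch itself: most hunks exist to silence signed/unsigned mismatch warnings, either by retyping `size_t` fields as the signed `Py_ssize_t` or by adding explicit casts such as `<Py_ssize_t>off`. The sketch below is a minimal C analogue of the failure mode those warnings guard against; it is illustrative only (`ptrdiff_t` stands in for `Py_ssize_t`, and the variable names merely echo the msgpack hunks):

    #include <stdio.h>
    #include <stddef.h>

    int main(void) {
        size_t buf_len = 16;  /* unsigned, like the old size_t fields */
        ptrdiff_t off = -1;   /* signed, standing in for Py_ssize_t */

        /* Mixed comparison: off is implicitly converted to size_t, so -1
         * wraps to SIZE_MAX and the test is false; gcc and clang flag
         * this with -Wsign-compare. */
        if (off < buf_len)
            printf("never printed, although -1 < 16 arithmetically\n");

        /* The patch's remedy: keep both operands signed, by retyping the
         * field or casting one side, mirroring `<Py_ssize_t>off`. */
        if (off < (ptrdiff_t)buf_len)
            printf("signed comparison: -1 < 16 holds\n");
        return 0;
    }

Compiled with `cc -Wall -Wextra demo.c`, the first comparison draws the same -Wsign-compare warning this patch eliminates.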