From fd1356b2343c4a41b5c0e8ef7b6735002bf9c151 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 3 Nov 2018 13:38:49 -0700 Subject: [PATCH 1/8] add typing information --- pandas/_libs/tslibs/conversion.pyx | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index d199997d2e9fe..cf00319705ef1 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -856,10 +856,10 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, """ cdef: ndarray[int64_t] trans - int64_t[:] deltas, idx_shifted + int64_t[:] deltas, idx_shifted, idx_shifted_left, idx_shifted_right ndarray ambiguous_array Py_ssize_t i, idx, pos, ntrans, n = len(vals) - Py_ssize_t delta_idx_offset, delta_idx + Py_ssize_t delta_idx_offset, delta_idx, pos_left, pos_right int64_t *tdata int64_t v, left, right, val, v_left, v_right, new_local, remaining_mins ndarray[int64_t] result, result_a, result_b, dst_hours @@ -907,7 +907,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, trans, deltas, typ = get_dst_info(tz) - tdata = cnp.PyArray_DATA(trans) + tdata = cnp.PyArray_DATA(trans) ntrans = len(trans) # Determine whether each date lies left of the DST transition (store in @@ -1032,8 +1032,10 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, return result -cdef inline bisect_right_i8(int64_t *data, int64_t val, Py_ssize_t n): - cdef Py_ssize_t pivot, left = 0, right = n +cdef inline Py_ssize_t bisect_right_i8(int64_t *data, + int64_t val, Py_ssize_t n): + cdef: + Py_ssize_t pivot, left = 0, right = n assert n >= 1 From 0416af44bb3c67b27f355a57dde0eee210b8272f Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 3 Nov 2018 13:42:48 -0700 Subject: [PATCH 2/8] replace (python) fill calls with cython assignment --- pandas/_libs/algos.pyx | 4 ++-- pandas/_libs/groupby.pyx | 2 +- pandas/_libs/groupby_helper.pxi.in | 18 +++++++++--------- pandas/_libs/join.pyx | 2 +- pandas/_libs/lib.pyx | 2 +- pandas/_libs/sparse.pyx | 6 +++--- pandas/_libs/tslibs/conversion.pyx | 4 ++-- 7 files changed, 19 insertions(+), 19 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 02815dce156fb..038c05b387558 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -409,7 +409,7 @@ def pad(ndarray[algos_t] old, ndarray[algos_t] new, limit=None): nleft = len(old) nright = len(new) indexer = np.empty(nright, dtype=np.int64) - indexer.fill(-1) + indexer[:] = -1 if limit is None: lim = nright @@ -607,7 +607,7 @@ def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None): nleft = len(old) nright = len(new) indexer = np.empty(nright, dtype=np.int64) - indexer.fill(-1) + indexer[:] = -1 if limit is None: lim = nright diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 2894e014b84b8..63c7d26e48870 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -370,7 +370,7 @@ def group_any_all(ndarray[uint8_t] out, else: raise ValueError("'bool_func' must be either 'any' or 'all'!") - out.fill(1 - flag_val) + out[:] = 1 - flag_val with nogil: for i in range(N): diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 315cfea56896e..87935525d5842 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -246,7 +246,7 @@ def group_ohlc_{{name}}(ndarray[{{c_type}}, ndim=2] out, if K > 1: raise NotImplementedError("Argument 'values' must have only " "one dimension") - out.fill(np.nan) + out[:] = np.nan with nogil: for i in range(N): @@ -629,10 +629,10 @@ def group_max(ndarray[groupby_t, ndim=2] out, maxx = np.empty_like(out) if groupby_t is int64_t: # Note: evaluated at compile-time - maxx.fill(-_int64_max) + maxx[:] = -_int64_max nan_val = iNaT else: - maxx.fill(-np.inf) + maxx[:] = -np.inf nan_val = NAN N, K = ( values).shape @@ -691,10 +691,10 @@ def group_min(ndarray[groupby_t, ndim=2] out, minx = np.empty_like(out) if groupby_t is int64_t: - minx.fill(_int64_max) + minx[:] = _int64_max nan_val = iNaT else: - minx.fill(np.inf) + minx[:] = np.inf nan_val = NAN N, K = ( values).shape @@ -747,9 +747,9 @@ def group_cummin(ndarray[groupby_t, ndim=2] out, N, K = ( values).shape accum = np.empty_like(values) if groupby_t is int64_t: - accum.fill(_int64_max) + accum[:] = _int64_max else: - accum.fill(np.inf) + accum[:] = np.inf with nogil: for i in range(N): @@ -795,9 +795,9 @@ def group_cummax(ndarray[groupby_t, ndim=2] out, N, K = ( values).shape accum = np.empty_like(values) if groupby_t is int64_t: - accum.fill(-_int64_max) + accum[:] = -_int64_max else: - accum.fill(-np.inf) + accum[:] = -np.inf with nogil: for i in range(N): diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index c92e0a4a7aa23..97501ca786550 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -212,7 +212,7 @@ def _get_result_indexer(sorter, indexer): else: # length-0 case res = np.empty(len(indexer), dtype=np.int64) - res.fill(-1) + res[:] = -1 return res diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index c57dd66a33fe0..42158f55cb5d3 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -347,7 +347,7 @@ def get_reverse_indexer(ndarray[int64_t] indexer, Py_ssize_t length): int64_t idx rev_indexer = np.empty(length, dtype=np.int64) - rev_indexer.fill(-1) + rev_indexer[:] = -1 for i in range(n): idx = indexer[i] if idx != -1: diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 705d93da10ba8..540cb3204abcb 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -221,7 +221,7 @@ cdef class IntIndex(SparseIndex): n = len(indexer) results = np.empty(n, dtype=np.int32) - results.fill(-1) + results[:] = -1 if self.npoints == 0: return results @@ -250,7 +250,7 @@ cdef class IntIndex(SparseIndex): sinds = self.indices result = np.empty(other.npoints, dtype=np.float64) - result.fill(fill_value) + result[:] = fill_value for 0 <= i < other.npoints: while oinds[i] > sinds[j] and j < self.npoints: @@ -582,7 +582,7 @@ cdef class BlockIndex(SparseIndex): n = len(indexer) results = np.empty(n, dtype=np.int32) - results.fill(-1) + results[:] = -1 if self.npoints == 0: return results diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index cf00319705ef1..c543562fc0847 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -914,8 +914,8 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, # result_a) or right of the DST transition (store in result_b) result_a = np.empty(n, dtype=np.int64) result_b = np.empty(n, dtype=np.int64) - result_a.fill(NPY_NAT) - result_b.fill(NPY_NAT) + result_a[:] = NPY_NAT + result_b[:] = NPY_NAT idx_shifted_left = (np.maximum(0, trans.searchsorted( vals - DAY_NS, side='right') - 1)).astype(np.int64) From 111683626efbea121d77adf5d6ae572489123a14 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 3 Nov 2018 14:19:46 -0700 Subject: [PATCH 3/8] one more fill --- pandas/_libs/tslibs/conversion.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index c543562fc0847..878234ed13d57 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -939,7 +939,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, if infer_dst: dst_hours = np.empty(n, dtype=np.int64) - dst_hours.fill(NPY_NAT) + dst_hours[:] = NPY_NAT # Get the ambiguous hours (given the above, these are the hours # where result_a != result_b and neither of them are NAT) From 2dc92642ad033ea1f8e6ce216c2876ea975a2b3f Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 3 Nov 2018 14:59:05 -0700 Subject: [PATCH 4/8] small cleanups --- pandas/_libs/algos_rank_helper.pxi.in | 4 ++-- pandas/_libs/groupby_helper.pxi.in | 4 ++-- pandas/_libs/tslibs/frequencies.pyx | 12 ++++++++---- pandas/_libs/tslibs/timestamps.pyx | 2 +- pandas/_libs/writers.pyx | 16 ++++++++-------- 5 files changed, 21 insertions(+), 17 deletions(-) diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in index bb4aec75ed567..b8fdcd38c12cd 100644 --- a/pandas/_libs/algos_rank_helper.pxi.in +++ b/pandas/_libs/algos_rank_helper.pxi.in @@ -4,9 +4,9 @@ Template for each `dtype` helper function for rank WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in """ -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # rank_1d, rank_2d -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- {{py: diff --git a/pandas/_libs/groupby_helper.pxi.in b/pandas/_libs/groupby_helper.pxi.in index 87935525d5842..e2a113da8925b 100644 --- a/pandas/_libs/groupby_helper.pxi.in +++ b/pandas/_libs/groupby_helper.pxi.in @@ -8,9 +8,9 @@ cdef extern from "numpy/npy_math.h": double NAN "NPY_NAN" _int64_max = np.iinfo(np.int64).max -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- # group_add, group_prod, group_var, group_mean, group_ohlc -#---------------------------------------------------------------------- +# ---------------------------------------------------------------------- {{py: diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index c555fce9dd007..fff4d04399481 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -154,8 +154,7 @@ cpdef get_freq_code(freqstr): freqstr = (freqstr.rule_code, freqstr.n) if isinstance(freqstr, tuple): - if (is_integer_object(freqstr[0]) and - is_integer_object(freqstr[1])): + if is_integer_object(freqstr[0]) and is_integer_object(freqstr[1]): # e.g., freqstr = (2000, 1) return freqstr else: @@ -171,7 +170,7 @@ cpdef get_freq_code(freqstr): return code, stride if is_integer_object(freqstr): - return (freqstr, 1) + return freqstr, 1 base, stride = _base_and_stride(freqstr) code = _period_str_to_code(base) @@ -183,6 +182,11 @@ cpdef _base_and_stride(freqstr): """ Return base freq and stride info from string representation + Returns + ------- + base : str + stride : int + Examples -------- _freq_and_stride('5Min') -> 'Min', 5 @@ -201,7 +205,7 @@ cpdef _base_and_stride(freqstr): base = groups.group(2) - return (base, stride) + return base, stride cpdef _period_str_to_code(freqstr): diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 65da765bae739..8b88c3eee27b0 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -177,7 +177,7 @@ def round_nsint64(values, mode, freq): # if/elif above should catch all rounding modes defined in enum 'RoundTo': # if flow of control arrives here, it is a bug - assert False, "round_nsint64 called with an unrecognized rounding mode" + raise ValueError("round_nsint64 called with an unrecognized rounding mode") # This is PITA. Because we inherit from datetime, which has very specific diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx index 9af12cbec1e9c..4a0d1a7620fc5 100644 --- a/pandas/_libs/writers.pyx +++ b/pandas/_libs/writers.pyx @@ -128,16 +128,16 @@ def max_len_string_array(pandas_string[:] arr) -> Py_ssize_t: """ return the maximum size of elements in a 1-dim string array """ cdef: Py_ssize_t i, m = 0, l = 0, length = arr.shape[0] - pandas_string v + pandas_string val for i in range(length): - v = arr[i] - if isinstance(v, str): - l = PyString_GET_SIZE(v) - elif isinstance(v, bytes): - l = PyBytes_GET_SIZE(v) - elif isinstance(v, unicode): - l = PyUnicode_GET_SIZE(v) + val = arr[i] + if isinstance(val, str): + l = PyString_GET_SIZE(val) + elif isinstance(val, bytes): + l = PyBytes_GET_SIZE(val) + elif isinstance(val, unicode): + l = PyUnicode_GET_SIZE(val) if l > m: m = l From 7f1126f01a2c27c71568a5dd2b8bfd47ac901ec8 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 3 Nov 2018 15:20:22 -0700 Subject: [PATCH 5/8] small cleanup --- pandas/_libs/tslibs/offsets.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index 78e1269aa5363..8f5887754e40d 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -532,7 +532,8 @@ cdef inline int month_add_months(npy_datetimestruct dts, int months) nogil: New month number after shifting npy_datetimestruct number of months. """ - cdef int new_month = (dts.month + months) % 12 + cdef: + int new_month = (dts.month + months) % 12 return 12 if new_month == 0 else new_month From c3bd47a8b513423e18ba937903656e6d2f2ebf77 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 3 Nov 2018 15:36:10 -0700 Subject: [PATCH 6/8] remove unnecessary cpdef --- pandas/_libs/lib.pyx | 4 ++-- pandas/_libs/missing.pyx | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 42158f55cb5d3..a0675d0770903 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1667,7 +1667,7 @@ cdef class TimedeltaValidator(TemporalValidator): # TODO: Not used outside of tests; remove? -def is_timedelta_array(values: ndarray) -> bint: +def is_timedelta_array(values: ndarray) -> bool: cdef: TimedeltaValidator validator = TimedeltaValidator(len(values), skipna=True) @@ -1680,7 +1680,7 @@ cdef class Timedelta64Validator(TimedeltaValidator): # TODO: Not used outside of tests; remove? -def is_timedelta64_array(values: ndarray) -> bint: +def is_timedelta64_array(values: ndarray) -> bool: cdef: Timedelta64Validator validator = Timedelta64Validator(len(values), skipna=True) diff --git a/pandas/_libs/missing.pyx b/pandas/_libs/missing.pyx index 2590a30c57f33..a7390bada17c0 100644 --- a/pandas/_libs/missing.pyx +++ b/pandas/_libs/missing.pyx @@ -278,14 +278,14 @@ def isnaobj2d_old(ndarray arr): return result.view(np.bool_) -cpdef bint isposinf_scalar(object val): +def isposinf_scalar(val: object) -> bool: if util.is_float_object(val) and val == INF: return True else: return False -cpdef bint isneginf_scalar(object val): +def isneginf_scalar(val: object) -> bool: if util.is_float_object(val) and val == NEGINF: return True else: From 6cbf556a3da9e12c5d9b7ec7d626a0215d01f094 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sat, 3 Nov 2018 16:12:08 -0700 Subject: [PATCH 7/8] modernize for loop --- pandas/_libs/sparse.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 540cb3204abcb..b1138b6a17b79 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -252,7 +252,7 @@ cdef class IntIndex(SparseIndex): result = np.empty(other.npoints, dtype=np.float64) result[:] = fill_value - for 0 <= i < other.npoints: + for i in range(other.npoints): while oinds[i] > sinds[j] and j < self.npoints: j += 1 From 86abb7debb6806c269bb87c2c79f3fffc06e68e4 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 4 Nov 2018 08:25:20 -0800 Subject: [PATCH 8/8] change valueError to AssertionError --- pandas/_libs/tslibs/timestamps.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 8b88c3eee27b0..4cc7e06da64fe 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -177,7 +177,8 @@ def round_nsint64(values, mode, freq): # if/elif above should catch all rounding modes defined in enum 'RoundTo': # if flow of control arrives here, it is a bug - raise ValueError("round_nsint64 called with an unrecognized rounding mode") + raise AssertionError("round_nsint64 called with an unrecognized " + "rounding mode") # This is PITA. Because we inherit from datetime, which has very specific