From af2ab1cd354b5a0982715f176dc488f5027fb407 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 18 Sep 2017 09:43:52 -0700 Subject: [PATCH 1/6] Make properties.pyx its own module, remove offsets.pyx --- pandas/_libs/lib.pyx | 2 +- pandas/_libs/properties.pxd | 18 ++ pandas/_libs/properties.pyx | 76 ++++++++ pandas/_libs/src/offsets.pyx | 367 ----------------------------------- setup.py | 3 +- 5 files changed, 97 insertions(+), 369 deletions(-) create mode 100644 pandas/_libs/properties.pxd create mode 100644 pandas/_libs/properties.pyx delete mode 100644 pandas/_libs/src/offsets.pyx diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 53ca41e4b2489..88ec43bb867f7 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -67,6 +67,7 @@ import tslib from tslib import NaT, Timestamp, Timedelta import interval from interval import Interval +from properties import AxisProperty, cache_readonly cdef int64_t NPY_NAT = util.get_nat() @@ -1907,5 +1908,4 @@ cdef class BlockPlacement: include "reduce.pyx" -include "properties.pyx" include "inference.pyx" diff --git a/pandas/_libs/properties.pxd b/pandas/_libs/properties.pxd new file mode 100644 index 0000000000000..1a25b9dcbded6 --- /dev/null +++ b/pandas/_libs/properties.pxd @@ -0,0 +1,18 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# cython: profile=False +# cython: linetrace=False +# distutils: define_macros=CYTHON_TRACE=0 +# distutils: define_macros=CYTHON_TRACE_NOGIL=0 + +from cython cimport Py_ssize_t + + +cdef class cache_readonly(object): + cdef readonly: + object func, name, allow_setting + + +cdef class AxisProperty(object): + cdef: + Py_ssize_t axis diff --git a/pandas/_libs/properties.pyx b/pandas/_libs/properties.pyx new file mode 100644 index 0000000000000..a2d64082cb2e5 --- /dev/null +++ b/pandas/_libs/properties.pyx @@ -0,0 +1,76 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# cython: profile=False +# cython: linetrace=False +# distutils: define_macros=CYTHON_TRACE=0 +# distutils: define_macros=CYTHON_TRACE_NOGIL=0 + +from cython cimport Py_ssize_t + +from cpython cimport ( + PyDict_Contains, PyDict_GetItem, PyDict_SetItem) + + +cdef class cache_readonly(object): + + #cdef readonly: + # object func, name, allow_setting + + def __init__(self, func=None, allow_setting=False): + if func is not None: + self.func = func + self.name = func.__name__ + self.allow_setting = allow_setting + + def __call__(self, func, doc=None): + self.func = func + self.name = func.__name__ + return self + + def __get__(self, obj, typ): + # Get the cache or set a default one if needed + + cache = getattr(obj, '_cache', None) + if cache is None: + try: + cache = obj._cache = {} + except (AttributeError): + return + + if PyDict_Contains(cache, self.name): + # not necessary to Py_INCREF + val = PyDict_GetItem(cache, self.name) + else: + val = self.func(obj) + PyDict_SetItem(cache, self.name, val) + return val + + def __set__(self, obj, value): + + if not self.allow_setting: + raise Exception("cannot set values for [%s]" % self.name) + + # Get the cache or set a default one if needed + cache = getattr(obj, '_cache', None) + if cache is None: + try: + cache = obj._cache = {} + except (AttributeError): + return + + PyDict_SetItem(cache, self.name, value) + + +cdef class AxisProperty(object): + #cdef: + # Py_ssize_t axis + + def __init__(self, axis=0): + self.axis = axis + + def __get__(self, obj, type): + cdef list axes = obj._data.axes + return axes[self.axis] + + def __set__(self, obj, value): + obj._set_axis(self.axis, value) diff --git a/pandas/_libs/src/offsets.pyx b/pandas/_libs/src/offsets.pyx deleted file mode 100644 index c963e256d0aa5..0000000000000 --- a/pandas/_libs/src/offsets.pyx +++ /dev/null @@ -1,367 +0,0 @@ - -ctypedef enum time_res: - r_min = 0 - r_microsecond - r_second - r_minute - r_hour - r_day - r_month - r_year - r_max = 98 - r_invalid = 99 - - -cdef conversion_factor(time_res res1, time_res res2): - cdef: - time_res min_res, max_res - int64_t factor - - min_res = min(res1, res2) - max_res = max(res1, res2) - factor = 1 - - if min_res == max_res: - return factor - - while min_res < max_res: - if min_res < r_microsecond: - raise "Cannot convert from less than us" - elif min_res == r_microsecond: - factor *= 1000000 - min_res = r_second - elif min_res == r_second: - factor *= 60 - min_res = r_minute - elif min_res == r_minute: - factor *= 60 - min_res = r_hour - elif min_res == r_hour: - factor *= 24 - min_res = r_day - else: - raise "Cannot convert to month or year" - - return factor - -# Logic to generate ranges -# ----------------------------------------------------------------------------- - -cdef inline int64_t weekend_adjustment(int64_t dow, int bkwd): - if dow > 4: # sat or sun? - if bkwd: # roll back 1 or 2 days - return (4 - dow) - else: # roll forward 2 or 1 days - return (7 - dow) - return 0 - -cdef int64_t us_in_day = conversion_factor(r_microsecond, r_day) - -cdef class _Offset: - """ - Base class to generate timestamps. Set the anchor, and then move offsets - with next & prev. Retrieve timestamp with ts attribute. - """ - cdef: - int64_t t, dow, biz, dayoffset - object start - _TSObject ts - - def __cinit__(self): - self.t=0 - self.dow=0 - self.biz=0 - self.dayoffset=0 - - cpdef anchor(self, object start=None): - if start is not None: - self.start = start - self.ts = convert_to_tsobject(self.start, None, None) - self._setup() - - cdef _setup(self): - pass - - cpdef next(self): - pass - - cpdef __next__(self): - """wrapper around next""" - return self.next() - - cpdef prev(self): - pass - - cdef int64_t _ts(self): - """ - Access the current timestamp value, with a possible weekday - adjustment. - """ - cdef int64_t adj - - if self.biz != 0: - adj = weekend_adjustment(self.dow, self.biz < 0) - return self.t + us_in_day * adj - else: - return self.t - - cdef int64_t _get_anchor(self): - """ - Retrieve an anchor relating to current offset we're on. - """ - return self.t - self.dayoffset * us_in_day - - property ts: - def __get__(self): - return self._ts() - -cdef class YearOffset(_Offset): - """ - Generate annual timestamps from provided start time; apply dayoffset to - each timestamp. If biz > 0, we choose the next business day at each time; - previous if < 0. - - Parameters - ---------- - dayoffset : int - biz : int - """ - cdef: - int64_t y, ly - - def __init__(self, int64_t dayoffset=0, int64_t biz=0, object anchor=None): - self.dayoffset = dayoffset - self.biz = biz - - if anchor is not None: - self.anchor(anchor) - - cdef _setup(self): - cdef _TSObject ts = self.ts - - self.t = ts.value + self.dayoffset * us_in_day - self.y = ts.dts.year - - self.ly = (ts.dts.month > 2 or - ts.dts.month == 2 and ts.dts.day == 29) - - if self.biz != 0: - self.dow = (ts_dayofweek(ts) + self.dayoffset) % 7 - - cpdef next(self): - cdef int64_t days - - days = 365 + is_leapyear(self.y + self.ly) - - self.t += days * us_in_day - self.y += 1 - - if self.biz != 0: - self.dow = (self.dow + days) % 7 - - cpdef prev(self): - cdef int64_t days - - days = 365 + is_leapyear(self.y - (1 - self.ly)) - - self.t -= days * us_in_day - self.y -= 1 - - if self.biz != 0: - self.dow = (self.dow - days) % 7 - -cdef class MonthOffset(_Offset): - """ - Generate monthly timestamps from provided start time, and apply dayoffset - to each timestamp. Stride to construct strided timestamps (eg quarterly). - If biz > 0, we choose the next business day at each time; previous if < 0. - - Parameters - ---------- - dayoffset : int - stride : int, > 0 - biz : int - """ - cdef: - Py_ssize_t stride, ly, m - int64_t y - - def __init__(self, int64_t dayoffset=0, Py_ssize_t stride=1, - int64_t biz=0, object anchor=None): - self.dayoffset = dayoffset - self.stride = stride - self.biz = biz - - if stride <= 0: - raise ValueError("Stride must be positive") - - if anchor is not None: - self.anchor(anchor) - - cdef _setup(self): - cdef _TSObject ts = self.ts - - self.t = ts.value + (self.dayoffset * us_in_day) - - # for day counting - self.m = ts.dts.month - 1 - self.y = ts.dts.year - self.ly = is_leapyear(self.y) - - if self.biz != 0: - self.dow = (ts_dayofweek(ts) + self.dayoffset) % 7 - - cpdef next(self): - cdef: - int64_t tmp, days - Py_ssize_t j - - days = 0 - for j in range(0, self.stride): - if self.m >= 12: - self.m -= 12 - self.y += 1 - self.ly = is_leapyear(self.y) - days += days_per_month_table[self.ly][self.m] - self.m += 1 - - self.t += days * us_in_day - - if self.biz != 0: - self.dow = (self.dow + days) % 7 - - cpdef prev(self): - cdef: - int64_t tmp, days - Py_ssize_t j - - days = 0 - for j in range(0, self.stride): - self.m -= 1 - if self.m < 0: - self.m += 12 - self.y -= 1 - self.ly = is_leapyear(self.y) - days += days_per_month_table[self.ly][self.m] - - self.t -= days * us_in_day - - if self.biz != 0: - self.dow = (self.dow - days) % 7 - -cdef class DayOfMonthOffset(_Offset): - """ - Generate relative monthly timestamps from month & year of provided start - time. For example, fridays of the third week of each month (week=3, day=4); - or, thursdays of the last week of each month (week=-1, day=3). - - Parameters - ---------- - week : int - day : int, 0 to 6 - """ - cdef: - Py_ssize_t ly, m - int64_t y, day, week - - def __init__(self, int64_t week=0, int64_t day=0, object anchor=None): - self.week = week - self.day = day - - if self.day < 0 or self.day > 6: - raise ValueError("Day offset must be 0 to 6") - - if anchor is not None: - self.anchor(anchor) - - cdef _setup(self): - cdef _TSObject ts = self.ts - - # rewind to beginning of month - self.t = ts.value - (ts.dts.day - 1) * us_in_day - self.dow = dayofweek(ts.dts.year, ts.dts.month, 1) - - # for day counting - self.m = ts.dts.month - 1 - self.y = ts.dts.year - self.ly = is_leapyear(self.y) - - cpdef next(self): - cdef: - int64_t tmp, days - - days = days_per_month_table[self.ly][self.m] - self.t += days * us_in_day - self.dow = (self.dow + days) % 7 - - self.m += 1 - if self.m >= 12: - self.m -= 12 - self.y += 1 - self.ly = is_leapyear(self.y) - - cpdef prev(self): - cdef: - int64_t tmp, days - - days = days_per_month_table[self.ly][(self.m - 1) % 12] - self.t -= days * us_in_day - self.dow = (self.dow - days) % 7 - - self.m -= 1 - if self.m < 0: - self.m += 12 - self.y -= 1 - self.ly = is_leapyear(self.y) - - cdef int64_t _ts(self): - """ - Overwrite default adjustment - """ - cdef int64_t adj = (self.week * 7) + (self.day - self.dow) % 7 - return self.t + us_in_day * adj - -cdef class DayOffset(_Offset): - """ - Generate daily timestamps beginning with first valid time >= start time. If - biz != 0, we skip weekends. Stride, to construct weekly timestamps. - - Parameters - ---------- - stride : int, > 0 - biz : boolean - """ - cdef: - Py_ssize_t stride - - def __init__(self, int64_t stride=1, int64_t biz=0, object anchor=None): - self.stride = stride - self.biz = biz - - if self.stride <= 0: - raise ValueError("Stride must be positive") - - if anchor is not None: - self.anchor(anchor) - - cdef _setup(self): - cdef _TSObject ts = self.ts - self.t = ts.value - if self.biz != 0: - self.dow = ts_dayofweek(ts) - - cpdef next(self): - self.t += (self.stride * us_in_day) - if self.biz != 0: - self.dow = (self.dow + self.stride) % 7 - if self.dow >= 5: - self.t += (7 - self.dow) * us_in_day - self.dow = 0 - - cpdef prev(self): - self.t -= (self.stride * us_in_day) - if self.biz != 0: - self.dow = (self.dow - self.stride) % 7 - if self.dow >= 5: - self.t += (4 - self.dow) * us_in_day - self.dow = 4 diff --git a/setup.py b/setup.py index 0e4e22b875e1d..b8f23870027ae 100755 --- a/setup.py +++ b/setup.py @@ -437,7 +437,7 @@ def get_tag(self): cmdclass['build_src'] = DummyBuildSrc cmdclass['build_ext'] = CheckingBuildExt -lib_depends = ['reduce', 'inference', 'properties'] +lib_depends = ['reduce', 'inference'] def srcpath(name=None, suffix='.pyx', subdir='src'): @@ -476,6 +476,7 @@ def pxd(name): libraries = ['m'] if not is_platform_windows() else [] ext_data = { + '_libs.properties': {'pyxfile': '_libs/properties', 'include': []}, '_libs.lib': {'pyxfile': '_libs/lib', 'depends': lib_depends + tseries_depends}, '_libs.hashtable': {'pyxfile': '_libs/hashtable', From 189b655c6011ef6c97b5c4cdce939d7216f98494 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 18 Sep 2017 13:33:14 -0700 Subject: [PATCH 2/6] Use is_period_object instead of is_period --- pandas/_libs/period.pyx | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 49353f7b0491c..75164748128e2 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -27,13 +27,12 @@ from datetime cimport ( INT32_MIN) -cimport util, lib +cimport util from util cimport is_period_object, is_string_object -from lib cimport is_null_datetimelike, is_period -from pandas._libs import tslib, lib -from pandas._libs.tslib import (Timedelta, Timestamp, iNaT, - NaT) +from lib cimport is_null_datetimelike +from pandas._libs import tslib +from pandas._libs.tslib import Timestamp, iNaT, NaT from tslibs.timezones cimport ( is_utc, is_tzlocal, get_utcoffset, _get_dst_info, maybe_get_tz) from tslib cimport _nat_scalar_rules @@ -485,7 +484,7 @@ def extract_freq(ndarray[object] values): try: # now Timestamp / NaT has freq attr - if is_period(p): + if is_period_object(p): return p.freq except AttributeError: pass @@ -728,8 +727,7 @@ cdef class _Period(object): return hash((self.ordinal, self.freqstr)) def _add_delta(self, other): - if isinstance(other, (timedelta, np.timedelta64, - offsets.Tick, Timedelta)): + if isinstance(other, (timedelta, np.timedelta64, offsets.Tick)): offset = frequencies.to_offset(self.freq.rule_code) if isinstance(offset, offsets.Tick): nanos = tslib._delta_to_nanoseconds(other) @@ -754,12 +752,11 @@ cdef class _Period(object): def __add__(self, other): if is_period_object(self): if isinstance(other, (timedelta, np.timedelta64, - offsets.DateOffset, - Timedelta)): + offsets.DateOffset)): return self._add_delta(other) elif other is NaT: return NaT - elif lib.is_integer(other): + elif util.is_integer_object(other): ordinal = self.ordinal + other * self.freq.n return Period(ordinal=ordinal, freq=self.freq) else: # pragma: no cover @@ -772,11 +769,10 @@ cdef class _Period(object): def __sub__(self, other): if is_period_object(self): if isinstance(other, (timedelta, np.timedelta64, - offsets.DateOffset, - Timedelta)): + offsets.DateOffset)): neg_other = -other return self + neg_other - elif lib.is_integer(other): + elif util.is_integer_object(other): ordinal = self.ordinal - other * self.freq.n return Period(ordinal=ordinal, freq=self.freq) elif is_period_object(other): @@ -1159,7 +1155,7 @@ class Period(_Period): raise ValueError(("Only value or ordinal but not both should be " "given but not both")) elif ordinal is not None: - if not lib.is_integer(ordinal): + if not util.is_integer_object(ordinal): raise ValueError("Ordinal must be an integer") if freq is None: raise ValueError('Must supply freq for ordinal value') @@ -1196,8 +1192,8 @@ class Period(_Period): elif is_null_datetimelike(value) or value in tslib._nat_strings: ordinal = iNaT - elif is_string_object(value) or lib.is_integer(value): - if lib.is_integer(value): + elif is_string_object(value) or util.is_integer_object(value): + if util.is_integer_object(value): value = str(value) value = value.upper() dt, _, reso = parse_time_string(value, freq) From 69a0ac30e866cdfc9dcfdb96d1fe944a3640e56d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 18 Sep 2017 15:45:35 -0700 Subject: [PATCH 3/6] Remove util dep from reshape --- pandas/_libs/reshape.pyx | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx index d6996add374a9..db2e8b43d1ead 100644 --- a/pandas/_libs/reshape.pyx +++ b/pandas/_libs/reshape.pyx @@ -8,8 +8,6 @@ from cython cimport Py_ssize_t np.import_array() -cimport util - from numpy cimport (ndarray, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t, float16_t, float32_t, float64_t) From 4bd580c3849797000867fc8075c449bb92b7b75e Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 18 Sep 2017 17:49:09 -0700 Subject: [PATCH 4/6] Remove unused cimports --- pandas/_libs/algos.pyx | 1 - pandas/_libs/groupby.pyx | 2 -- pandas/_libs/join.pyx | 2 -- setup.py | 4 ++-- 4 files changed, 2 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 8cbc65633c786..d159761c3f5e6 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -33,7 +33,6 @@ from libc.math cimport sqrt, fabs from util cimport numeric, get_nat cimport lib -from lib cimport is_null_datetimelike from pandas._libs import lib cdef int64_t iNaT = get_nat() diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 9500e685367c8..1cb7b18fa4f61 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -7,8 +7,6 @@ cimport cython cnp.import_array() -cimport util - from numpy cimport (ndarray, double_t, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 503bdda75875f..33c3650fa0425 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -8,8 +8,6 @@ from cython cimport Py_ssize_t np.import_array() -cimport util - from numpy cimport (ndarray, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t, float16_t, float32_t, float64_t) diff --git a/setup.py b/setup.py index b8f23870027ae..198c7033ad4c9 100755 --- a/setup.py +++ b/setup.py @@ -512,7 +512,7 @@ def pxd(name): 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], 'depends': _pxi_dep['join']}, '_libs.reshape': {'pyxfile': '_libs/reshape', - 'depends': _pxi_dep['reshape']}, + 'depends': _pxi_dep['reshape'], 'include': []}, '_libs.interval': {'pyxfile': '_libs/interval', 'pxdfiles': ['_libs/hashtable'], 'depends': _pxi_dep['interval']}, @@ -528,7 +528,7 @@ def pxd(name): 'pandas/_libs/src/parser/io.c']}, '_libs.sparse': {'pyxfile': '_libs/sparse', 'depends': (['pandas/_libs/sparse.pyx'] + - _pxi_dep['sparse'])}, + _pxi_dep['sparse']), 'include': []}, '_libs.testing': {'pyxfile': '_libs/testing', 'depends': ['pandas/_libs/testing.pyx']}, '_libs.hashing': {'pyxfile': '_libs/hashing', From 1a1dd1da7468067bc4722fc926182f9188b462fb Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 22 Sep 2017 11:20:52 -0700 Subject: [PATCH 5/6] fixup remove accidentally re-added imports --- pandas/_libs/lib.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 88ec43bb867f7..01548e17d39ab 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -67,7 +67,6 @@ import tslib from tslib import NaT, Timestamp, Timedelta import interval from interval import Interval -from properties import AxisProperty, cache_readonly cdef int64_t NPY_NAT = util.get_nat() From eaa48da9674ff2f981c00a0c80c716bf8bec9e3d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 22 Sep 2017 11:22:55 -0700 Subject: [PATCH 6/6] fixup remove dup --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 3c137294da90b..555cf9dc4a9b3 100755 --- a/setup.py +++ b/setup.py @@ -476,7 +476,6 @@ def pxd(name): libraries = ['m'] if not is_platform_windows() else [] ext_data = { - '_libs.properties': {'pyxfile': '_libs/properties', 'include': []}, '_libs.lib': {'pyxfile': '_libs/lib', 'depends': lib_depends + tseries_depends}, '_libs.properties': {'pyxfile': '_libs/properties', 'include': []},