From 0501340374a7ea8168a46374a0e543761d785ada Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 28 Sep 2017 14:37:30 -0700 Subject: [PATCH 1/4] remove unused time conversion funcs --- pandas/_libs/index.pyx | 38 +++++----------------------- pandas/_libs/lib.pyx | 57 ------------------------------------------ pandas/_libs/tslib.pyx | 2 +- pandas/io/pytables.py | 4 +-- 4 files changed, 9 insertions(+), 92 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 884117799ec5b..c19c14f541de6 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -22,20 +22,11 @@ from pandas._libs import tslib, algos, hashtable as _hash from pandas._libs.tslib import Timestamp, Timedelta from datetime import datetime, timedelta -from datetime cimport (get_datetime64_value, _pydatetime_to_dts, - pandas_datetimestruct) - from cpython cimport PyTuple_Check, PyList_Check -cdef extern from "datetime.h": - bint PyDateTime_Check(object o) - void PyDateTime_IMPORT() - cdef int64_t iNaT = util.get_nat() -PyDateTime_IMPORT - cdef extern from "Python.h": int PySlice_Check(object) @@ -415,12 +406,12 @@ cdef class DatetimeEngine(Int64Engine): if not self.is_unique: return self._get_loc_duplicates(val) values = self._get_index_values() - conv = _to_i8(val) + conv = tslib.pydt_to_i8(val) loc = values.searchsorted(conv, side='left') return util.get_value_at(values, loc) == conv self._ensure_mapping_populated() - return _to_i8(val) in self.mapping + return tslib.pydt_to_i8(val) in self.mapping cdef _get_index_values(self): return self.vgetter().view('i8') @@ -435,12 +426,12 @@ cdef class DatetimeEngine(Int64Engine): # Welcome to the spaghetti factory if self.over_size_threshold and self.is_monotonic_increasing: if not self.is_unique: - val = _to_i8(val) + val = tslib.pydt_to_i8(val) return self._get_loc_duplicates(val) values = self._get_index_values() try: - conv = _to_i8(val) + conv = tslib.pydt_to_i8(val) loc = values.searchsorted(conv, side='left') except 
TypeError: self._date_check_type(val) @@ -452,7 +443,7 @@ cdef class DatetimeEngine(Int64Engine): self._ensure_mapping_populated() if not self.unique: - val = _to_i8(val) + val = tslib.pydt_to_i8(val) return self._get_loc_duplicates(val) try: @@ -463,7 +454,7 @@ cdef class DatetimeEngine(Int64Engine): pass try: - val = _to_i8(val) + val = tslib.pydt_to_i8(val) return self.mapping.get_item(val) except (TypeError, ValueError): self._date_check_type(val) @@ -540,23 +531,6 @@ cpdef convert_scalar(ndarray arr, object value): return value -cdef inline _to_i8(object val): - cdef pandas_datetimestruct dts - try: - return val.value - except AttributeError: - if util.is_datetime64_object(val): - return get_datetime64_value(val) - elif PyDateTime_Check(val): - tzinfo = getattr(val, 'tzinfo', None) - # Save the original date value so we can get the utcoffset from it. - ival = _pydatetime_to_dts(val, &dts) - if tzinfo is not None and not is_utc(tzinfo): - offset = get_utcoffset(tzinfo, val) - ival -= tslib._delta_to_nanoseconds(offset) - return ival - return val - cdef class MultiIndexObjectEngine(ObjectEngine): """ diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 01548e17d39ab..e7a5c07307440 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -45,8 +45,6 @@ cdef double NaN = np.NaN cdef double nan = NaN cdef double NAN = nan -from datetime import datetime as pydatetime - # this is our tseries.pxd from datetime cimport ( get_timedelta64_value, get_datetime64_value, @@ -132,61 +130,6 @@ def memory_usage_of_objects(ndarray[object, ndim=1] arr): s += arr[i].__sizeof__() return s -#---------------------------------------------------------------------- -# datetime / io related - -cdef int _EPOCH_ORD = 719163 - -from datetime import date as pydate - -cdef inline int64_t gmtime(object date): - cdef int y, m, d, h, mn, s, days - - y = PyDateTime_GET_YEAR(date) - m = PyDateTime_GET_MONTH(date) - d = PyDateTime_GET_DAY(date) - h = PyDateTime_DATE_GET_HOUR(date) 
- mn = PyDateTime_DATE_GET_MINUTE(date) - s = PyDateTime_DATE_GET_SECOND(date) - - days = pydate(y, m, 1).toordinal() - _EPOCH_ORD + d - 1 - return (( (((days * 24 + h) * 60 + mn))) * 60 + s) * 1000 - - -cpdef object to_datetime(int64_t timestamp): - return pydatetime.utcfromtimestamp(timestamp / 1000.0) - - -cpdef object to_timestamp(object dt): - return gmtime(dt) - - -def array_to_timestamp(ndarray[object, ndim=1] arr): - cdef int i, n - cdef ndarray[int64_t, ndim=1] result - - n = len(arr) - result = np.empty(n, dtype=np.int64) - - for i from 0 <= i < n: - result[i] = gmtime(arr[i]) - - return result - - -def time64_to_datetime(ndarray[int64_t, ndim=1] arr): - cdef int i, n - cdef ndarray[object, ndim=1] result - - n = len(arr) - result = np.empty(n, dtype=object) - - for i from 0 <= i < n: - result[i] = to_datetime(arr[i]) - - return result - - #---------------------------------------------------------------------- # isnull / notnull related diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 4c34d0fcb1e5f..463c3acaba1cf 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -3436,7 +3436,7 @@ def cast_to_nanoseconds(ndarray arr): return result -def pydt_to_i8(object pydt): +cpdef pydt_to_i8(object pydt): """ Convert to int64 representation compatible with numpy datetime64; converts to UTC diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index ea69116ec363d..ca1b4d031d3ce 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -25,7 +25,7 @@ import numpy as np from pandas import (Series, DataFrame, Panel, Panel4D, Index, - MultiIndex, Int64Index, isna, concat, + MultiIndex, Int64Index, isna, concat, to_datetime, SparseSeries, SparseDataFrame, PeriodIndex, DatetimeIndex, TimedeltaIndex) from pandas.core import config @@ -4529,7 +4529,7 @@ def _unconvert_index(data, kind, encoding=None): def _unconvert_index_legacy(data, kind, legacy=False, encoding=None): kind = _ensure_decoded(kind) if kind == u('datetime'): - 
index = lib.time64_to_datetime(data) + index = to_datetime(data) elif kind in (u('integer')): index = np.asarray(data, dtype=object) elif kind in (u('string')): From c78e76625fb815b9b78abad17e660a0245c5efd2 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 28 Sep 2017 21:26:52 -0700 Subject: [PATCH 2/4] revert to using _to_i8 --- pandas/_libs/index.pyx | 13 +++++++------ pandas/_libs/tslib.pxd | 2 ++ pandas/_libs/tslib.pyx | 17 +++++++++++++++++ 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index c19c14f541de6..ce49c9a880098 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -14,6 +14,7 @@ cimport util import numpy as np cimport tslib +from tslib cimport _to_i8 from hashtable cimport HashTable @@ -406,12 +407,12 @@ cdef class DatetimeEngine(Int64Engine): if not self.is_unique: return self._get_loc_duplicates(val) values = self._get_index_values() - conv = tslib.pydt_to_i8(val) + conv = _to_i8(val) loc = values.searchsorted(conv, side='left') return util.get_value_at(values, loc) == conv self._ensure_mapping_populated() - return tslib.pydt_to_i8(val) in self.mapping + return _to_i8(val) in self.mapping cdef _get_index_values(self): return self.vgetter().view('i8') @@ -426,12 +427,12 @@ cdef class DatetimeEngine(Int64Engine): # Welcome to the spaghetti factory if self.over_size_threshold and self.is_monotonic_increasing: if not self.is_unique: - val = tslib.pydt_to_i8(val) + val = _to_i8(val) return self._get_loc_duplicates(val) values = self._get_index_values() try: - conv = tslib.pydt_to_i8(val) + conv = _to_i8(val) loc = values.searchsorted(conv, side='left') except TypeError: self._date_check_type(val) @@ -443,7 +444,7 @@ cdef class DatetimeEngine(Int64Engine): self._ensure_mapping_populated() if not self.unique: - val = tslib.pydt_to_i8(val) + val = _to_i8(val) return self._get_loc_duplicates(val) try: @@ -454,7 +455,7 @@ cdef class DatetimeEngine(Int64Engine): pass try: - val 
= tslib.pydt_to_i8(val) + val = _to_i8(val) return self.mapping.get_item(val) except (TypeError, ValueError): self._date_check_type(val) diff --git a/pandas/_libs/tslib.pxd b/pandas/_libs/tslib.pxd index ee8adfe67bb5e..147320b108cc8 100644 --- a/pandas/_libs/tslib.pxd +++ b/pandas/_libs/tslib.pxd @@ -4,3 +4,5 @@ cdef convert_to_tsobject(object, object, object, bint, bint) cpdef convert_to_timedelta64(object, object) cdef bint _nat_scalar_rules[6] cdef bint _check_all_nulls(obj) + +cdef _to_i8(object val) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 463c3acaba1cf..b2dfc05947791 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -3436,6 +3436,23 @@ def cast_to_nanoseconds(ndarray arr): return result +cdef inline _to_i8(object val): + cdef pandas_datetimestruct dts + try: + return val.value + except AttributeError: + if is_datetime64_object(val): + return get_datetime64_value(val) + elif PyDateTime_Check(val): + tzinfo = getattr(val, 'tzinfo', None) + # Save the original date value so we can get the utcoffset from it. 
+ ival = _pydatetime_to_dts(val, &dts) + if tzinfo is not None and not is_utc(tzinfo): + offset = get_utcoffset(tzinfo, val) + ival -= _delta_to_nanoseconds(offset) + return ival + return val + cpdef pydt_to_i8(object pydt): """ Convert to int64 representation compatible with numpy datetime64; converts From 84030ed11fab5a6a917276d805d59389cdc22463 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 29 Sep 2017 09:06:48 -0700 Subject: [PATCH 3/4] remove unused imports --- pandas/_libs/index.pyx | 4 +--- pandas/_libs/lib.pyx | 6 ------ 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index ce49c9a880098..c96251a0293d6 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -13,13 +13,11 @@ cimport util import numpy as np -cimport tslib from tslib cimport _to_i8 from hashtable cimport HashTable -from tslibs.timezones cimport is_utc, get_utcoffset -from pandas._libs import tslib, algos, hashtable as _hash +from pandas._libs import algos, hashtable as _hash from pandas._libs.tslib import Timestamp, Timedelta from datetime import datetime, timedelta diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index e7a5c07307440..503badd0ca8bc 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -50,12 +50,6 @@ from datetime cimport ( get_timedelta64_value, get_datetime64_value, npy_timedelta, npy_datetime, PyDateTime_Check, PyDate_Check, PyTime_Check, PyDelta_Check, - PyDateTime_GET_YEAR, - PyDateTime_GET_MONTH, - PyDateTime_GET_DAY, - PyDateTime_DATE_GET_HOUR, - PyDateTime_DATE_GET_MINUTE, - PyDateTime_DATE_GET_SECOND, PyDateTime_IMPORT) From 95f778a6890b445fff82f2245f8fabbef79c5a5d Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 1 Oct 2017 09:46:59 -0700 Subject: [PATCH 4/4] use Timestamp per reviewer request --- pandas/_libs/tslib.pyx | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 
b2dfc05947791..9730456b0c4c4 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -3444,13 +3444,7 @@ cdef inline _to_i8(object val): if is_datetime64_object(val): return get_datetime64_value(val) elif PyDateTime_Check(val): - tzinfo = getattr(val, 'tzinfo', None) - # Save the original date value so we can get the utcoffset from it. - ival = _pydatetime_to_dts(val, &dts) - if tzinfo is not None and not is_utc(tzinfo): - offset = get_utcoffset(tzinfo, val) - ival -= _delta_to_nanoseconds(offset) - return ival + return Timestamp(val).value return val cpdef pydt_to_i8(object pydt):