Skip to content

Commit b1aeb77

Browse files
committed
Merge pull request #9504 from blbradley/period-cython-optmize01
PERF: Cython optimizations for period module, round one
2 parents c88b0ba + 3c4ece3 commit b1aeb77

File tree

2 files changed

+76
-61
lines changed

2 files changed

+76
-61
lines changed

pandas/src/period.pyx

+75-61
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,11 @@
11
from datetime import datetime, date, timedelta
22
import operator
33

4+
from cpython cimport (
5+
PyObject_RichCompareBool,
6+
Py_EQ, Py_NE,
7+
)
8+
49
from numpy cimport (int8_t, int32_t, int64_t, import_array, ndarray,
510
NPY_INT64, NPY_DATETIME, NPY_TIMEDELTA)
611
import numpy as np
@@ -27,6 +32,7 @@ from tslib cimport (
2732
_is_utc,
2833
_is_tzlocal,
2934
_get_dst_info,
35+
_nat_scalar_rules,
3036
)
3137

3238
from sys import version_info
@@ -606,16 +612,7 @@ cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps,
606612
return result
607613

608614

609-
def _period_field_accessor(name, alias):
610-
def f(self):
611-
from pandas.tseries.frequencies import get_freq_code as _gfc
612-
base, mult = _gfc(self.freq)
613-
return get_period_field(alias, self.ordinal, base)
614-
f.__name__ = name
615-
return property(f)
616-
617-
618-
class Period(object):
615+
cdef class Period(object):
619616
"""
620617
Represents an period of time
621618
@@ -634,14 +631,17 @@ class Period(object):
634631
minute : int, default 0
635632
second : int, default 0
636633
"""
637-
__slots__ = ['freq', 'ordinal']
634+
cdef public:
635+
int64_t ordinal
636+
object freq
637+
638638
_comparables = ['name','freqstr']
639639
_typ = 'period'
640640

641641
@classmethod
642642
def _from_ordinal(cls, ordinal, freq):
643643
""" fast creation from an ordinal and freq that are already validated! """
644-
self = object.__new__(cls)
644+
self = Period.__new__(cls)
645645
self.ordinal = ordinal
646646
self.freq = freq
647647
return self
@@ -659,7 +659,6 @@ class Period(object):
659659
self.freq = None
660660

661661
# ordinal is the period offset from the gregorian proleptic epoch
662-
self.ordinal = None
663662

664663
if ordinal is not None and value is not None:
665664
raise ValueError(("Only value or ordinal but not both should be "
@@ -669,26 +668,25 @@ class Period(object):
669668
raise ValueError("Ordinal must be an integer")
670669
if freq is None:
671670
raise ValueError('Must supply freq for ordinal value')
672-
self.ordinal = ordinal
673671

674672
elif value is None:
675673
if freq is None:
676674
raise ValueError("If value is None, freq cannot be None")
677675

678-
self.ordinal = _ordinal_from_fields(year, month, quarter, day,
676+
ordinal = _ordinal_from_fields(year, month, quarter, day,
679677
hour, minute, second, freq)
680678

681679
elif isinstance(value, Period):
682680
other = value
683681
if freq is None or _gfc(freq) == _gfc(other.freq):
684-
self.ordinal = other.ordinal
682+
ordinal = other.ordinal
685683
freq = other.freq
686684
else:
687685
converted = other.asfreq(freq)
688-
self.ordinal = converted.ordinal
686+
ordinal = converted.ordinal
689687

690688
elif lib.is_null_datetimelike(value) or value in tslib._nat_strings:
691-
self.ordinal = tslib.iNaT
689+
ordinal = tslib.iNaT
692690
if freq is None:
693691
raise ValueError("If value is NaT, freq cannot be None "
694692
"because it cannot be inferred")
@@ -722,26 +720,30 @@ class Period(object):
722720
# TODO: Better error message - this is slightly confusing
723721
raise ValueError('Only mult == 1 supported')
724722

725-
if self.ordinal is None:
726-
self.ordinal = period_ordinal(dt.year, dt.month, dt.day,
723+
if ordinal is None:
724+
self.ordinal = get_period_ordinal(dt.year, dt.month, dt.day,
727725
dt.hour, dt.minute, dt.second, dt.microsecond, 0,
728726
base)
727+
else:
728+
self.ordinal = ordinal
729729

730730
self.freq = frequencies._get_freq_str(base)
731731

732-
def __eq__(self, other):
732+
def __richcmp__(self, other, op):
733733
if isinstance(other, Period):
734734
from pandas.tseries.frequencies import get_freq_code as _gfc
735735
if other.freq != self.freq:
736736
raise ValueError("Cannot compare non-conforming periods")
737737
if self.ordinal == tslib.iNaT or other.ordinal == tslib.iNaT:
738-
return False
739-
return (self.ordinal == other.ordinal
740-
and _gfc(self.freq) == _gfc(other.freq))
741-
return NotImplemented
742-
743-
def __ne__(self, other):
744-
return not self == other
738+
return _nat_scalar_rules[op]
739+
return PyObject_RichCompareBool(self.ordinal, other.ordinal, op)
740+
else:
741+
if op == Py_EQ:
742+
return NotImplemented
743+
elif op == Py_NE:
744+
return NotImplemented
745+
raise TypeError('Cannot compare type %r with type %r' %
746+
(type(self).__name__, type(other).__name__))
745747

746748
def __hash__(self):
747749
return hash((self.ordinal, self.freq))
@@ -807,25 +809,6 @@ class Period(object):
807809
else: # pragma: no cover
808810
return NotImplemented
809811

810-
def _comp_method(func, name):
811-
def f(self, other):
812-
if isinstance(other, Period):
813-
if other.freq != self.freq:
814-
raise ValueError("Cannot compare non-conforming periods")
815-
if self.ordinal == tslib.iNaT or other.ordinal == tslib.iNaT:
816-
return False
817-
return func(self.ordinal, other.ordinal)
818-
else:
819-
raise TypeError(other)
820-
821-
f.__name__ = name
822-
return f
823-
824-
__lt__ = _comp_method(operator.lt, '__lt__')
825-
__le__ = _comp_method(operator.le, '__le__')
826-
__gt__ = _comp_method(operator.gt, '__gt__')
827-
__ge__ = _comp_method(operator.ge, '__ge__')
828-
829812
def asfreq(self, freq, how='E'):
830813
"""
831814
Convert Period to desired frequency, either at the start or end of the
@@ -898,19 +881,50 @@ class Period(object):
898881
dt64 = period_ordinal_to_dt64(val.ordinal, base)
899882
return Timestamp(dt64, tz=tz)
900883

901-
year = _period_field_accessor('year', 0)
902-
month = _period_field_accessor('month', 3)
903-
day = _period_field_accessor('day', 4)
904-
hour = _period_field_accessor('hour', 5)
905-
minute = _period_field_accessor('minute', 6)
906-
second = _period_field_accessor('second', 7)
907-
weekofyear = _period_field_accessor('week', 8)
908-
week = weekofyear
909-
dayofweek = _period_field_accessor('dayofweek', 10)
910-
weekday = dayofweek
911-
dayofyear = _period_field_accessor('dayofyear', 9)
912-
quarter = _period_field_accessor('quarter', 2)
913-
qyear = _period_field_accessor('qyear', 1)
884+
cdef _field(self, alias):
885+
from pandas.tseries.frequencies import get_freq_code as _gfc
886+
base, mult = _gfc(self.freq)
887+
return get_period_field(alias, self.ordinal, base)
888+
889+
property year:
890+
def __get__(self):
891+
return self._field(0)
892+
property month:
893+
def __get__(self):
894+
return self._field(3)
895+
property day:
896+
def __get__(self):
897+
return self._field(4)
898+
property hour:
899+
def __get__(self):
900+
return self._field(5)
901+
property minute:
902+
def __get__(self):
903+
return self._field(6)
904+
property second:
905+
def __get__(self):
906+
return self._field(7)
907+
property weekofyear:
908+
def __get__(self):
909+
return self._field(8)
910+
property week:
911+
def __get__(self):
912+
return self.weekofyear
913+
property dayofweek:
914+
def __get__(self):
915+
return self._field(10)
916+
property weekday:
917+
def __get__(self):
918+
return self.dayofweek
919+
property dayofyear:
920+
def __get__(self):
921+
return self._field(9)
922+
property quarter:
923+
def __get__(self):
924+
return self._field(2)
925+
property qyear:
926+
def __get__(self):
927+
return self._field(1)
914928

915929
@classmethod
916930
def now(cls, freq=None):
@@ -1094,7 +1108,7 @@ def _ordinal_from_fields(year, month, quarter, day, hour, minute,
10941108
if quarter is not None:
10951109
year, month = _quarter_to_myear(year, quarter, freq)
10961110

1097-
return period_ordinal(year, month, day, hour, minute, second, 0, 0, base)
1111+
return get_period_ordinal(year, month, day, hour, minute, second, 0, 0, base)
10981112

10991113

11001114
def _quarter_to_myear(year, quarter, freq):

pandas/tslib.pxd

+1
Original file line number | Diff line number | Diff line change
@@ -6,3 +6,4 @@ cpdef object maybe_get_tz(object)
66
cdef bint _is_utc(object)
77
cdef bint _is_tzlocal(object)
88
cdef object _get_dst_info(object)
9+
cdef bint _nat_scalar_rules[6]

0 commit comments

Comments
 (0)