Skip to content

Commit 8ebb094

Browse files
jbrockmendel authored and Pingviinituutti committed
PERF: optimize NaT lookups in cython modules (pandas-dev#24008)
1 parent dc3e92a commit 8ebb094

File tree

13 files changed

+69
-59
lines changed

13 files changed

+69
-59
lines changed

pandas/_libs/tslib.pyx

+2-2
Original file line number | Diff line number | Diff line change
@@ -40,8 +40,8 @@ from tslibs.conversion cimport (tz_convert_single, _TSObject,
4040
tz_convert_utc_to_tzlocal)
4141

4242
# many modules still look for NaT and iNaT here despite them not being needed
43-
from tslibs.nattype import nat_strings, NaT, iNaT # noqa:F821
44-
from tslibs.nattype cimport checknull_with_nat, NPY_NAT
43+
from tslibs.nattype import nat_strings, iNaT # noqa:F821
44+
from tslibs.nattype cimport checknull_with_nat, NPY_NAT, c_NaT as NaT
4545

4646
from tslibs.offsets cimport to_offset
4747

pandas/_libs/tslibs/conversion.pyx

+2-2
Original file line number | Diff line number | Diff line change
@@ -39,8 +39,8 @@ from timezones cimport (is_utc, is_tzlocal, is_fixed_offset,
3939
from timezones import UTC
4040
from parsing import parse_datetime_string
4141

42-
from nattype import nat_strings, NaT
43-
from nattype cimport NPY_NAT, checknull_with_nat
42+
from nattype import nat_strings
43+
from nattype cimport NPY_NAT, checknull_with_nat, c_NaT as NaT
4444

4545
# ----------------------------------------------------------------------
4646
# Constants

pandas/_libs/tslibs/nattype.pxd

+11
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,20 @@
11
# -*- coding: utf-8 -*-
22

3+
from cpython.datetime cimport datetime
4+
35
from numpy cimport int64_t
46
cdef int64_t NPY_NAT
57

68
cdef bint _nat_scalar_rules[6]
79

10+
11+
cdef class _NaT(datetime):
12+
cdef readonly:
13+
int64_t value
14+
object freq
15+
16+
cdef _NaT c_NaT
17+
18+
819
cdef bint checknull_with_nat(object val)
920
cdef bint is_null_datetimelike(object val)

pandas/_libs/tslibs/nattype.pyx

+38-37
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ def _make_nan_func(func_name, doc):
4747

4848
def _make_nat_func(func_name, doc):
4949
def f(*args, **kwargs):
50-
return NaT
50+
return c_NaT
5151
f.__name__ = func_name
5252
f.__doc__ = doc
5353
return f
@@ -67,10 +67,10 @@ def _make_error_func(func_name, cls):
6767

6868

6969
cdef _nat_divide_op(self, other):
70-
if PyDelta_Check(other) or is_timedelta64_object(other) or other is NaT:
70+
if PyDelta_Check(other) or is_timedelta64_object(other) or other is c_NaT:
7171
return np.nan
7272
if is_integer_object(other) or is_float_object(other):
73-
return NaT
73+
return c_NaT
7474
return NotImplemented
7575

7676

@@ -82,15 +82,15 @@ cdef _nat_rdivide_op(self, other):
8282

8383
def __nat_unpickle(*args):
8484
# return constant defined in the module
85-
return NaT
85+
return c_NaT
8686

8787
# ----------------------------------------------------------------------
8888

8989

9090
cdef class _NaT(datetime):
91-
cdef readonly:
92-
int64_t value
93-
object freq
91+
# cdef readonly:
92+
# int64_t value
93+
# object freq
9494

9595
def __hash__(_NaT self):
9696
# py3k needs this defined here
@@ -116,18 +116,18 @@ cdef class _NaT(datetime):
116116

117117
def __add__(self, other):
118118
if PyDateTime_Check(other):
119-
return NaT
119+
return c_NaT
120120

121121
elif hasattr(other, 'delta'):
122122
# Timedelta, offsets.Tick, offsets.Week
123-
return NaT
123+
return c_NaT
124124
elif getattr(other, '_typ', None) in ['dateoffset', 'series',
125125
'period', 'datetimeindex',
126126
'timedeltaindex']:
127127
# Duplicate logic in _Timestamp.__add__ to avoid needing
128128
# to subclass; allows us to @final(_Timestamp.__add__)
129129
return NotImplemented
130-
return NaT
130+
return c_NaT
131131

132132
def __sub__(self, other):
133133
# Duplicate some logic from _Timestamp.__sub__ to avoid needing
@@ -184,19 +184,6 @@ cdef class _NaT(datetime):
184184
""" Returns a numpy.datetime64 object with 'ns' precision """
185185
return np.datetime64('NaT', 'ns')
186186

187-
188-
class NaTType(_NaT):
189-
"""(N)ot-(A)-(T)ime, the time equivalent of NaN"""
190-
191-
def __new__(cls):
192-
cdef _NaT base
193-
194-
base = _NaT.__new__(cls, 1, 1, 1)
195-
base.value = NPY_NAT
196-
base.freq = None
197-
198-
return base
199-
200187
def __repr__(self):
201188
return 'NaT'
202189

@@ -216,20 +203,11 @@ class NaTType(_NaT):
216203
def __long__(self):
217204
return NPY_NAT
218205

219-
def __reduce_ex__(self, protocol):
220-
# python 3.6 compat
221-
# http://bugs.python.org/issue28730
222-
# now __reduce_ex__ is defined and higher priority than __reduce__
223-
return self.__reduce__()
224-
225-
def __reduce__(self):
226-
return (__nat_unpickle, (None, ))
227-
228206
def total_seconds(self):
229207
"""
230208
Total duration of timedelta in seconds (to ns precision)
231209
"""
232-
# GH 10939
210+
# GH#10939
233211
return np.nan
234212

235213
@property
@@ -260,6 +238,28 @@ class NaTType(_NaT):
260238
def is_year_end(self):
261239
return False
262240

241+
242+
class NaTType(_NaT):
243+
"""(N)ot-(A)-(T)ime, the time equivalent of NaN"""
244+
245+
def __new__(cls):
246+
cdef _NaT base
247+
248+
base = _NaT.__new__(cls, 1, 1, 1)
249+
base.value = NPY_NAT
250+
base.freq = None
251+
252+
return base
253+
254+
def __reduce_ex__(self, protocol):
255+
# python 3.6 compat
256+
# http://bugs.python.org/issue28730
257+
# now __reduce_ex__ is defined and higher priority than __reduce__
258+
return self.__reduce__()
259+
260+
def __reduce__(self):
261+
return (__nat_unpickle, (None, ))
262+
263263
def __rdiv__(self, other):
264264
return _nat_rdivide_op(self, other)
265265

@@ -271,7 +271,7 @@ class NaTType(_NaT):
271271

272272
def __rmul__(self, other):
273273
if is_integer_object(other) or is_float_object(other):
274-
return NaT
274+
return c_NaT
275275
return NotImplemented
276276

277277
# ----------------------------------------------------------------------
@@ -659,14 +659,15 @@ class NaTType(_NaT):
659659
""")
660660

661661

662-
NaT = NaTType()
662+
c_NaT = NaTType() # C-visible
663+
NaT = c_NaT # Python-visible
663664

664665

665666
# ----------------------------------------------------------------------
666667

667668
cdef inline bint checknull_with_nat(object val):
668669
""" utility to check if a value is a nat or not """
669-
return val is None or util.is_nan(val) or val is NaT
670+
return val is None or util.is_nan(val) or val is c_NaT
670671

671672

672673
cdef inline bint is_null_datetimelike(object val):
@@ -683,7 +684,7 @@ cdef inline bint is_null_datetimelike(object val):
683684
"""
684685
if val is None or util.is_nan(val):
685686
return True
686-
elif val is NaT:
687+
elif val is c_NaT:
687688
return True
688689
elif util.is_timedelta64_object(val):
689690
return val.view('int64') == NPY_NAT

pandas/_libs/tslibs/period.pyx

+3-2
Original file line number | Diff line number | Diff line change
@@ -46,8 +46,9 @@ from frequencies cimport (get_freq_code, get_base_alias,
4646
get_rule_month)
4747
from parsing import parse_time_string
4848
from resolution import Resolution
49-
from nattype import nat_strings, NaT
50-
from nattype cimport _nat_scalar_rules, NPY_NAT, is_null_datetimelike
49+
from nattype import nat_strings
50+
from nattype cimport (
51+
_nat_scalar_rules, NPY_NAT, is_null_datetimelike, c_NaT as NaT)
5152
from offsets cimport to_offset
5253
from offsets import _Tick
5354

pandas/_libs/tslibs/timedeltas.pyx

+2-2
Original file line number | Diff line number | Diff line change
@@ -33,8 +33,8 @@ from ccalendar import DAY_SECONDS
3333
from np_datetime cimport (cmp_scalar, reverse_ops, td64_to_tdstruct,
3434
pandas_timedeltastruct)
3535

36-
from nattype import nat_strings, NaT
37-
from nattype cimport checknull_with_nat, NPY_NAT
36+
from nattype import nat_strings
37+
from nattype cimport checknull_with_nat, NPY_NAT, c_NaT as NaT
3838
from offsets cimport to_offset
3939

4040
# ----------------------------------------------------------------------

pandas/_libs/tslibs/timestamps.pyx

+1-2
Original file line number | Diff line number | Diff line change
@@ -26,8 +26,7 @@ from conversion import tz_localize_to_utc, normalize_i8_timestamps
2626
from conversion cimport (tz_convert_single, _TSObject,
2727
convert_to_tsobject, convert_datetime_to_tsobject)
2828
from fields import get_start_end_field, get_date_name_field
29-
from nattype import NaT
30-
from nattype cimport NPY_NAT
29+
from nattype cimport NPY_NAT, c_NaT as NaT
3130
from np_datetime import OutOfBoundsDatetime
3231
from np_datetime cimport (reverse_ops, cmp_scalar, check_dts_bounds,
3332
npy_datetimestruct, dt64_to_dtstruct)

pandas/_libs/tslibs/timezones.pyx

+2
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,8 @@
22

33
from cython import Py_ssize_t
44

5+
from cpython.datetime cimport tzinfo
6+
57
# dateutil compat
68
from dateutil.tz import (
79
tzutc as _dateutil_tzutc,

pandas/core/arrays/datetimes.py

+2-3
Original file line number | Diff line number | Diff line change
@@ -6,10 +6,9 @@
66
from pytz import utc
77

88
from pandas._libs import lib, tslib
9-
from pandas._libs.tslib import NaT, Timestamp, iNaT
109
from pandas._libs.tslibs import (
11-
ccalendar, conversion, fields, normalize_date, resolution as libresolution,
12-
timezones)
10+
NaT, Timestamp, ccalendar, conversion, fields, iNaT, normalize_date,
11+
resolution as libresolution, timezones)
1312
import pandas.compat as compat
1413
from pandas.errors import PerformanceWarning
1514
from pandas.util._decorators import Appender, cache_readonly

pandas/core/arrays/period.py

+1-2
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,7 @@
44

55
import numpy as np
66

7-
from pandas._libs.tslib import NaT, iNaT
8-
from pandas._libs.tslibs import period as libperiod
7+
from pandas._libs.tslibs import NaT, iNaT, period as libperiod
98
from pandas._libs.tslibs.fields import isleapyear_arr
109
from pandas._libs.tslibs.period import (
1110
DIFFERENT_FREQ_INDEX, IncompatibleFrequency, Period, get_period_field_arr,

pandas/tests/scalar/test_nat.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
import pytest
55
import pytz
66

7-
from pandas._libs.tslib import iNaT
7+
from pandas._libs.tslibs import iNaT
88

99
from pandas import (
1010
DatetimeIndex, Index, NaT, Period, Series, Timedelta, TimedeltaIndex,

pandas/tests/scalar/timedelta/test_timedelta.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
import numpy as np
55
import pytest
66

7-
from pandas._libs.tslib import NaT, iNaT
7+
from pandas._libs.tslibs import NaT, iNaT
88
import pandas.compat as compat
99

1010
import pandas as pd

pandas/tests/tseries/offsets/test_offsets.py

+3-5
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,11 @@
55
import pytest
66
import pytz
77

8-
import pandas._libs.tslib as tslib
9-
from pandas._libs.tslib import NaT, Timestamp
10-
from pandas._libs.tslibs import conversion, timezones
8+
from pandas._libs.tslibs import (
9+
NaT, OutOfBoundsDatetime, Timedelta, Timestamp, conversion, timezones)
1110
from pandas._libs.tslibs.frequencies import (
1211
INVALID_FREQ_ERR_MSG, get_freq_code, get_freq_str)
1312
import pandas._libs.tslibs.offsets as liboffsets
14-
from pandas._libs.tslibs.timedeltas import Timedelta
1513
import pandas.compat as compat
1614
from pandas.compat import range
1715
from pandas.compat.numpy import np_datetime64_compat
@@ -124,7 +122,7 @@ def test_apply_out_of_range(self, tz_naive_fixture):
124122
assert isinstance(result, datetime)
125123
assert t.tzinfo == result.tzinfo
126124

127-
except tslib.OutOfBoundsDatetime:
125+
except OutOfBoundsDatetime:
128126
raise
129127
except (ValueError, KeyError):
130128
# we are creating an invalid offset

0 commit comments

Comments
 (0)