Skip to content

Commit 63b0d9b

Browse files
committed
REF: microsecond -> nanosecond migration, most of the way there #1238
1 parent 330f34f commit 63b0d9b

File tree

14 files changed: +186 / -143 lines changed

pandas/core/common.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def isnull(obj):
6767

6868
if isinstance(obj, Series):
6969
result = Series(result, index=obj.index, copy=False)
70-
elif obj.dtype == np.datetime64:
70+
elif obj.dtype == np.dtype('M8[ns]'):
7171
# this is the NaT pattern
7272
result = np.array(obj).view('i8') == lib.NaT
7373
else:

pandas/core/internals.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1125,7 +1125,7 @@ def form_blocks(data, axes):
11251125

11261126
if len(datetime_dict):
11271127
datetime_block = _simple_blockify(datetime_dict, items,
1128-
np.dtype('M8[us]'))
1128+
np.dtype('M8[ns]'))
11291129
blocks.append(datetime_block)
11301130

11311131
if len(bool_dict):

pandas/core/nanops.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -406,7 +406,7 @@ def unique1d(values):
406406
dtype=np.int64)
407407

408408
if values.dtype == np.datetime64:
409-
uniques = uniques.view('M8[us]')
409+
uniques = uniques.view('M8[ns]')
410410
else:
411411
table = lib.PyObjectHashTable(len(values))
412412
uniques = table.unique(com._ensure_object(values))

pandas/io/pytables.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -963,7 +963,7 @@ def _read_array(group, key):
963963

964964
def _unconvert_index(data, kind):
965965
if kind == 'datetime64':
966-
index = np.array(data, dtype='M8[us]')
966+
index = np.array(data, dtype='M8[ns]')
967967
elif kind == 'datetime':
968968
index = np.array([datetime.fromtimestamp(v) for v in data],
969969
dtype=object)

pandas/src/datetime.pyx

+36-45
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,9 @@ except NameError: # py3
4747
# This serves as the box for datetime64
4848
class Timestamp(_Timestamp):
4949

50-
__slots__ = ['value', 'offset']
51-
5250
def __new__(cls, object ts_input, object offset=None, tz=None):
5351
cdef _TSObject ts
52+
cdef _Timestamp ts_base
5453

5554
if isinstance(ts_input, float):
5655
# to do, do we want to support this, ie with fractional seconds?
@@ -72,6 +71,7 @@ class Timestamp(_Timestamp):
7271
# fill out rest of data
7372
ts_base.value = ts.value
7473
ts_base.offset = offset
74+
ts_base.nanosecond = ts.dts.ps / 1000
7575

7676
return ts_base
7777

@@ -185,7 +185,7 @@ def apply_offset(ndarray[object] values, object offset):
185185
ndarray[int64_t] new_values
186186
object boxed
187187

188-
result = np.empty(n, dtype='M8[us]')
188+
result = np.empty(n, dtype='M8[ns]')
189189
new_values = result.view('i8')
190190
pass
191191

@@ -194,8 +194,8 @@ def apply_offset(ndarray[object] values, object offset):
194194
# (see Timestamp class above). This will serve as a C extension type that
195195
# shadows the python class, where we do any heavy lifting.
196196
cdef class _Timestamp(datetime):
197-
cdef:
198-
int64_t value # numpy int64
197+
cdef public:
198+
int64_t value, nanosecond
199199
object offset # frequency reference
200200

201201
def __add__(self, other):
@@ -250,13 +250,13 @@ cpdef convert_to_tsobject(object ts, object tz=None):
250250

251251
if is_datetime64_object(ts):
252252
obj.value = unbox_datetime64_scalar(ts)
253-
pandas_datetime_to_datetimestruct(obj.value, PANDAS_FR_us, &obj.dts)
253+
pandas_datetime_to_datetimestruct(obj.value, PANDAS_FR_ns, &obj.dts)
254254
elif is_integer_object(ts):
255255
obj.value = ts
256-
pandas_datetime_to_datetimestruct(ts, PANDAS_FR_us, &obj.dts)
256+
pandas_datetime_to_datetimestruct(ts, PANDAS_FR_ns, &obj.dts)
257257
elif util.is_string_object(ts):
258258
_string_to_dts(ts, &obj.dts)
259-
obj.value = pandas_datetimestruct_to_datetime(PANDAS_FR_us, &obj.dts)
259+
obj.value = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &obj.dts)
260260
elif PyDateTime_Check(ts):
261261
obj.value = _pydatetime_to_dts(ts, &obj.dts)
262262
obj.tzinfo = ts.tzinfo
@@ -280,7 +280,7 @@ cpdef convert_to_tsobject(object ts, object tz=None):
280280
obj.value = obj.value + deltas[pos]
281281

282282
if utc_convert:
283-
pandas_datetime_to_datetimestruct(obj.value, PANDAS_FR_us,
283+
pandas_datetime_to_datetimestruct(obj.value, PANDAS_FR_ns,
284284
&obj.dts)
285285
obj.tzinfo = tz._tzinfos[inf]
286286

@@ -297,7 +297,7 @@ cpdef convert_to_tsobject(object ts, object tz=None):
297297

298298
cdef inline object _datetime64_to_datetime(int64_t val):
299299
cdef pandas_datetimestruct dts
300-
pandas_datetime_to_datetimestruct(val, PANDAS_FR_us, &dts)
300+
pandas_datetime_to_datetimestruct(val, PANDAS_FR_ns, &dts)
301301
return _dts_to_pydatetime(&dts)
302302

303303
cdef inline object _dts_to_pydatetime(pandas_datetimestruct *dts):
@@ -313,7 +313,7 @@ cdef inline int64_t _pydatetime_to_dts(object val, pandas_datetimestruct *dts):
313313
dts.min = PyDateTime_DATE_GET_MINUTE(val)
314314
dts.sec = PyDateTime_DATE_GET_SECOND(val)
315315
dts.us = PyDateTime_DATE_GET_MICROSECOND(val)
316-
return pandas_datetimestruct_to_datetime(PANDAS_FR_us, dts)
316+
return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts)
317317

318318
cdef inline int64_t _dtlike_to_datetime64(object val,
319319
pandas_datetimestruct *dts):
@@ -324,7 +324,7 @@ cdef inline int64_t _dtlike_to_datetime64(object val,
324324
dts.min = val.minute
325325
dts.sec = val.second
326326
dts.us = val.microsecond
327-
return pandas_datetimestruct_to_datetime(PANDAS_FR_us, dts)
327+
return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts)
328328

329329
cdef inline int64_t _date_to_datetime64(object val,
330330
pandas_datetimestruct *dts):
@@ -335,7 +335,7 @@ cdef inline int64_t _date_to_datetime64(object val,
335335
dts.min = 0
336336
dts.sec = 0
337337
dts.us = 0
338-
return pandas_datetimestruct_to_datetime(PANDAS_FR_us, dts)
338+
return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts)
339339

340340

341341
cdef inline int _string_to_dts(object val, pandas_datetimestruct* dts) except -1:
@@ -345,7 +345,7 @@ cdef inline int _string_to_dts(object val, pandas_datetimestruct* dts) except -1
345345

346346
if PyUnicode_Check(val):
347347
val = PyUnicode_AsASCIIString(val);
348-
parse_iso_8601_datetime(val, len(val), PANDAS_FR_us, NPY_UNSAFE_CASTING,
348+
parse_iso_8601_datetime(val, len(val), PANDAS_FR_ns, NPY_UNSAFE_CASTING,
349349
dts, &islocal, &out_bestunit, &special)
350350
return 0
351351

@@ -738,7 +738,7 @@ def string_to_datetime(ndarray[object] strings, raise_=False, dayfirst=False):
738738
from dateutil.parser import parse
739739

740740
try:
741-
result = np.empty(n, dtype='M8[us]')
741+
result = np.empty(n, dtype='M8[ns]')
742742
iresult = result.view('i8')
743743
for i in range(n):
744744
val = strings[i]
@@ -903,7 +903,7 @@ def _get_transitions(tz):
903903
Get UTC times of DST transitions
904904
"""
905905
if tz not in trans_cache:
906-
arr = np.array(tz._utc_transition_times, dtype='M8[us]')
906+
arr = np.array(tz._utc_transition_times, dtype='M8[ns]')
907907
trans_cache[tz] = arr.view('i8')
908908
return trans_cache[tz]
909909

@@ -1009,7 +1009,7 @@ def build_field_sarray(ndarray[int64_t] dtindex):
10091009
mus = out['u']
10101010

10111011
for i in range(count):
1012-
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts)
1012+
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
10131013
years[i] = dts.year
10141014
months[i] = dts.month
10151015
days[i] = dts.day
@@ -1044,49 +1044,49 @@ def fast_field_accessor(ndarray[int64_t] dtindex, object field):
10441044

10451045
if field == 'Y':
10461046
for i in range(count):
1047-
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts)
1047+
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
10481048
out[i] = dts.year
10491049
return out
10501050

10511051
elif field == 'M':
10521052
for i in range(count):
1053-
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts)
1053+
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
10541054
out[i] = dts.month
10551055
return out
10561056

10571057
elif field == 'D':
10581058
for i in range(count):
1059-
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts)
1059+
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
10601060
out[i] = dts.day
10611061
return out
10621062

10631063
elif field == 'h':
10641064
for i in range(count):
1065-
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts)
1065+
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
10661066
out[i] = dts.hour
10671067
return out
10681068

10691069
elif field == 'm':
10701070
for i in range(count):
1071-
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts)
1071+
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
10721072
out[i] = dts.min
10731073
return out
10741074

10751075
elif field == 's':
10761076
for i in range(count):
1077-
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts)
1077+
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
10781078
out[i] = dts.sec
10791079
return out
10801080

10811081
elif field == 'us':
10821082
for i in range(count):
1083-
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts)
1083+
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
10841084
out[i] = dts.us
10851085
return out
10861086

10871087
elif field == 'doy':
10881088
for i in range(count):
1089-
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts)
1089+
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
10901090
isleap = is_leapyear(dts.year)
10911091
out[i] = _month_offset[isleap, dts.month-1] + dts.day
10921092
return out
@@ -1099,15 +1099,15 @@ def fast_field_accessor(ndarray[int64_t] dtindex, object field):
10991099

11001100
elif field == 'woy':
11011101
for i in range(count):
1102-
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts)
1102+
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
11031103
isleap = is_leapyear(dts.year)
11041104
out[i] = _month_offset[isleap, dts.month - 1] + dts.day
11051105
out[i] = ((out[i] - 1) / 7) + 1
11061106
return out
11071107

11081108
elif field == 'q':
11091109
for i in range(count):
1110-
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_us, &dts)
1110+
pandas_datetime_to_datetimestruct(dtindex[i], PANDAS_FR_ns, &dts)
11111111
out[i] = dts.month
11121112
out[i] = ((out[i] - 1) / 3) + 1
11131113
return out
@@ -1119,7 +1119,7 @@ cdef inline int m8_weekday(int64_t val):
11191119
ts = convert_to_tsobject(val)
11201120
return ts_dayofweek(ts)
11211121

1122-
cdef int64_t DAY_US = 86400000000LL
1122+
cdef int64_t DAY_NS = 86400000000000LL
11231123

11241124
def values_at_time(ndarray[int64_t] stamps, int64_t time):
11251125
cdef:
@@ -1133,18 +1133,14 @@ def values_at_time(ndarray[int64_t] stamps, int64_t time):
11331133
return np.empty(0, dtype=np.int64)
11341134

11351135
# is this OK?
1136-
# days = stamps // DAY_US
1137-
times = stamps % DAY_US
1136+
# days = stamps // DAY_NS
1137+
times = stamps % DAY_NS
11381138

1139-
# Microsecond resolution
1139+
# Nanosecond resolution
11401140
count = 0
11411141
for i in range(1, n):
11421142
if times[i] == time:
11431143
count += 1
1144-
# cur = days[i]
1145-
# if cur > last:
1146-
# count += 1
1147-
# last = cur
11481144

11491145
indexer = np.empty(count, dtype=np.int64)
11501146

@@ -1155,11 +1151,6 @@ def values_at_time(ndarray[int64_t] stamps, int64_t time):
11551151
indexer[j] = i
11561152
j += 1
11571153

1158-
# cur = days[i]
1159-
# if cur > last:
1160-
# j += 1
1161-
# last = cur
1162-
11631154
return indexer
11641155

11651156

@@ -1170,12 +1161,12 @@ def date_normalize(ndarray[int64_t] stamps):
11701161
pandas_datetimestruct dts
11711162

11721163
for i in range(n):
1173-
pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_us, &dts)
1164+
pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts)
11741165
dts.hour = 0
11751166
dts.min = 0
11761167
dts.sec = 0
11771168
dts.us = 0
1178-
result[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_us, &dts)
1169+
result[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
11791170

11801171
return result
11811172

@@ -1185,7 +1176,7 @@ def dates_normalized(ndarray[int64_t] stamps):
11851176
pandas_datetimestruct dts
11861177

11871178
for i in range(n):
1188-
pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_us, &dts)
1179+
pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts)
11891180
if (dts.hour + dts.min + dts.sec + dts.us) > 0:
11901181
return False
11911182

@@ -1250,7 +1241,7 @@ def dt64arr_to_periodarr(ndarray[int64_t] dtarr, int freq):
12501241
out = np.empty(l, dtype='i8')
12511242

12521243
for i in range(l):
1253-
pandas_datetime_to_datetimestruct(dtarr[i], PANDAS_FR_us, &dts)
1244+
pandas_datetime_to_datetimestruct(dtarr[i], PANDAS_FR_ns, &dts)
12541245
out[i] = get_period_ordinal(dts.year, dts.month, dts.day,
12551246
dts.hour, dts.min, dts.sec, freq)
12561247
return out
@@ -1349,7 +1340,7 @@ cpdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq):
13491340
dts.sec = int(dinfo.second)
13501341
dts.us = 0
13511342

1352-
return pandas_datetimestruct_to_datetime(PANDAS_FR_us, &dts)
1343+
return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts)
13531344

13541345
def period_ordinal_to_string(int64_t value, int freq):
13551346
cdef:

pandas/src/engines.pyx

+3-3
Original file line numberDiff line numberDiff line change
@@ -415,20 +415,20 @@ cdef class DatetimeEngine(Int64Engine):
415415

416416
def get_indexer(self, values):
417417
self._ensure_mapping_populated()
418-
if values.dtype != 'M8':
418+
if values.dtype != 'M8[ns]':
419419
return np.repeat(-1, len(values)).astype('i4')
420420
values = np.asarray(values).view('i8')
421421
return self.mapping.lookup(values)
422422

423423
def get_pad_indexer(self, other, limit=None):
424-
if other.dtype != 'M8':
424+
if other.dtype != 'M8[ns]':
425425
return np.repeat(-1, len(other)).astype('i4')
426426
other = np.asarray(other).view('i8')
427427
return _algos.pad_int64(self._get_index_values(), other,
428428
limit=limit)
429429

430430
def get_backfill_indexer(self, other, limit=None):
431-
if other.dtype != 'M8':
431+
if other.dtype != 'M8[ns]':
432432
return np.repeat(-1, len(other)).astype('i4')
433433
other = np.asarray(other).view('i8')
434434
return _algos.backfill_int64(self._get_index_values(), other,

pandas/tests/test_tseries.py

+2
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,8 @@ def test_maybe_booleans_to_slice():
197197
result = lib.maybe_booleans_to_slice(arr)
198198
assert(result.dtype == np.bool_)
199199

200+
result = lib.maybe_booleans_to_slice(arr[:0])
201+
assert(result == slice(0, 0))
200202

201203
def test_convert_objects():
202204
arr = np.array(['a', 'b', nan, nan, 'd', 'e', 'f'], dtype='O')

pandas/tools/tests/test_merge.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1198,7 +1198,7 @@ def test_concat_series(self):
11981198
result = concat(pieces, keys=[0, 1, 2])
11991199
expected = ts.copy()
12001200

1201-
ts.index = DatetimeIndex(np.array(ts.index.values, dtype='M8[us]'))
1201+
ts.index = DatetimeIndex(np.array(ts.index.values, dtype='M8[ns]'))
12021202

12031203
exp_labels = [np.repeat([0, 1, 2], [len(x) for x in pieces]),
12041204
np.arange(len(ts))]

0 commit comments

Comments
 (0)