|
| 1 | +from copy import deepcopy |
| 2 | + |
| 3 | +import pandas |
| 4 | + |
| 5 | +# https://www.tarantool.io/en/doc/latest/dev_guide/internals/msgpack_extensions/#the-datetime-type |
| 6 | +# |
| 7 | +# The datetime MessagePack representation looks like this: |
| 8 | +# +---------+----------------+==========+-----------------+ |
| 9 | +# | MP_EXT | MP_DATETIME | seconds | nsec; tzoffset; | |
| 10 | +# | = d7/d8 | = 4 | | tzindex; | |
| 11 | +# +---------+----------------+==========+-----------------+ |
| 12 | +# MessagePack data contains: |
| 13 | +# |
| 14 | +# * Seconds (8 bytes) as an unencoded 64-bit signed integer stored in the |
| 15 | +# little-endian order. |
| 16 | +# * The optional fields (8 bytes), if any of them have a non-zero value. |
| 17 | +# The fields include nsec (4 bytes), tzoffset (2 bytes), and |
| 18 | +# tzindex (2 bytes) packed in the little-endian order. |
| 19 | +# |
| 20 | +# seconds is seconds since Epoch, where the epoch is the point where the time |
| 21 | +# starts, and is platform dependent. For Unix, the epoch is January 1, |
| 22 | +# 1970, 00:00:00 (UTC). Tarantool uses a double type, see a structure |
| 23 | +# definition in src/lib/core/datetime.h and reasons in |
| 24 | +# https://github.com/tarantool/tarantool/wiki/Datetime-internals#intervals-in-c |
| 25 | +# |
| 26 | +# nsec is nanoseconds, fractional part of seconds. Tarantool uses int32_t, see |
| 27 | +# a definition in src/lib/core/datetime.h. |
| 28 | +# |
| 29 | +# tzoffset is timezone offset in minutes from UTC. Tarantool uses an int16_t type, |
| 30 | +# see a structure definition in src/lib/core/datetime.h. |
| 31 | +# |
| 32 | +# tzindex is Olson timezone id. Tarantool uses an int16_t type, see a structure |
| 33 | +# definition in src/lib/core/datetime.h. If both tzoffset and tzindex are |
| 34 | +# specified, tzindex has the preference and the tzoffset value is ignored. |
| 35 | + |
# Widths, in bytes, of the fields that make up the datetime payload.
SECONDS_SIZE_BYTES = 8
NSEC_SIZE_BYTES = 4
TZOFFSET_SIZE_BYTES = 2
TZINDEX_SIZE_BYTES = 2

# Byte order passed to int.from_bytes() / int.to_bytes() for every field.
BYTEORDER = 'little'

# Number of nanoseconds in one second.
NSEC_IN_SEC = 10 ** 9
| 44 | + |
| 45 | + |
def get_bytes_as_int(data, cursor, size):
    """Read one signed integer field from ``data``.

    The field occupies ``size`` bytes starting at offset ``cursor`` and is
    interpreted using the module-level ``BYTEORDER``.

    :param data: Buffer that holds the packed fields.
    :param cursor: Offset of the first byte of the field.
    :param size: Width of the field in bytes.
    :return: Tuple ``(value, next_cursor)`` where ``next_cursor`` is the
        offset immediately after the field.
    """
    end = cursor + size
    value = int.from_bytes(data[cursor:end], BYTEORDER, signed=True)
    return value, end
| 49 | + |
def get_int_as_bytes(data, size):
    """Pack the integer ``data`` into ``size`` bytes.

    The value is encoded as a signed integer using the module-level
    ``BYTEORDER``.

    :param data: Integer to encode.
    :param size: Width of the resulting field in bytes.
    :return: The encoded ``bytes`` object.
    """
    encoded = data.to_bytes(size, BYTEORDER, signed=True)
    return encoded
| 52 | + |
def msgpack_decode(data):
    """Decode a datetime extension payload into a ``pandas.Timestamp``.

    The payload is either the seconds field alone or the seconds field
    followed by the nsec, tzoffset and tzindex fields. Any other length is
    rejected instead of being decoded from truncated or oversized data.

    :param data: Raw payload bytes of the datetime extension.
    :return: Timestamp built from ``seconds * NSEC_IN_SEC + nsec``
        nanoseconds.
    :raises MsgpackError: If the payload length matches neither layout.
    :raises NotImplementedError: If tzoffset or tzindex is non-zero.
    """
    # NOTE(review): MsgpackError is not imported in the visible part of this
    # file -- confirm it is in scope, otherwise this raise is a NameError.
    full_size = (SECONDS_SIZE_BYTES + NSEC_SIZE_BYTES
                 + TZOFFSET_SIZE_BYTES + TZINDEX_SIZE_BYTES)
    payload_size = len(data)
    if payload_size not in (SECONDS_SIZE_BYTES, full_size):
        raise MsgpackError('Unexpected datetime payload length')

    cursor = 0
    seconds, cursor = get_bytes_as_int(data, cursor, SECONDS_SIZE_BYTES)

    # The optional fields default to zero when only seconds are present.
    nsec = 0
    tzoffset = 0
    tzindex = 0
    if payload_size == full_size:
        nsec, cursor = get_bytes_as_int(data, cursor, NSEC_SIZE_BYTES)
        tzoffset, cursor = get_bytes_as_int(data, cursor, TZOFFSET_SIZE_BYTES)
        tzindex, cursor = get_bytes_as_int(data, cursor, TZINDEX_SIZE_BYTES)

    if tzoffset != 0 or tzindex != 0:
        raise NotImplementedError(
            'datetime with non-zero tzoffset or tzindex is not supported')

    total_nsec = seconds * NSEC_IN_SEC + nsec

    return pandas.to_datetime(total_nsec, unit='ns')
| 74 | + |
class Datetime:
    """Datetime value stored internally as a ``pandas.Timestamp``.

    An instance can be built from a datetime extension payload (``bytes``),
    from a ``pandas.Timestamp``, from another ``Datetime``, or from any
    arguments accepted by the ``pandas.Timestamp`` constructor.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the wrapped timestamp.

        :param args: If the first positional argument is ``bytes`` it is
            decoded with ``msgpack_decode``; if it is a ``pandas.Timestamp``
            or a ``Datetime`` its timestamp is copied. Otherwise all
            arguments are forwarded to ``pandas.Timestamp``.
        :param kwargs: Forwarded to ``pandas.Timestamp`` when no special
            first positional argument is recognised.
        """
        if args:
            data = args[0]
            if isinstance(data, bytes):
                self._timestamp = msgpack_decode(data)
                return

            if isinstance(data, pandas.Timestamp):
                self._timestamp = deepcopy(data)
                return

            if isinstance(data, Datetime):
                self._timestamp = deepcopy(data._timestamp)
                return

        # Every remaining case (no positional arguments, or a first argument
        # of another type such as a string) goes to pandas, so that
        # _timestamp is always set once __init__ returns.
        self._timestamp = pandas.Timestamp(*args, **kwargs)

    def __eq__(self, other):
        """Compare with another ``Datetime`` or a ``pandas.Timestamp``.

        :return: Result of comparing the wrapped timestamps; ``False`` for
            any other type.
        """
        if isinstance(other, Datetime):
            return self._timestamp == other._timestamp
        if isinstance(other, pandas.Timestamp):
            return self._timestamp == other
        return False

    def __str__(self):
        """Return the string form of the wrapped timestamp."""
        return self._timestamp.__str__()

    def __repr__(self):
        """Return the repr of the wrapped timestamp."""
        return self._timestamp.__repr__()

    def to_pd_timestamp(self):
        """Return a copy of the wrapped ``pandas.Timestamp``."""
        return deepcopy(self._timestamp)

    def msgpack_encode(self):
        """Encode the timestamp as a datetime extension payload.

        :return: The seconds field alone when nsec, tzoffset and tzindex
            are all zero; otherwise seconds followed by those three fields.
        """
        ts_value = self._timestamp.value

        # Floor division and modulo keep nsec non-negative for negative
        # ts_value as well.
        seconds = ts_value // NSEC_IN_SEC
        nsec = ts_value % NSEC_IN_SEC
        # Timezone fields are always written as zero here.
        tzoffset = 0
        tzindex = 0

        buf = get_int_as_bytes(seconds, SECONDS_SIZE_BYTES)

        if (nsec != 0) or (tzoffset != 0) or (tzindex != 0):
            buf = buf + get_int_as_bytes(nsec, NSEC_SIZE_BYTES)
            buf = buf + get_int_as_bytes(tzoffset, TZOFFSET_SIZE_BYTES)
            buf = buf + get_int_as_bytes(tzindex, TZINDEX_SIZE_BYTES)

        return buf
0 commit comments