Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 3332e3d

Browse files
committedSep 14, 2022
msgpack: support datetime extended type
Tarantool supports datetime type since version 2.10.0 [1]. This patch introduces support for the Tarantool datetime type in msgpack decoders and encoders. Tarantool datetime objects are decoded to `tarantool.Datetime` type. `tarantool.Datetime` objects may be encoded to Tarantool datetime objects. `tarantool.Datetime` is basically a `pandas.Timestamp` wrapper. You can create `tarantool.Datetime` objects - from `pandas.Timestamp` object, - by using the same API as in `pandas.Timestamp()` [2], - from another `tarantool.Datetime` object. To work with datetime data as a `pandas.Timestamp`, convert `tarantool.Datetime` object to a `pandas.Timestamp` with `to_pd_timestamp()` method call. You can use this `pandas.Timestamp` object to build a `tarantool.Datetime` object before sending data to Tarantool. To work with data as `numpy.datetime64` or `datetime.datetime`, convert to a `pandas.Timestamp` and then use `to_datetime64()` or `to_pydatetime()` converter. `pandas.Timestamp` was chosen to store data because it can store both nanoseconds and timezone information. Python's built-in `datetime.datetime` supports microseconds at most, and `numpy.datetime64` does not support timezones. Tarantool datetime interval type is planned to be stored in custom type `tarantool.Interval` and we'll need a way to support arithmetic between datetime and interval. This is the reason we use a custom class instead of plain `pandas.Timestamp`. This patch does not yet introduce the support of timezones in datetime. 1. tarantool/tarantool#5941 2. https://pandas.pydata.org/docs/reference/api/pandas.Timestamp.html Part of #204
1 parent c70dfa6 commit 3332e3d

File tree

10 files changed

+347
-6
lines changed

10 files changed

+347
-6
lines changed
 

‎CHANGELOG.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,27 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99
### Added
1010
- Decimal type support (#203).
1111
- UUID type support (#202).
12+
- Datetime type support and tarantool.Datetime type (#204).
13+
14+
Tarantool datetime objects are decoded to `tarantool.Datetime`
15+
type. `tarantool.Datetime` may be encoded to Tarantool datetime
16+
objects.
17+
18+
`tarantool.Datetime` is basically a `pandas.Timestamp` wrapper.
19+
You can create `tarantool.Datetime` objects
20+
- from `pandas.Timestamp` object,
21+
- by using the same API as in `pandas.Timestamp()`,
22+
- from another `tarantool.Datetime` object.
23+
24+
To work with datetime data as a `pandas.Timestamp`, convert
25+
`tarantool.Datetime` object to a `pandas.Timestamp` with
26+
`to_pd_timestamp()` method call. You can use this
27+
`pandas.Timestamp` object to build a `tarantool.Datetime`
28+
object before sending data to Tarantool.
29+
30+
To work with data as `numpy.datetime64` or `datetime.datetime`,
31+
convert to a `pandas.Timestamp` and then use `to_datetime64()`
32+
or `to_pydatetime()` converter.
1233

1334
### Changed
1435
- Bump msgpack requirement to 1.0.4 (PR #223).

‎requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
msgpack>=1.0.4
2+
pandas

‎tarantool/__init__.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@
3232
ENCODING_DEFAULT,
3333
)
3434

35+
from tarantool.msgpack_ext.types.datetime import (
36+
Datetime,
37+
)
38+
3539
__version__ = "0.9.0"
3640

3741

@@ -91,7 +95,7 @@ def connectmesh(addrs=({'host': 'localhost', 'port': 3301},), user=None,
9195

9296
__all__ = ['connect', 'Connection', 'connectmesh', 'MeshConnection', 'Schema',
9397
'Error', 'DatabaseError', 'NetworkError', 'NetworkWarning',
94-
'SchemaError', 'dbapi']
98+
'SchemaError', 'dbapi', 'Datetime']
9599

96100
# ConnectionPool is supported only for Python 3.7 or newer.
97101
if sys.version_info.major >= 3 and sys.version_info.minor >= 7:

‎tarantool/msgpack_ext/datetime.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from tarantool.msgpack_ext.types.datetime import Datetime
2+
3+
EXT_ID = 4
4+
5+
def encode(obj):
    """Return the extension payload produced by ``obj.msgpack_encode()``."""
    payload = obj.msgpack_encode()
    return payload
7+
8+
def decode(data):
    """Build a ``Datetime`` object from the extension payload ``data``."""
    decoded = Datetime(data)
    return decoded

‎tarantool/msgpack_ext/packer.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,16 @@
22
from uuid import UUID
33
from msgpack import ExtType
44

5+
from tarantool.msgpack_ext.types.datetime import Datetime
6+
57
import tarantool.msgpack_ext.decimal as ext_decimal
68
import tarantool.msgpack_ext.uuid as ext_uuid
9+
import tarantool.msgpack_ext.datetime as ext_datetime
710

811
encoders = [
9-
{'type': Decimal, 'ext': ext_decimal},
10-
{'type': UUID, 'ext': ext_uuid },
12+
{'type': Decimal, 'ext': ext_decimal },
13+
{'type': UUID, 'ext': ext_uuid },
14+
{'type': Datetime, 'ext': ext_datetime},
1115
]
1216

1317
def default(obj):
Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
from copy import deepcopy
2+
3+
import pandas
4+
5+
# https://www.tarantool.io/en/doc/latest/dev_guide/internals/msgpack_extensions/#the-datetime-type
6+
#
7+
# The datetime MessagePack representation looks like this:
8+
# +---------+----------------+==========+-----------------+
9+
# | MP_EXT | MP_DATETIME | seconds | nsec; tzoffset; |
10+
# | = d7/d8 | = 4 | | tzindex; |
11+
# +---------+----------------+==========+-----------------+
12+
# MessagePack data contains:
13+
#
14+
# * Seconds (8 bytes) as an unencoded 64-bit signed integer stored in the
15+
# little-endian order.
16+
# * The optional fields (8 bytes), if any of them have a non-zero value.
17+
# The fields include nsec (4 bytes), tzoffset (2 bytes), and
18+
# tzindex (2 bytes) packed in the little-endian order.
19+
#
20+
# seconds is seconds since Epoch, where the epoch is the point where the time
21+
# starts, and is platform dependent. For Unix, the epoch is January 1,
22+
# 1970, 00:00:00 (UTC). Tarantool uses a double type, see a structure
23+
# definition in src/lib/core/datetime.h and reasons in
24+
# https://github.com/tarantool/tarantool/wiki/Datetime-internals#intervals-in-c
25+
#
26+
# nsec is nanoseconds, fractional part of seconds. Tarantool uses int32_t, see
27+
# a definition in src/lib/core/datetime.h.
28+
#
29+
# tzoffset is timezone offset in minutes from UTC. Tarantool uses a int16_t type,
30+
# see a structure definition in src/lib/core/datetime.h.
31+
#
32+
# tzindex is Olson timezone id. Tarantool uses a int16_t type, see a structure
33+
# definition in src/lib/core/datetime.h. If both tzoffset and tzindex are
34+
# specified, tzindex has the preference and the tzoffset value is ignored.
35+
36+
SECONDS_SIZE_BYTES = 8
37+
NSEC_SIZE_BYTES = 4
38+
TZOFFSET_SIZE_BYTES = 2
39+
TZINDEX_SIZE_BYTES = 2
40+
41+
BYTEORDER = 'little'
42+
43+
NSEC_IN_SEC = 1000000000
44+
45+
46+
def get_bytes_as_int(data, cursor, size):
    """
    Read a signed integer of ``size`` bytes from ``data`` at ``cursor``.

    The bytes are interpreted using the module ``BYTEORDER``.

    :return: tuple of the decoded integer and the cursor position
        right after the consumed bytes
    """
    end = cursor + size
    value = int.from_bytes(data[cursor:end], BYTEORDER, signed=True)
    return value, end
49+
50+
def get_int_as_bytes(data, size):
    """
    Serialize the integer ``data`` into exactly ``size`` bytes.

    The value is written as a signed integer using the module
    ``BYTEORDER``.
    """
    encoded = data.to_bytes(size, byteorder=BYTEORDER, signed=True)
    return encoded
52+
53+
def msgpack_decode(data):
    """
    Decode a Tarantool datetime MessagePack extension payload.

    The payload is either the seconds field alone or the seconds field
    followed by the nsec, tzoffset and tzindex fields; every field is
    read as a signed integer in ``BYTEORDER``.

    :param data: raw extension payload
    :type data: bytes
    :return: decoded point in time
    :rtype: pandas.Timestamp
    :raise MsgpackError: payload length matches neither layout
    :raise NotImplementedError: tzoffset or tzindex is non-zero
    """
    cursor = 0
    seconds, cursor = get_bytes_as_int(data, cursor, SECONDS_SIZE_BYTES)

    full_size = (SECONDS_SIZE_BYTES + NSEC_SIZE_BYTES +
                 TZOFFSET_SIZE_BYTES + TZINDEX_SIZE_BYTES)

    data_len = len(data)
    if data_len == full_size:
        nsec, cursor = get_bytes_as_int(data, cursor, NSEC_SIZE_BYTES)
        tzoffset, cursor = get_bytes_as_int(data, cursor, TZOFFSET_SIZE_BYTES)
        tzindex, cursor = get_bytes_as_int(data, cursor, TZINDEX_SIZE_BYTES)
    elif data_len == SECONDS_SIZE_BYTES:
        # Optional fields are omitted when all of them are zero.
        nsec = 0
        tzoffset = 0
        tzindex = 0
    else:
        # MsgpackError is not imported at module level; import it here so
        # a malformed payload raises the intended error, not NameError.
        from tarantool.error import MsgpackError

        raise MsgpackError(f'Unexpected datetime payload length {data_len}')

    if (tzoffset != 0) or (tzindex != 0):
        raise NotImplementedError(
            'Decoding datetime with non-zero tzoffset or tzindex '
            'is not supported yet')

    total_nsec = seconds * NSEC_IN_SEC + nsec

    return pandas.to_datetime(total_nsec, unit='ns')
76+
77+
class Datetime():
    """
    Tarantool datetime value stored as a ``pandas.Timestamp``.

    The wrapped timestamp is kept in ``self._timestamp``; use
    :meth:`to_pd_timestamp` to obtain a copy of it.
    """

    def __init__(self, *args, **kwargs):
        """
        Create a datetime object.

        The first positional argument selects the construction mode:

        - ``bytes``: decoded as a MessagePack datetime payload,
        - ``pandas.Timestamp``: copied,
        - ``Datetime``: its timestamp is copied.

        Any other combination of arguments is forwarded unchanged to
        ``pandas.Timestamp(*args, **kwargs)``.
        """
        if args:
            data = args[0]

            if isinstance(data, bytes):
                self._timestamp = msgpack_decode(data)
                return

            if isinstance(data, pandas.Timestamp):
                self._timestamp = deepcopy(data)
                return

            if isinstance(data, Datetime):
                self._timestamp = deepcopy(data._timestamp)
                return

        # Keyword-only calls and positional values of any other type
        # (e.g. an ISO string) follow the pandas.Timestamp() API, so the
        # object never ends up without a ``_timestamp`` attribute.
        self._timestamp = pandas.Timestamp(*args, **kwargs)

    def __eq__(self, other):
        """
        Compare with another ``Datetime`` or a ``pandas.Timestamp``.

        Objects of any other type are never equal.
        """
        if isinstance(other, Datetime):
            return self._timestamp == other._timestamp
        elif isinstance(other, pandas.Timestamp):
            return self._timestamp == other
        else:
            return False

    def __str__(self):
        """Return the string form of the wrapped timestamp."""
        return self._timestamp.__str__()

    def __repr__(self):
        """Return the repr of the wrapped timestamp."""
        return self._timestamp.__repr__()

    def to_pd_timestamp(self):
        """Return a copy of the wrapped ``pandas.Timestamp``."""
        return deepcopy(self._timestamp)

    def msgpack_encode(self):
        """
        Encode the value as a MessagePack datetime extension payload.

        :return: the seconds field alone when the nanoseconds part is
            zero, otherwise seconds followed by nsec, tzoffset and
            tzindex
        :rtype: bytes
        """
        ts_value = self._timestamp.value

        # Floor division keeps nsec non-negative for values before epoch.
        seconds = ts_value // NSEC_IN_SEC
        nsec = ts_value % NSEC_IN_SEC
        # Timezone fields are always written as zero here.
        tzoffset = 0
        tzindex = 0

        buf = get_int_as_bytes(seconds, SECONDS_SIZE_BYTES)

        if (nsec != 0) or (tzoffset != 0) or (tzindex != 0):
            buf = buf + get_int_as_bytes(nsec, NSEC_SIZE_BYTES)
            buf = buf + get_int_as_bytes(tzoffset, TZOFFSET_SIZE_BYTES)
            buf = buf + get_int_as_bytes(tzindex, TZINDEX_SIZE_BYTES)

        return buf

‎tarantool/msgpack_ext/unpacker.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
import tarantool.msgpack_ext.decimal as ext_decimal
22
import tarantool.msgpack_ext.uuid as ext_uuid
3+
import tarantool.msgpack_ext.datetime as ext_datetime
34

45
decoders = {
5-
ext_decimal.EXT_ID: ext_decimal.decode,
6-
ext_uuid.EXT_ID : ext_uuid.decode ,
6+
ext_decimal.EXT_ID : ext_decimal.decode ,
7+
ext_uuid.EXT_ID : ext_uuid.decode ,
8+
ext_datetime.EXT_ID: ext_datetime.decode,
79
}
810

911
def ext_hook(code, data):

‎test/suites/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,14 @@
1717
from .test_ssl import TestSuite_Ssl
1818
from .test_decimal import TestSuite_Decimal
1919
from .test_uuid import TestSuite_UUID
20+
from .test_datetime import TestSuite_Datetime
2021

2122
test_cases = (TestSuite_Schema_UnicodeConnection,
2223
TestSuite_Schema_BinaryConnection,
2324
TestSuite_Request, TestSuite_Protocol, TestSuite_Reconnect,
2425
TestSuite_Mesh, TestSuite_Execute, TestSuite_DBAPI,
2526
TestSuite_Encoding, TestSuite_Pool, TestSuite_Ssl,
26-
TestSuite_Decimal, TestSuite_UUID)
27+
TestSuite_Decimal, TestSuite_UUID, TestSuite_Datetime)
2728

2829
def load_tests(loader, tests, pattern):
2930
suite = unittest.TestSuite()

‎test/suites/lib/skip.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,3 +154,14 @@ def skip_or_run_UUID_test(func):
154154

155155
return skip_or_run_test_tarantool(func, '2.4.1',
156156
'does not support UUID type')
157+
158+
def skip_or_run_datetime_test(func):
    """Decorator deciding whether a datetime-related test is run or
    skipped, depending on the tarantool version.

    Tarantool supports datetime type only since 2.10.0 version.
    See https://github.com/tarantool/tarantool/issues/5941
    """

    required_module = 'datetime'
    skip_reason = 'does not support datetime type'

    return skip_or_run_test_pcall_require(func, required_module, skip_reason)

‎test/suites/test_datetime.py

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
# -*- coding: utf-8 -*-
2+
3+
from __future__ import print_function
4+
5+
import sys
6+
import unittest
7+
import msgpack
8+
import warnings
9+
import tarantool
10+
import pandas
11+
12+
from tarantool.msgpack_ext.packer import default as packer_default
13+
from tarantool.msgpack_ext.unpacker import ext_hook as unpacker_ext_hook
14+
15+
from .lib.tarantool_server import TarantoolServer
16+
from .lib.skip import skip_or_run_datetime_test
17+
from tarantool.error import MsgpackError, MsgpackWarning
18+
19+
class TestSuite_Datetime(unittest.TestCase):
    """Encode/decode checks for the datetime MessagePack extension type."""

    @classmethod
    def setUpClass(cls):
        # Banner in the test log, then start a server with the test space.
        print(' DATETIME EXT TYPE '.center(70, '='), file=sys.stderr)
        print('-' * 70, file=sys.stderr)
        cls.srv = TarantoolServer()
        cls.srv.script = 'test/suites/box.lua'
        cls.srv.start()

        cls.adm = cls.srv.admin
        setup_script = r"""
            _, datetime = pcall(require, 'datetime')

            box.schema.space.create('test')
            box.space['test']:create_index('primary', {
                type = 'tree',
                parts = {1, 'string'},
                unique = true})

            box.schema.user.create('test', {password = 'test', if_not_exists = true})
            box.schema.user.grant('test', 'read,write,execute', 'universe')
        """
        cls.adm(setup_script)

        cls.con = tarantool.Connection(cls.srv.host, cls.srv.args['primary'],
                                       user='test', password='test')

    def setUp(self):
        # prevent a remote tarantool from clean our session
        if self.srv.is_started():
            self.srv.touch_lock()

        self.adm("box.space['test']:truncate()")

    # Each case pairs a Python value with its msgpack payload and with
    # the Lua expression used to build the value on the server side.
    cases = {
        'date': {
            'python': tarantool.Datetime(year=2022, month=8, day=31),
            'msgpack': b'\x80\xa4\x0e\x63\x00\x00\x00\x00',
            'tarantool': r"datetime.new({year=2022, month=8, day=31})",
        },
        'date_unix_start': {
            'python': tarantool.Datetime(year=1970, month=1, day=1),
            'msgpack': b'\x00\x00\x00\x00\x00\x00\x00\x00',
            'tarantool': r"datetime.new({year=1970, month=1, day=1})",
        },
        'date_before_1970': {
            'python': tarantool.Datetime(year=1900, month=1, day=1),
            'msgpack': b'\x80\x81\x55\x7c\xff\xff\xff\xff',
            'tarantool': r"datetime.new({year=1900, month=1, day=1})",
        },
        'datetime_with_minutes': {
            'python': tarantool.Datetime(year=2022, month=8, day=31,
                                         hour=18, minute=7),
            'msgpack': b'\x44\xa3\x0f\x63\x00\x00\x00\x00',
            'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7})",
        },
        'datetime_with_seconds': {
            'python': tarantool.Datetime(year=2022, month=8, day=31,
                                         hour=18, minute=7, second=54),
            'msgpack': b'\x7a\xa3\x0f\x63\x00\x00\x00\x00',
            'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54})",
        },
        'datetime_with_microseconds': {
            'python': tarantool.Datetime(year=2022, month=8, day=31,
                                         hour=18, minute=7, second=54,
                                         microsecond=308543),
            'msgpack': b'\x7a\xa3\x0f\x63\x00\x00\x00\x00\x18\xfe\x63\x12\x00\x00\x00\x00',
            'tarantool': (r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, "
                          r"nsec=308543000})"),
        },
        'datetime_with_nanoseconds': {
            'python': tarantool.Datetime(year=2022, month=8, day=31,
                                         hour=18, minute=7, second=54,
                                         microsecond=308543, nanosecond=321),
            'msgpack': b'\x7a\xa3\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\x00\x00\x00\x00',
            'tarantool': (r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, "
                          r"nsec=308543321})"),
        },
        'pandas_timestamp': {
            'python': tarantool.Datetime(pandas.Timestamp(
                year=2022, month=8, day=31, hour=18, minute=7, second=54,
                microsecond=308543, nanosecond=321
            )),
            'msgpack': b'\x7a\xa3\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\x00\x00\x00\x00',
            'tarantool': (r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, "
                          r"nsec=308543321})"),
        },
    }

    def test_msgpack_decode(self):
        # Raw payload -> Python object through the unpacker hook.
        for name, case in self.cases.items():
            with self.subTest(msg=name):
                decoded = unpacker_ext_hook(4, case['msgpack'])
                self.assertEqual(decoded, case['python'])

    @skip_or_run_datetime_test
    def test_tarantool_decode(self):
        # Value created on the server -> Python object through select.
        for name, case in self.cases.items():
            with self.subTest(msg=name):
                self.adm(f"box.space['test']:replace{{'{name}', {case['tarantool']}}}")

                selected = self.con.select('test', name)
                self.assertSequenceEqual(selected, [[name, case['python']]])

    def test_msgpack_encode(self):
        # Python object -> ExtType through the packer default hook.
        for name, case in self.cases.items():
            with self.subTest(msg=name):
                expected = msgpack.ExtType(code=4, data=case['msgpack'])
                self.assertEqual(packer_default(case['python']), expected)

    @skip_or_run_datetime_test
    def test_tarantool_encode(self):
        # Python object inserted by the connector is compared on the
        # server side with a value built by the Lua datetime module.
        for name, case in self.cases.items():
            with self.subTest(msg=name):
                self.con.insert('test', [name, case['python']])

                lua_eval = f"""
                    local dt = {case['tarantool']}

                    local tuple = box.space['test']:get('{name}')
                    assert(tuple ~= nil)

                    if tuple[2] == dt then
                        return true
                    else
                        return nil, ('%s is not equal to expected %s'):format(
                            tostring(tuple[2]), tostring(dt))
                    end
                """

                self.assertSequenceEqual(self.adm(lua_eval), [True])

    @classmethod
    def tearDownClass(cls):
        cls.con.close()
        cls.srv.stop()
        cls.srv.clean()

0 commit comments

Comments
 (0)
Please sign in to comment.