From fe6f9b1a933673a7f926b1c0c2b7847b3d400464 Mon Sep 17 00:00:00 2001 From: Georgy Moiseev Date: Mon, 24 Oct 2022 11:54:16 +0300 Subject: [PATCH 1/4] test: fix dbapi test connection resource warnings DBAPI2 compliance tests are not implemented here but inherited from external module [1]. Two tests from this module open a connection and forget to close it. The issue had been filed together with patch PR to module repository [2], but the last update was 7 years ago so it is possibly that it would never be merged. This patch adds this PR change with method overwrite. 1. https://pypi.org/project/dbapi-compliance/ 2. https://github.com/baztian/dbapi-compliance/issues/5 Part of #250 --- test/suites/test_dbapi.py | 40 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/test/suites/test_dbapi.py b/test/suites/test_dbapi.py index 3873fa84..0df06337 100644 --- a/test/suites/test_dbapi.py +++ b/test/suites/test_dbapi.py @@ -127,3 +127,43 @@ def test_setoutputsize(self): # Do nothing @unittest.skip('Not implemented') def test_description(self): pass + + def test_ExceptionsAsConnectionAttributes(self): + # Workaround for https://github.com/baztian/dbapi-compliance/issues/5 + + # OPTIONAL EXTENSION + # Test for the optional DB API 2.0 extension, where the exceptions + # are exposed as attributes on the Connection object + # I figure this optional extension will be implemented by any + # driver author who is using this test suite, so it is enabled + # by default. 
+ drv = self.driver + con = self._connect() + try: + dbapi20._failUnless(self,con.Warning is drv.Warning) + dbapi20._failUnless(self,con.Error is drv.Error) + dbapi20._failUnless(self,con.InterfaceError is drv.InterfaceError) + dbapi20._failUnless(self,con.DatabaseError is drv.DatabaseError) + dbapi20._failUnless(self,con.OperationalError is drv.OperationalError) + dbapi20._failUnless(self,con.IntegrityError is drv.IntegrityError) + dbapi20._failUnless(self,con.InternalError is drv.InternalError) + dbapi20._failUnless(self,con.ProgrammingError is drv.ProgrammingError) + dbapi20. _failUnless(self,con.NotSupportedError is drv.NotSupportedError) + finally: + con.close() + + + def test_rollback(self): + # Workaround for https://github.com/baztian/dbapi-compliance/issues/5 + + con = self._connect() + try: + # If rollback is defined, it should either work or throw + # the documented exception + if hasattr(con,'rollback'): + try: + con.rollback() + except self.driver.NotSupportedError: + pass + finally: + con.close() From b338a1fbb14d4a0cb55d29d6ecaf87b2cadd6cde Mon Sep 17 00:00:00 2001 From: Georgy Moiseev Date: Mon, 24 Oct 2022 15:46:21 +0300 Subject: [PATCH 2/4] api: pandas way to build datetime from timestamp This option is required so it would be possible to decode Datetime with external function without constructing excessive pandas.Timestamp object. Follows #204 --- CHANGELOG.md | 45 ++++++++++++ tarantool/msgpack_ext/types/datetime.py | 98 +++++++++++++++++++------ test/suites/test_datetime.py | 6 ++ 3 files changed, 128 insertions(+), 21 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b8b2a75..1852f4d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -136,6 +136,51 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Support iproto feature discovery (#206). +- Support pandas way to build datetime from timestamp (PR #252). 
+ + `timestamp_since_utc_epoch` is a parameter to set timestamp + conversion behavior for timezone-aware datetimes. + + If ``False`` (default), behaves similar to Tarantool `datetime.new()`: + + ```python + >>> dt = tarantool.Datetime(timestamp=1640995200, timestamp_since_utc_epoch=False) + >>> dt + datetime: Timestamp('2022-01-01 00:00:00'), tz: "" + >>> dt.timestamp + 1640995200.0 + >>> dt = tarantool.Datetime(timestamp=1640995200, tz='Europe/Moscow', + ... timestamp_since_utc_epoch=False) + >>> dt + datetime: Timestamp('2022-01-01 00:00:00+0300', tz='Europe/Moscow'), tz: "Europe/Moscow" + >>> dt.timestamp + 1640984400.0 + ``` + + Thus, if ``False``, datetime is computed from timestamp + since epoch and then timezone is applied without any + conversion. In that case, `dt.timestamp` won't be equal to + initialization `timestamp` for all timezones with non-zero offset. + + If ``True``, behaves similar to `pandas.Timestamp`: + + ```python + >>> dt = tarantool.Datetime(timestamp=1640995200, timestamp_since_utc_epoch=True) + >>> dt + datetime: Timestamp('2022-01-01 00:00:00'), tz: "" + >>> dt.timestamp + 1640995200.0 + >>> dt = tarantool.Datetime(timestamp=1640995200, tz='Europe/Moscow', + ... timestamp_since_utc_epoch=True) + >>> dt + datetime: Timestamp('2022-01-01 03:00:00+0300', tz='Europe/Moscow'), tz: "Europe/Moscow" + >>> dt.timestamp + 1640995200.0 + ``` + + Thus, if ``True``, datetime is computed in a way that `dt.timestamp` will + always be equal to initialization `timestamp`. + ### Changed - Bump msgpack requirement to 1.0.4 (PR #223).
The only reason of this bump is various vulnerability fixes, diff --git a/tarantool/msgpack_ext/types/datetime.py b/tarantool/msgpack_ext/types/datetime.py index d84352dd..b2dac8a9 100644 --- a/tarantool/msgpack_ext/types/datetime.py +++ b/tarantool/msgpack_ext/types/datetime.py @@ -279,7 +279,7 @@ class Datetime(): def __init__(self, data=None, *, timestamp=None, year=None, month=None, day=None, hour=None, minute=None, sec=None, nsec=None, - tzoffset=0, tz=''): + tzoffset=0, tz='', timestamp_since_utc_epoch=False): """ :param data: MessagePack binary data to decode. If provided, all other parameters are ignored. @@ -294,7 +294,10 @@ def __init__(self, data=None, *, timestamp=None, year=None, month=None, :paramref:`~tarantool.Datetime.params.minute`, :paramref:`~tarantool.Datetime.params.sec`. If :paramref:`~tarantool.Datetime.params.nsec` is provided, - it must be :obj:`int`. + it must be :obj:`int`. Refer to + :paramref:`~tarantool.Datetime.params.timestamp_since_utc_epoch` + to clarify how timezone-aware datetime is computed from + the timestamp. :type timestamp: :obj:`float` or :obj:`int`, optional :param year: Datetime year value. Must be a valid @@ -344,8 +347,60 @@ def __init__(self, data=None, *, timestamp=None, year=None, month=None, :param tz: Timezone name from Olson timezone database. :type tz: :obj:`str`, optional + :param timestamp_since_utc_epoch: Parameter to set timestamp + conversion behavior for timezone-aware datetimes. + + If ``False`` (default), behaves similar to Tarantool + `datetime.new()`_: + + .. code-block:: python + + >>> dt = tarantool.Datetime(timestamp=1640995200, timestamp_since_utc_epoch=False) + >>> dt + datetime: Timestamp('2022-01-01 00:00:00'), tz: "" + >>> dt.timestamp + 1640995200.0 + >>> dt = tarantool.Datetime(timestamp=1640995200, tz='Europe/Moscow', + ...
timestamp_since_utc_epoch=False) + >>> dt + datetime: Timestamp('2022-01-01 00:00:00+0300', tz='Europe/Moscow'), tz: "Europe/Moscow" + >>> dt.timestamp + 1640984400.0 + + Thus, if ``False``, datetime is computed from timestamp + since epoch and then timezone is applied without any + conversion. In that case, + :attr:`~tarantool.Datetime.timestamp` won't be equal to + initialization + :paramref:`~tarantool.Datetime.params.timestamp` for all + timezones with non-zero offset. + + If ``True``, behaves similar to :class:`pandas.Timestamp`: + + .. code-block:: python + + >>> dt = tarantool.Datetime(timestamp=1640995200, timestamp_since_utc_epoch=True) + >>> dt + datetime: Timestamp('2022-01-01 00:00:00'), tz: "" + >>> dt.timestamp + 1640995200.0 + >>> dt = tarantool.Datetime(timestamp=1640995200, tz='Europe/Moscow', + ... timestamp_since_utc_epoch=True) + >>> dt + datetime: Timestamp('2022-01-01 03:00:00+0300', tz='Europe/Moscow'), tz: "Europe/Moscow" + >>> dt.timestamp + 1640995200.0 + + Thus, if ``True``, datetime is computed in a way that + :attr:`~tarantool.Datetime.timestamp` will always be equal + to initialization + :paramref:`~tarantool.Datetime.params.timestamp`. + :type timestamp_since_utc_epoch: :obj:`bool`, optional + :raise: :exc:`ValueError`, :exc:`~tarantool.error.MsgpackError`, :class:`pandas.Timestamp` exceptions + + .. _datetime.new(): https://www.tarantool.io/en/doc/latest/reference/reference_lua/datetime/new/ """ if data is not None: @@ -358,6 +413,16 @@ def __init__(self, data=None, *, timestamp=None, year=None, month=None, self._tz = tz return + tzinfo = None + if tz != '': + if tz not in tt_timezones.timezoneToIndex: + raise ValueError(f'Unknown Tarantool timezone "{tz}"') + + tzinfo = get_python_tzinfo(tz, ValueError) + elif tzoffset != 0: + tzinfo = pytz.FixedOffset(tzoffset) + self._tz = tz + # The logic is same as in Tarantool, refer to datetime API.
# https://www.tarantool.io/en/doc/latest/reference/reference_lua/datetime/new/ if timestamp is not None: @@ -375,6 +440,11 @@ def __init__(self, data=None, *, timestamp=None, year=None, month=None, datetime = pandas.to_datetime(total_nsec, unit='ns') else: datetime = pandas.to_datetime(timestamp, unit='s') + + if not timestamp_since_utc_epoch: + self._datetime = datetime.replace(tzinfo=tzinfo) + else: + self._datetime = datetime.replace(tzinfo=pytz.UTC).tz_convert(tzinfo) else: if nsec is not None: microsecond = nsec // NSEC_IN_MKSEC @@ -383,25 +453,11 @@ def __init__(self, data=None, *, timestamp=None, year=None, month=None, microsecond = 0 nanosecond = 0 - datetime = pandas.Timestamp(year=year, month=month, day=day, - hour=hour, minute=minute, second=sec, - microsecond=microsecond, - nanosecond=nanosecond) - - if tz != '': - if tz not in tt_timezones.timezoneToIndex: - raise ValueError(f'Unknown Tarantool timezone "{tz}"') - - tzinfo = get_python_tzinfo(tz, ValueError) - self._datetime = datetime.replace(tzinfo=tzinfo) - self._tz = tz - elif tzoffset != 0: - tzinfo = pytz.FixedOffset(tzoffset) - self._datetime = datetime.replace(tzinfo=tzinfo) - self._tz = '' - else: - self._datetime = datetime - self._tz = '' + self._datetime = pandas.Timestamp( + year=year, month=month, day=day, + hour=hour, minute=minute, second=sec, + microsecond=microsecond, + nanosecond=nanosecond, tzinfo=tzinfo) def _interval_operation(self, other, sign=1): """ diff --git a/test/suites/test_datetime.py b/test/suites/test_datetime.py index 3e4ee763..583f9058 100644 --- a/test/suites/test_datetime.py +++ b/test/suites/test_datetime.py @@ -270,6 +270,12 @@ def test_Datetime_class_invalid_init(self): 'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, " + r"nsec=308543321, tz='AZODT'})", }, + 'timestamp_since_utc_epoch': { + 'python': tarantool.Datetime(timestamp=1661958474, nsec=308543321, + tz='Europe/Moscow', timestamp_since_utc_epoch=True), + 'msgpack': 
(b'\x4a\x79\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\xb4\x00\xb3\x03'), + 'tarantool': r"datetime.new({timestamp=1661969274, nsec=308543321, tz='Europe/Moscow'})", + }, } def test_msgpack_decode(self): From 1f842558cda8451d80211e1388a19e497d9c6107 Mon Sep 17 00:00:00 2001 From: Georgy Moiseev Date: Mon, 24 Oct 2022 16:06:34 +0300 Subject: [PATCH 3/4] api: extract datetime encode/decode from class Extract tarantool.Datetime encode and decode to external functions. This is a breaking change, but since there is no tagged release with Datetime yet and API was more internal rather than public, it shouldn't be an issue. Follows #204 --- CHANGELOG.md | 2 + tarantool/msgpack_ext/datetime.py | 146 +++++++++++++++++- tarantool/msgpack_ext/types/datetime.py | 191 +----------------------- test/suites/test_datetime.py | 8 +- 4 files changed, 153 insertions(+), 194 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1852f4d8..ff7b56d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -189,6 +189,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Update API documentation strings (#67). - Update documentation index, quick start and guide pages (#67). - Use git version to set package version (#238). +- Extract tarantool.Datetime encode and decode to external + functions (PR #252). ### Fixed - Package build (#238). diff --git a/tarantool/msgpack_ext/datetime.py b/tarantool/msgpack_ext/datetime.py index e47f162e..fc1045d4 100644 --- a/tarantool/msgpack_ext/datetime.py +++ b/tarantool/msgpack_ext/datetime.py @@ -1,18 +1,83 @@ """ Tarantool `datetime`_ extension type support module. -Refer to :mod:`~tarantool.msgpack_ext.types.datetime`. +The datetime MessagePack representation looks like this: -.. _datetime: https://www.tarantool.io/en/doc/latest/dev_guide/internals/msgpack_extensions/#the-datetime-type +.. 
code-block:: text + + +---------+----------------+==========+-----------------+ + | MP_EXT | MP_DATETIME | seconds | nsec; tzoffset; | + | = d7/d8 | = 4 | | tzindex; | + +---------+----------------+==========+-----------------+ + +MessagePack data contains: + +* Seconds (8 bytes) as an unencoded 64-bit signed integer stored in the + little-endian order. +* The optional fields (8 bytes), if any of them have a non-zero value. + The fields include nsec (4 bytes), tzoffset (2 bytes), and + tzindex (2 bytes) packed in the little-endian order. + +``seconds`` is seconds since Epoch, where the epoch is the point where +the time starts, and is platform dependent. For Unix, the epoch is +January 1, 1970, 00:00:00 (UTC). Tarantool uses a ``double`` type, see a +structure definition in src/lib/core/datetime.h and reasons in +`datetime RFC`_. + +``nsec`` is nanoseconds, fractional part of seconds. Tarantool uses +``int32_t``, see a definition in src/lib/core/datetime.h. + +``tzoffset`` is timezone offset in minutes from UTC. Tarantool uses +``int16_t`` type, see a structure definition in src/lib/core/datetime.h. + +``tzindex`` is Olson timezone id. Tarantool uses ``int16_t`` type, see +a structure definition in src/lib/core/datetime.h. If both +``tzoffset`` and ``tzindex`` are specified, ``tzindex`` has the +preference and the ``tzoffset`` value is ignored. + +.. _datetime RFC: https://github.com/tarantool/tarantool/wiki/Datetime-internals#intervals-in-c """ -from tarantool.msgpack_ext.types.datetime import Datetime +from tarantool.msgpack_ext.types.datetime import ( + NSEC_IN_SEC, + SEC_IN_MIN, + Datetime, +) +import tarantool.msgpack_ext.types.timezones as tt_timezones + +from tarantool.error import MsgpackError EXT_ID = 4 """ `datetime`_ type id. """ +BYTEORDER = 'little' + +SECONDS_SIZE_BYTES = 8 +NSEC_SIZE_BYTES = 4 +TZOFFSET_SIZE_BYTES = 2 +TZINDEX_SIZE_BYTES = 2 + + +def get_int_as_bytes(data, size): + """ + Get binary representation of integer value. 
+ + :param data: Integer value. + :type data: :obj:`int` + + :param size: Integer size, in bytes. + :type size: :obj:`int` + + :return: Encoded integer. + :rtype: :obj:`bytes` + + :meta private: + """ + + return data.to_bytes(size, byteorder=BYTEORDER, signed=True) + def encode(obj): """ Encode a datetime object. @@ -26,7 +91,48 @@ def encode(obj): :raise: :exc:`tarantool.Datetime.msgpack_encode` exceptions """ - return obj.msgpack_encode() + seconds = obj.value // NSEC_IN_SEC + nsec = obj.nsec + tzoffset = obj.tzoffset + + tz = obj.tz + if tz != '': + tzindex = tt_timezones.timezoneToIndex[tz] + else: + tzindex = 0 + + buf = get_int_as_bytes(seconds, SECONDS_SIZE_BYTES) + + if (nsec != 0) or (tzoffset != 0) or (tzindex != 0): + buf = buf + get_int_as_bytes(nsec, NSEC_SIZE_BYTES) + buf = buf + get_int_as_bytes(tzoffset, TZOFFSET_SIZE_BYTES) + buf = buf + get_int_as_bytes(tzindex, TZINDEX_SIZE_BYTES) + + return buf + + +def get_bytes_as_int(data, cursor, size): + """ + Get integer value from binary data. + + :param data: MessagePack binary data. + :type data: :obj:`bytes` + + :param cursor: Index after last parsed byte. + :type cursor: :obj:`int` + + :param size: Integer size, in bytes. + :type size: :obj:`int` + + :return: First value: parsed integer, second value: new cursor + position. + :rtype: first value: :obj:`int`, second value: :obj:`int` + + :meta private: + """ + + part = data[cursor:cursor + size] + return int.from_bytes(part, BYTEORDER, signed=True), cursor + size def decode(data): """ @@ -38,7 +144,35 @@ def decode(data): :return: Decoded datetime. 
:rtype: :class:`tarantool.Datetime` - :raise: :exc:`tarantool.Datetime` exceptions + :raise: :exc:`~tarantool.error.MsgpackError`, + :exc:`tarantool.Datetime` exceptions """ - return Datetime(data) + cursor = 0 + seconds, cursor = get_bytes_as_int(data, cursor, SECONDS_SIZE_BYTES) + + data_len = len(data) + if data_len == (SECONDS_SIZE_BYTES + NSEC_SIZE_BYTES + \ + TZOFFSET_SIZE_BYTES + TZINDEX_SIZE_BYTES): + nsec, cursor = get_bytes_as_int(data, cursor, NSEC_SIZE_BYTES) + tzoffset, cursor = get_bytes_as_int(data, cursor, TZOFFSET_SIZE_BYTES) + tzindex, cursor = get_bytes_as_int(data, cursor, TZINDEX_SIZE_BYTES) + elif data_len == SECONDS_SIZE_BYTES: + nsec = 0 + tzoffset = 0 + tzindex = 0 + else: + raise MsgpackError(f'Unexpected datetime payload length {data_len}') + + if tzindex != 0: + if tzindex not in tt_timezones.indexToTimezone: + raise MsgpackError(f'Failed to decode datetime with unknown tzindex "{tzindex}"') + tz = tt_timezones.indexToTimezone[tzindex] + return Datetime(timestamp=seconds, nsec=nsec, tz=tz, + timestamp_since_utc_epoch=True) + elif tzoffset != 0: + return Datetime(timestamp=seconds, nsec=nsec, tzoffset=tzoffset, + timestamp_since_utc_epoch=True) + else: + return Datetime(timestamp=seconds, nsec=nsec, + timestamp_since_utc_epoch=True) diff --git a/tarantool/msgpack_ext/types/datetime.py b/tarantool/msgpack_ext/types/datetime.py index b2dac8a9..f5912dda 100644 --- a/tarantool/msgpack_ext/types/datetime.py +++ b/tarantool/msgpack_ext/types/datetime.py @@ -1,41 +1,5 @@ """ -Tarantool `datetime`_ extension type support module. - -The datetime MessagePack representation looks like this: - -.. code-block:: text - - +---------+----------------+==========+-----------------+ - | MP_EXT | MP_DATETIME | seconds | nsec; tzoffset; | - | = d7/d8 | = 4 | | tzindex; | - +---------+----------------+==========+-----------------+ - -MessagePack data contains: - -* Seconds (8 bytes) as an unencoded 64-bit signed integer stored in the - little-endian order. 
-* The optional fields (8 bytes), if any of them have a non-zero value. - The fields include nsec (4 bytes), tzoffset (2 bytes), and - tzindex (2 bytes) packed in the little-endian order. - -``seconds`` is seconds since Epoch, where the epoch is the point where -the time starts, and is platform dependent. For Unix, the epoch is -January 1, 1970, 00:00:00 (UTC). Tarantool uses a ``double`` type, see a -structure definition in src/lib/core/datetime.h and reasons in -`datetime RFC`_. - -``nsec`` is nanoseconds, fractional part of seconds. Tarantool uses -``int32_t``, see a definition in src/lib/core/datetime.h. - -``tzoffset`` is timezone offset in minutes from UTC. Tarantool uses -``int16_t`` type, see a structure definition in src/lib/core/datetime.h. - -``tzindex`` is Olson timezone id. Tarantool uses ``int16_t`` type, see -a structure definition in src/lib/core/datetime.h. If both -``tzoffset`` and ``tzindex`` are specified, ``tzindex`` has the -preference and the ``tzoffset`` value is ignored. - -.. _datetime RFC: https://github.com/tarantool/tarantool/wiki/Datetime-internals#intervals-in-c +Tarantool `datetime`_ extension type implementation module. """ from copy import deepcopy @@ -44,63 +8,14 @@ import pytz import tarantool.msgpack_ext.types.timezones as tt_timezones -from tarantool.error import MsgpackError from tarantool.msgpack_ext.types.interval import Interval, Adjust -SECONDS_SIZE_BYTES = 8 -NSEC_SIZE_BYTES = 4 -TZOFFSET_SIZE_BYTES = 2 -TZINDEX_SIZE_BYTES = 2 - -BYTEORDER = 'little' - NSEC_IN_SEC = 1000000000 NSEC_IN_MKSEC = 1000 SEC_IN_MIN = 60 MONTH_IN_YEAR = 12 -def get_bytes_as_int(data, cursor, size): - """ - Get integer value from binary data. - - :param data: MessagePack binary data. - :type data: :obj:`bytes` - - :param cursor: Index after last parsed byte. - :type cursor: :obj:`int` - - :param size: Integer size, in bytes. - :type size: :obj:`int` - - :return: First value: parsed integer, second value: new cursor - position. 
- :rtype: first value: :obj:`int`, second value: :obj:`int` - - :meta private: - """ - - part = data[cursor:cursor + size] - return int.from_bytes(part, BYTEORDER, signed=True), cursor + size - -def get_int_as_bytes(data, size): - """ - Get binary representation of integer value. - - :param data: Integer value. - :type data: :obj:`int` - - :param size: Integer size, in bytes. - :type size: :obj:`int` - - :return: Encoded integer. - :rtype: :obj:`bytes` - - :meta private: - """ - - return data.to_bytes(size, byteorder=BYTEORDER, signed=True) - def compute_offset(timestamp): """ Compute timezone offset. Offset is computed each time and not stored @@ -126,7 +41,7 @@ def compute_offset(timestamp): # There is no precision loss since offset is in minutes return int(utc_offset.total_seconds()) // SEC_IN_MIN -def get_python_tzinfo(tz, error_class): +def get_python_tzinfo(tz): """ All non-abbreviated Tarantool timezones are represented as pytz timezones (from :func:`pytz.timezone`). All non-ambiguous @@ -138,9 +53,6 @@ def get_python_tzinfo(tz, error_class): :param tz: Tarantool timezone name. :type tz: :obj:`str` - :param error_class: Error class to raise in case of fail. - :type error_class: :obj:`Exception` - :return: Timezone object. :rtype: :func:`pytz.timezone` result or :class:`pytz.FixedOffset` @@ -155,66 +67,17 @@ def get_python_tzinfo(tz, error_class): # Checked with timezones/validate_timezones.py tt_tzinfo = tt_timezones.timezoneAbbrevInfo[tz] if (tt_tzinfo['category'] & tt_timezones.TZ_AMBIGUOUS) != 0: - raise error_class(f'Failed to create datetime with ambiguous timezone "{tz}"') + raise ValueError(f'Failed to create datetime with ambiguous timezone "{tz}"') return pytz.FixedOffset(tt_tzinfo['offset']) -def msgpack_decode(data): - """ - Decode MsgPack binary data to useful timestamp and timezone data. - For internal use of :class:`~tarantool.Datetime`. - - :param data: MessagePack binary data to decode. 
- :type data: :obj:`bytes` - - :return: First value: timestamp data with timezone info, second - value: Tarantool timezone name. - :rtype: first value: :class:`pandas.Timestamp`, second value: - :obj:`str` - - :raises: :exc:`~tarantool.error.MsgpackError` - - :meta private: - """ - - cursor = 0 - seconds, cursor = get_bytes_as_int(data, cursor, SECONDS_SIZE_BYTES) - - data_len = len(data) - if data_len == (SECONDS_SIZE_BYTES + NSEC_SIZE_BYTES + \ - TZOFFSET_SIZE_BYTES + TZINDEX_SIZE_BYTES): - nsec, cursor = get_bytes_as_int(data, cursor, NSEC_SIZE_BYTES) - tzoffset, cursor = get_bytes_as_int(data, cursor, TZOFFSET_SIZE_BYTES) - tzindex, cursor = get_bytes_as_int(data, cursor, TZINDEX_SIZE_BYTES) - elif data_len == SECONDS_SIZE_BYTES: - nsec = 0 - tzoffset = 0 - tzindex = 0 - else: - raise MsgpackError(f'Unexpected datetime payload length {data_len}') - - total_nsec = seconds * NSEC_IN_SEC + nsec - datetime = pandas.to_datetime(total_nsec, unit='ns') - - if tzindex != 0: - if tzindex not in tt_timezones.indexToTimezone: - raise MsgpackError(f'Failed to decode datetime with unknown tzindex "{tzindex}"') - tz = tt_timezones.indexToTimezone[tzindex] - tzinfo = get_python_tzinfo(tz, MsgpackError) - return datetime.replace(tzinfo=pytz.UTC).tz_convert(tzinfo), tz - elif tzoffset != 0: - tzinfo = pytz.FixedOffset(tzoffset) - return datetime.replace(tzinfo=pytz.UTC).tz_convert(tzinfo), '' - else: - return datetime, '' - class Datetime(): """ Class representing Tarantool `datetime`_ info. Internals are based on :class:`pandas.Timestamp`. - You can create :class:`~tarantool.Datetime` objects either from - MessagePack data or by using the same API as in Tarantool: + You can create :class:`~tarantool.Datetime` objects by using the + same API as in Tarantool: .. code-block:: python @@ -277,14 +140,10 @@ class Datetime(): .. 
_datetime: https://www.tarantool.io/en/doc/latest/dev_guide/internals/msgpack_extensions/#the-datetime-type """ - def __init__(self, data=None, *, timestamp=None, year=None, month=None, + def __init__(self, *, timestamp=None, year=None, month=None, day=None, hour=None, minute=None, sec=None, nsec=None, tzoffset=0, tz='', timestamp_since_utc_epoch=False): """ - :param data: MessagePack binary data to decode. If provided, - all other parameters are ignored. - :type data: :obj:`bytes`, optional - :param timestamp: Timestamp since epoch. Cannot be provided together with :paramref:`~tarantool.Datetime.params.year`, @@ -403,22 +262,12 @@ def __init__(self, data=None, *, timestamp=None, year=None, month=None, .. _datetime.new(): https://www.tarantool.io/en/doc/latest/reference/reference_lua/datetime/new/ """ - if data is not None: - if not isinstance(data, bytes): - raise ValueError('data argument (first positional argument) ' + - 'expected to be a "bytes" instance') - - datetime, tz = msgpack_decode(data) - self._datetime = datetime - self._tz = tz - return - tzinfo = None if tz != '': if tz not in tt_timezones.timezoneToIndex: raise ValueError(f'Unknown Tarantool timezone "{tz}"') - tzinfo = get_python_tzinfo(tz, ValueError) + tzinfo = get_python_tzinfo(tz) elif tzoffset != 0: tzinfo = pytz.FixedOffset(tzoffset) self._tz = tz @@ -783,29 +632,3 @@ def value(self): """ return self._datetime.value - - def msgpack_encode(self): - """ - Encode a datetime object. 
- - :rtype: :obj:`bytes` - """ - - seconds = self.value // NSEC_IN_SEC - nsec = self.nsec - tzoffset = self.tzoffset - - tz = self.tz - if tz != '': - tzindex = tt_timezones.timezoneToIndex[tz] - else: - tzindex = 0 - - buf = get_int_as_bytes(seconds, SECONDS_SIZE_BYTES) - - if (nsec != 0) or (tzoffset != 0) or (tzindex != 0): - buf = buf + get_int_as_bytes(nsec, NSEC_SIZE_BYTES) - buf = buf + get_int_as_bytes(tzoffset, TZOFFSET_SIZE_BYTES) - buf = buf + get_int_as_bytes(tzindex, TZINDEX_SIZE_BYTES) - - return buf diff --git a/test/suites/test_datetime.py b/test/suites/test_datetime.py index 583f9058..fb2d2855 100644 --- a/test/suites/test_datetime.py +++ b/test/suites/test_datetime.py @@ -104,14 +104,14 @@ def test_Datetime_class_API_wth_tz(self): 'positional_year': { 'args': [2022], 'kwargs': {}, - 'type': ValueError, - 'msg': 'data argument (first positional argument) expected to be a "bytes" instance' + 'type': TypeError, + 'msg': '__init__() takes 1 positional argument but 2 were given' }, 'positional_date': { 'args': [2022, 8, 31], 'kwargs': {}, 'type': TypeError, - 'msg': '__init__() takes from 1 to 2 positional arguments but 4 were given' + 'msg': '__init__() takes 1 positional argument but 4 were given' }, 'mixing_date_and_timestamp': { 'args': [], @@ -338,7 +338,7 @@ def test_msgpack_decode_unknown_tzindex(self): def test_msgpack_decode_ambiguous_tzindex(self): case = b'\x4a\x79\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\x00\x00\x82\x00' self.assertRaisesRegex( - MsgpackError, 'Failed to create datetime with ambiguous timezone "AET"', + ValueError, 'Failed to create datetime with ambiguous timezone "AET"', lambda: unpacker_ext_hook(4, case)) From db9e5b864e4604b477259563487b2e9e85a3f069 Mon Sep 17 00:00:00 2001 From: Georgy Moiseev Date: Mon, 24 Oct 2022 16:27:05 +0300 Subject: [PATCH 4/4] api: extract interval encode/decode from class Extract tarantool.Interval encode and decode to external functions. 
This is a breaking change, but since there is no tagged release with Interval yet and API was more internal rather than public, it shouldn't be an issue. Follows #229 --- CHANGELOG.md | 2 + tarantool/msgpack_ext/interval.py | 103 ++++++++++++++++-- tarantool/msgpack_ext/types/interval.py | 136 +++--------------------- test/suites/test_interval.py | 43 +------- 4 files changed, 112 insertions(+), 172 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ff7b56d8..eed24c57 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -191,6 +191,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Use git version to set package version (#238). - Extract tarantool.Datetime encode and decode to external functions (PR #252). +- Extract tarantool.Interval encode and decode to external + functions (PR #252). ### Fixed - Package build (#238). diff --git a/tarantool/msgpack_ext/interval.py b/tarantool/msgpack_ext/interval.py index 20a791ef..725edc9d 100644 --- a/tarantool/msgpack_ext/interval.py +++ b/tarantool/msgpack_ext/interval.py @@ -1,12 +1,50 @@ """ Tarantool `datetime.interval`_ extension type support module. -Refer to :mod:`~tarantool.msgpack_ext.types.interval`. +The interval MessagePack representation looks like this: + +.. code-block:: text + + +--------+-------------------------+-------------+----------------+ + | MP_EXT | Size of packed interval | MP_INTERVAL | PackedInterval | + +--------+-------------------------+-------------+----------------+ + +Packed interval consists of: + +* Packed number of non-zero fields. +* Packed non-null fields. + +Each packed field has the following structure: + +.. code-block:: text + + +----------+=====================+ + | field ID | field value | + +----------+=====================+ + +The number of defined (non-null) fields can be zero. In this case, +the packed interval will be encoded as integer 0. 
+ +List of the field IDs: + +* 0 – year +* 1 – month +* 2 – week +* 3 – day +* 4 – hour +* 5 – minute +* 6 – second +* 7 – nanosecond +* 8 – adjust .. _datetime.interval: https://www.tarantool.io/en/doc/latest/dev_guide/internals/msgpack_extensions/#the-interval-type """ -from tarantool.msgpack_ext.types.interval import Interval +import msgpack + +from tarantool.error import MsgpackError + +from tarantool.msgpack_ext.types.interval import Interval, Adjust, id_map EXT_ID = 6 """ @@ -22,11 +60,25 @@ def encode(obj): :return: Encoded interval. :rtype: :obj:`bytes` - - :raise: :exc:`tarantool.Interval.msgpack_encode` exceptions """ - return obj.msgpack_encode() + buf = bytes() + + count = 0 + for field_id in id_map.keys(): + field_name = id_map[field_id] + value = getattr(obj, field_name) + + if field_name == 'adjust': + value = value.value + + if value != 0: + buf = buf + msgpack.packb(field_id) + msgpack.packb(value) + count = count + 1 + + buf = msgpack.packb(count) + buf + + return buf def decode(data): """ @@ -38,7 +90,44 @@ def decode(data): :return: Decoded interval. :rtype: :class:`tarantool.Interval` - :raise: :exc:`tarantool.Interval` exceptions + :raise: :exc:`MsgpackError` """ - return Interval(data) + # If MessagePack data does not contain a field value, it is zero. + # If built not from MessagePack data, set argument values later. + kwargs = { + 'year': 0, + 'month': 0, + 'week': 0, + 'day': 0, + 'hour': 0, + 'minute': 0, + 'sec': 0, + 'nsec': 0, + 'adjust': Adjust(0), + } + + if len(data) != 0: + # To create an unpacker is the only way to parse + # a sequence of values in Python msgpack module. 
+ unpacker = msgpack.Unpacker() + unpacker.feed(data) + field_count = unpacker.unpack() + for _ in range(field_count): + field_id = unpacker.unpack() + value = unpacker.unpack() + + if field_id not in id_map: + raise MsgpackError(f'Unknown interval field id {field_id}') + + field_name = id_map[field_id] + + if field_name == 'adjust': + try: + value = Adjust(value) + except ValueError as e: + raise MsgpackError(e) + + kwargs[id_map[field_id]] = value + + return Interval(**kwargs) diff --git a/tarantool/msgpack_ext/types/interval.py b/tarantool/msgpack_ext/types/interval.py index 62d98145..e90ff0a0 100644 --- a/tarantool/msgpack_ext/types/interval.py +++ b/tarantool/msgpack_ext/types/interval.py @@ -1,48 +1,9 @@ """ -Tarantool `datetime.interval`_ extension type support module. - -The interval MessagePack representation looks like this: - -.. code-block:: text - - +--------+-------------------------+-------------+----------------+ - | MP_EXT | Size of packed interval | MP_INTERVAL | PackedInterval | - +--------+-------------------------+-------------+----------------+ - -Packed interval consists of: - -* Packed number of non-zero fields. -* Packed non-null fields. - -Each packed field has the following structure: - -.. code-block:: text - - +----------+=====================+ - | field ID | field value | - +----------+=====================+ - -The number of defined (non-null) fields can be zero. In this case, -the packed interval will be encoded as integer 0. - -List of the field IDs: - -* 0 – year -* 1 – month -* 2 – week -* 3 – day -* 4 – hour -* 5 – minute -* 6 – second -* 7 – nanosecond -* 8 – adjust +Tarantool `datetime.interval`_ extension type implementation module. """ -import msgpack from enum import Enum -from tarantool.error import MsgpackError - id_map = { 0: 'year', 1: 'month', @@ -97,14 +58,10 @@ class Interval(): .. 
_datetime.interval: https://www.tarantool.io/en/doc/latest/dev_guide/internals/msgpack_extensions/#the-interval-type """ - def __init__(self, data=None, *, year=0, month=0, week=0, + def __init__(self, *, year=0, month=0, week=0, day=0, hour=0, minute=0, sec=0, nsec=0, adjust=Adjust.NONE): """ - :param data: MessagePack binary data to decode. If provided, - all other parameters are ignored. - :type data: :obj:`bytes`, optional - :param year: Interval year value. :type year: :obj:`int`, optional @@ -132,61 +89,17 @@ def __init__(self, data=None, *, year=0, month=0, week=0, :param adjust: Interval adjustment rule. Refer to :meth:`~tarantool.Datetime.__add__`. :type adjust: :class:`~tarantool.IntervalAdjust`, optional - - :raise: :exc:`ValueError` """ - - # If MessagePack data does not contain a field value, it is zero. - # If built not from MessagePack data, set argument values later. - self.year = 0 - self.month = 0 - self.week = 0 - self.day = 0 - self.hour = 0 - self.minute = 0 - self.sec = 0 - self.nsec = 0 - self.adjust = Adjust(0) - - if data is not None: - if not isinstance(data, bytes): - raise ValueError('data argument (first positional argument) ' + - 'expected to be a "bytes" instance') - - if len(data) == 0: - return - - # To create an unpacker is the only way to parse - # a sequence of values in Python msgpack module. 
- unpacker = msgpack.Unpacker() - unpacker.feed(data) - field_count = unpacker.unpack() - for _ in range(field_count): - field_id = unpacker.unpack() - value = unpacker.unpack() - - if field_id not in id_map: - raise MsgpackError(f'Unknown interval field id {field_id}') - - field_name = id_map[field_id] - - if field_name == 'adjust': - try: - value = Adjust(value) - except ValueError as e: - raise MsgpackError(e) - - setattr(self, id_map[field_id], value) - else: - self.year = year - self.month = month - self.week = week - self.day = day - self.hour = hour - self.minute = minute - self.sec = sec - self.nsec = nsec - self.adjust = adjust + + self.year = year + self.month = month + self.week = week + self.day = day + self.hour = hour + self.minute = minute + self.sec = sec + self.nsec = nsec + self.adjust = adjust def __add__(self, other): """ @@ -319,28 +232,3 @@ def __repr__(self): f'nsec={self.nsec}, adjust={self.adjust})' __str__ = __repr__ - - def msgpack_encode(self): - """ - Encode an interval object. 
- - :rtype: :obj:`bytes` - """ - - buf = bytes() - - count = 0 - for field_id in id_map.keys(): - field_name = id_map[field_id] - value = getattr(self, field_name) - - if field_name == 'adjust': - value = value.value - - if value != 0: - buf = buf + msgpack.packb(field_id) + msgpack.packb(value) - count = count + 1 - - buf = msgpack.packb(count) + buf - - return buf diff --git a/test/suites/test_interval.py b/test/suites/test_interval.py index 63a3c0ca..2de70a11 100644 --- a/test/suites/test_interval.py +++ b/test/suites/test_interval.py @@ -57,50 +57,11 @@ def setUp(self): self.adm("box.space['test']:truncate()") - def test_Interval_bytes_init(self): - dt = tarantool.Interval(b'\x02\x00\x01\x08\x01') - - self.assertEqual(dt.year, 1) - self.assertEqual(dt.month, 0) - self.assertEqual(dt.day, 0) - self.assertEqual(dt.hour, 0) - self.assertEqual(dt.minute, 0) - self.assertEqual(dt.sec, 0) - self.assertEqual(dt.nsec, 0) - self.assertEqual(dt.adjust, tarantool.IntervalAdjust.NONE) - - def test_Interval_non_bytes_positional_init(self): + def test_Interval_positional_init(self): self.assertRaisesRegex( - ValueError, re.escape('data argument (first positional argument) ' + - 'expected to be a "bytes" instance'), + TypeError, re.escape('__init__() takes 1 positional argument but 2 were given'), lambda: tarantool.Interval(1)) - def test_Interval_bytes_init_ignore_other_fields(self): - dt = tarantool.Interval(b'\x02\x00\x01\x08\x01', - year=2, month=2, day=3, hour=1, minute=2, - sec=3000, nsec=10000000, - adjust=tarantool.IntervalAdjust.LAST) - - self.assertEqual(dt.year, 1) - self.assertEqual(dt.month, 0) - self.assertEqual(dt.day, 0) - self.assertEqual(dt.hour, 0) - self.assertEqual(dt.minute, 0) - self.assertEqual(dt.sec, 0) - self.assertEqual(dt.nsec, 0) - self.assertEqual(dt.adjust, tarantool.IntervalAdjust.NONE) - - def test_Interval_bytes_init_unknown_field(self): - self.assertRaisesRegex( - MsgpackError, 'Unknown interval field id 9', - lambda: 
tarantool.Interval(b'\x01\x09\xce\x00\x98\x96\x80')) - - def test_Interval_bytes_init_unknown_adjust(self): - self.assertRaisesRegex( - MsgpackError, '3 is not a valid Adjust', - lambda: tarantool.Interval(b'\x02\x07\xce\x00\x98\x96\x80\x08\x03')) - - cases = { 'year': { 'python': tarantool.Interval(year=1),