Skip to content

Commit c8ae392

Browse files
msgpack: support tzindex in datetime
Support non-zero tzindex in datetime extended type. If both tzoffset and tzindex are specified, tzindex takes precedence (same as in Tarantool [1]). The Tarantool index-to-Olson-name map and its inverse are built with the gen_timezones.sh script, which is based on the tarantool/go-tarantool script [2]. If possible, a Tarantool timezone is decoded to a pytz [3] timezone. pytz is based on the Olson tz database, same as Tarantool. If a Tarantool timezone name is not supported by pytz (most abbreviated timezones, like `'MSK'`), we use `datetime.timezone(datetime.timedelta(minutes=offset), name=tzname)`. There are some pytz timezones not supported by Tarantool: CST6CDT, EST5EDT, MET, MST7MDT, PST8PDT, Europe/Kyiv and all Etc/GMT* timezones (except for Etc/GMT, Etc/GMT+0, Etc/GMT-0). An exception is raised in this case. If an ambiguous Tarantool timezone is specified, an exception is raised. 1. https://www.tarantool.io/en/doc/latest/reference/reference_lua/datetime/new/ 2. https://github.com/tarantool/go-tarantool/blob/5801dc6f5ce69db7c8bc0c0d0fe4fb6042d5ecbc/datetime/gen-timezones.sh 3. https://pypi.org/project/pytz/ Closes #204
1 parent a441bb1 commit c8ae392

File tree

7 files changed

+2094
-2
lines changed

7 files changed

+2094
-2
lines changed

CHANGELOG.md

+15
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
3636
In-built `datetime.timezone(datetime.timedelta(minutes=offset))`
3737
is used to store offset timezones.
3838

39+
- Support for timezones in the datetime type (#204).
40+
41+
If possible, a Tarantool timezone is decoded to a pytz timezone.
42+
pytz is based on Olson tz database, same as Tarantool. If Tarantool
43+
timezone name is not supported by pytz (most abbreviated timezones,
44+
like `'MSK'`), we use
45+
`datetime.timezone(datetime.timedelta(minutes=offset), name=tzname)`.
46+
47+
There are some pytz timezones not supported by Tarantool:
48+
CST6CDT, EST5EDT, MET, MST7MDT, PST8PDT, Europe/Kyiv and
49+
all Etc/GMT* timezones (except for Etc/GMT, Etc/GMT+0, Etc/GMT-0).
50+
An exception is raised in this case.
51+
52+
If an ambiguous Tarantool timezone is specified, an exception is raised.
53+
3954
### Changed
4055
- Bump msgpack requirement to 1.0.4 (PR #223).
4156
The only reason of this bump is various vulnerability fixes,

tarantool/msgpack_ext/types/datetime.py

+94-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
from copy import deepcopy
22

33
import datetime
4+
import pytz
45
import pandas
56

7+
import tarantool.msgpack_ext.types.timezones as tt_timezones
8+
from tarantool.error import MsgpackError, MsgpackWarning, warn
9+
610
# https://www.tarantool.io/en/doc/latest/dev_guide/internals/msgpack_extensions/#the-datetime-type
711
#
812
# The datetime MessagePack representation looks like this:
@@ -45,6 +49,7 @@
4549
SEC_IN_MIN = 60
4650
MIN_IN_DAY = 60 * 24
4751

52+
DATETIME_TIMEZONE_NAME_POS = 1
4853

4954
def get_bytes_as_int(data, cursor, size):
5055
part = data[cursor:cursor + size]
@@ -65,6 +70,80 @@ def compute_offset(timestamp):
6570
# There is no precision loss since offset is in minutes
6671
return utc_offset.days * MIN_IN_DAY + utc_offset.seconds // SEC_IN_MIN
6772

73+
def get_python_tzname(timestamp):
    """
    Extract the timezone name attached to a timestamp, if it has one.

    :param timestamp: Object exposing a ``tzinfo`` attribute (the callers
        in this module pass a ``pandas.Timestamp``).

    :return: ``None`` when there is no tzinfo, when tzinfo is a
        ``pytz._FixedOffset``, or when a ``datetime.timezone`` was built
        without an explicit name; otherwise the timezone name.

    :raise ValueError: tzinfo is not a ``datetime.tzinfo`` instance.
    """
    tz = timestamp.tzinfo

    if tz is None:
        return None

    # Named pytz timezones carry their Olson name in ``zone``.
    if isinstance(tz, pytz.tzinfo.BaseTzInfo):
        return tz.zone

    # Pure offset, no name. Note that pytz.FixedOffset(0) is actually
    # the pytz.utc timezone and is not even a pytz._FixedOffset.
    if isinstance(tz, pytz._FixedOffset):
        return None

    if isinstance(tz, datetime.timezone):
        # The only legal way to obtain a datetime.timezone name is
        # timezone.tzname(dt), but when no name was given to the
        # constructor it returns one generated from the offset.
        # We want `None` in that case, so the constructor arguments
        # are inspected instead.
        #
        # https://github.com/python/cpython/blob/1756ffd66a38755cd45de51316d66266ae30e132/Lib/datetime.py#L2323-L2327
        ctor_args = tz.__getinitargs__()
        has_name = len(ctor_args) > DATETIME_TIMEZONE_NAME_POS
        return ctor_args[DATETIME_TIMEZONE_NAME_POS] if has_name else None

    if isinstance(tz, datetime.tzinfo):
        # A custom tzinfo class is expected to implement tzname(dt).
        # https://github.com/python/cpython/blob/1756ffd66a38755cd45de51316d66266ae30e132/Lib/datetime.py#L1591
        return tz.tzname(timestamp)

    raise ValueError(f'Unsupported timezone type {type(tz)}')
108+
109+
110+
def is_abbrev_tz(tzname):
    """
    Tell whether ``tzname`` has an entry in the abbreviated timezone table.

    :param tzname: Timezone name to look up.

    :return: ``True`` if ``tzname`` is a key of
        ``tt_timezones.timezoneAbbrevInfo``, ``False`` otherwise.
    """
    abbrev_table = tt_timezones.timezoneAbbrevInfo
    return tzname in abbrev_table
112+
113+
def assert_nonambiguous_tz(tzname, tt_tzinfo, error_class):
    """
    Reject a timezone whose info record is flagged as ambiguous.

    :param tzname: Timezone name, used only in the error message.
    :param tt_tzinfo: Mapping with a ``'category'`` bit mask.
    :param error_class: Exception type to raise on failure.

    :raise error_class: the ``TZ_AMBIGUOUS`` bit is set in the category.
    """
    ambiguous = bool(tt_tzinfo['category'] & tt_timezones.TZ_AMBIGUOUS)
    if ambiguous:
        raise error_class(f'Failed to create datetime with ambiguous timezone "{tzname}"')
116+
117+
def get_python_tzinfo(tzindex):
    """
    Build a Python tzinfo object for a Tarantool timezone index.

    A pytz timezone is preferred. If pytz does not know the name, the
    abbreviated timezone table is used to build a fixed-offset
    ``datetime.timezone`` that keeps the name.

    :param tzindex: Tarantool timezone index.

    :return: pytz timezone or named ``datetime.timezone``.

    :raise MsgpackError: the index is unknown, the name is known neither
        to pytz nor to the abbreviation table, or the abbreviation is
        ambiguous.
    """
    if tzindex not in tt_timezones.indexToTimezone:
        raise MsgpackError(f'Failed to create datetime with unknown tzindex {tzindex}')

    tzname = tt_timezones.indexToTimezone[tzindex]

    try:
        return pytz.timezone(tzname)
    except pytz.exceptions.UnknownTimeZoneError:
        # Fall back to the abbreviation table. ``get`` avoids leaking a
        # bare KeyError when the installed pytz lacks a non-abbreviated
        # name that Tarantool knows.
        tt_tzinfo = tt_timezones.timezoneAbbrevInfo.get(tzname)

    if tt_tzinfo is None:
        raise MsgpackError(f'Failed to create datetime with timezone "{tzname}": '
                           'it is not supported by pytz and is not an abbreviated timezone')

    assert_nonambiguous_tz(tzname, tt_tzinfo, MsgpackError)

    # Offsets in the abbreviation table are used as minutes.
    return datetime.timezone(datetime.timedelta(minutes=tt_tzinfo['offset']),
                             name=tzname)
133+
134+
def validate_python_timezone(timestamp):
    """
    Check that the timezone of ``timestamp`` can be encoded.

    A timestamp without a timezone name always passes.

    :param timestamp: Object accepted by ``get_python_tzname``.

    :raise ValueError: the timezone name is missing from
        ``tt_timezones.timezoneToIndex``, or it is an abbreviation
        flagged as ambiguous.
    """
    tzname = get_python_tzname(timestamp)
    if tzname is None:
        return

    if tzname not in tt_timezones.timezoneToIndex:
        raise ValueError(f'Failed to create datetime with unknown timezone "{tzname}"')

    # Only abbreviated timezones carry a category that may be ambiguous.
    if is_abbrev_tz(tzname):
        assert_nonambiguous_tz(tzname, tt_timezones.timezoneAbbrevInfo[tzname], ValueError)
146+
68147
def msgpack_decode(data):
69148
cursor = 0
70149
seconds, cursor = get_bytes_as_int(data, cursor, SECONDS_SIZE_BYTES)
@@ -85,7 +164,10 @@ def msgpack_decode(data):
85164
total_nsec = seconds * NSEC_IN_SEC + nsec
86165

87166
if (tzindex != 0):
88-
raise NotImplementedError
167+
tzinfo = get_python_tzinfo(tzindex)
168+
timestamp = pandas.to_datetime(total_nsec, unit='ns')\
169+
.replace(tzinfo=datetime.timezone.utc)\
170+
.tz_convert(tzinfo)
89171
elif (tzoffset != 0):
90172
tzinfo = datetime.timezone(datetime.timedelta(minutes=tzoffset))
91173
return pandas.to_datetime(total_nsec, unit='ns')\
@@ -106,6 +188,7 @@ def __init__(self, *args, **kwargs):
106188
return
107189

108190
if isinstance(data, pandas.Timestamp):
191+
validate_python_timezone(data)
109192
self._timestamp = deepcopy(data)
110193
return
111194

@@ -114,6 +197,7 @@ def __init__(self, *args, **kwargs):
114197
return
115198
else:
116199
self._timestamp = pandas.Timestamp(*args, **kwargs)
200+
validate_python_timezone(self._timestamp)
117201
return
118202

119203
def __eq__(self, other):
@@ -133,13 +217,21 @@ def __repr__(self):
133217
def to_pd_timestamp(self):
134218
return deepcopy(self._timestamp)
135219

220+
def tzindex(self):
    """
    Return a deep copy of the stored timezone index.

    NOTE(review): no assignment to ``self._tzindex`` is visible in this
    diff -- confirm the attribute is set before this method is called.
    """
    index_copy = deepcopy(self._tzindex)
    return index_copy
222+
136223
def msgpack_encode(self):
137224
ts_value = self._timestamp.value
138225

139226
seconds = ts_value // NSEC_IN_SEC
140227
nsec = ts_value % NSEC_IN_SEC
228+
141229
tzoffset = compute_offset(self._timestamp)
142-
tzindex = 0
230+
tzname = get_python_tzname(self._timestamp)
231+
if tzname is not None:
232+
tzindex = tt_timezones.timezoneToIndex[tzname]
233+
else:
234+
tzindex = 0
143235

144236
buf = get_int_as_bytes(seconds, SECONDS_SIZE_BYTES)
145237

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from tarantool.msgpack_ext.types.timezones.timezones import (
2+
TZ_AMBIGUOUS,
3+
indexToTimezone,
4+
timezoneToIndex,
5+
timezoneAbbrevInfo,
6+
)
7+
8+
__all__ = ['TZ_AMBIGUOUS', 'indexToTimezone', 'timezoneToIndex',
9+
'timezoneAbbrevInfo']
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
#!/usr/bin/env bash
# Regenerate timezones.py (index <-> name maps and abbreviation info)
# from the timezones.h header of a pinned Tarantool commit, then run
# validate_timezones.py.
set -xeuo pipefail

SRC_COMMIT="9ee45289e01232b8df1413efea11db170ae3b3b4"
SRC_FILE=timezones.h
DST_FILE=timezones.py

# Drop a stale header that may be left over from a previous run.
rm -f "${SRC_FILE}"
wget -O "${SRC_FILE}" \
    "https://raw.githubusercontent.com/tarantool/tarantool/${SRC_COMMIT}/src/lib/tzcode/timezones.h"

# Print the arguments of every "$1(...)" line of the header with the macro
# name, closing parenthesis and commas stripped, ready for awk.
zone_rows() {
    grep "$1" "${SRC_FILE}" | sed "s/$1( *//g" | sed "s/[),]//g"
}

# We don't need aliases in indexToTimezone because Tarantool always
# replaces them:
#
# tarantool> T = date.parse '2022-01-01T00:00 Pacific/Enderbury'
# ---
# ...
# tarantool> T
# ---
# - 2022-01-01T00:00:00 Pacific/Kanton
# ...
#
# So we can do the same and don't worry, be happy.

cat <<EOF > "${DST_FILE}"
# Automatically generated by gen-timezones.sh

TZ_UTC = 0x01
TZ_RFC = 0x02
TZ_MILITARY = 0x04
TZ_AMBIGUOUS = 0x08
TZ_NYI = 0x10
TZ_OLSON = 0x20
TZ_ALIAS = 0x40
TZ_DST = 0x80

indexToTimezone = {
EOF

zone_rows ZONE_ABBREV \
    | awk '{printf("\t%s : %s,\n", $1, $3)}' >> "${DST_FILE}"
zone_rows ZONE_UNIQUE \
    | awk '{printf("\t%s : %s,\n", $1, $2)}' >> "${DST_FILE}"

cat <<EOF >> "${DST_FILE}"
}

timezoneToIndex = {
EOF

zone_rows ZONE_ABBREV \
    | awk '{printf("\t%s : %s,\n", $3, $1)}' >> "${DST_FILE}"
zone_rows ZONE_UNIQUE \
    | awk '{printf("\t%s : %s,\n", $2, $1)}' >> "${DST_FILE}"
zone_rows ZONE_ALIAS \
    | awk '{printf("\t%s : %s,\n", $2, $1)}' >> "${DST_FILE}"

cat <<EOF >> "${DST_FILE}"
}

timezoneAbbrevInfo = {
EOF

zone_rows ZONE_ABBREV \
    | awk '{printf("\t%s : {\"offset\" : %d, \"category\" : %s},\n", $3, $2, $4)}' >> "${DST_FILE}"
echo "}" >> "${DST_FILE}"

# Remove the downloaded header through the same variable it was saved under.
rm "${SRC_FILE}"

python validate_timezones.py

0 commit comments

Comments
 (0)