Skip to content

Commit a2d8b3d

Browse files
committed
BUG: store datetime.date objects in HDFStore as ordinals rather than timetuples to avoid timezone issues (GH2852),
thanks @tavistmorph and @numpand
1 parent 78e3ba7 commit a2d8b3d

File tree

4 files changed

+66
-8
lines changed

4 files changed

+66
-8
lines changed

doc/source/release.rst

+6-2
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,10 @@ Improvements to existing features
114114
- ``Panel.to_excel()`` now accepts keyword arguments that will be passed to
115115
its ``DataFrame``'s ``to_excel()`` methods. (:issue:`4750`)
116116
- allow DataFrame constructor to accept more list-like objects, e.g. list of
117-
``collections.Sequence`` and ``array.Array`` objects (:issue:`3783`,:issue:`42971`)
118-
- DataFrame constructor now accepts a numpy masked record array (:issue:`3478`)
117+
``collections.Sequence`` and ``array.Array`` objects (:issue:`3783`,:issue:`42971`),
118+
thanks @lgautier
119+
- DataFrame constructor now accepts a numpy masked record array (:issue:`3478`),
120+
thanks @jnothman
119121

120122
API Changes
121123
~~~~~~~~~~~
@@ -168,6 +170,8 @@ API Changes
168170
with data_columns on the same axis
169171
- ``select_as_coordinates`` will now return an ``Int64Index`` of the resultant selection set
170172
- support ``timedelta64[ns]`` as a serialization type (:issue:`3577`)
173+
- store `datetime.date` objects as ordinals rather than timetuples to avoid timezone issues (:issue:`2852`),
174+
thanks @tavistmorph and @numpand
171175
- ``JSON``
172176

173177
- added ``date_unit`` parameter to specify resolution of timestamps. Options

doc/source/v0.13.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,8 @@ API changes
103103
- add the keyword ``dropna=True`` to ``append`` to change whether ALL nan rows are not written
104104
to the store (default is ``True``, ALL nan rows are NOT written), also settable
105105
via the option ``io.hdf.dropna_table`` (:issue:`4625`)
106+
- store `datetime.date` objects as ordinals rather than timetuples to avoid timezone issues (:issue:`2852`),
107+
thanks @tavistmorph and @numpand
106108

107109
- Changes to how ``Index`` and ``MultiIndex`` handle metadata (``levels``,
108110
``labels``, and ``names``) (:issue:`4039`):

pandas/io/pytables.py

+17-6
Original file line numberDiff line numberDiff line change
@@ -1740,8 +1740,12 @@ def convert(self, values, nan_rep, encoding):
17401740
elif dtype == u('timedelta64'):
17411741
self.data = np.asarray(self.data, dtype='m8[ns]')
17421742
elif dtype == u('date'):
1743-
self.data = np.array(
1744-
[date.fromtimestamp(v) for v in self.data], dtype=object)
1743+
try:
1744+
self.data = np.array(
1745+
[date.fromordinal(v) for v in data], dtype=object)
1746+
except (ValueError):
1747+
self.data = np.array(
1748+
[date.fromtimestamp(v) for v in self.data], dtype=object)
17451749
elif dtype == u('datetime'):
17461750
self.data = np.array(
17471751
[datetime.fromtimestamp(v) for v in self.data],
@@ -3769,7 +3773,7 @@ def _convert_index(index, encoding=None):
37693773
return IndexCol(converted, 'datetime', _tables().Time64Col(),
37703774
index_name=index_name)
37713775
elif inferred_type == 'date':
3772-
converted = np.array([time.mktime(v.timetuple()) for v in values],
3776+
converted = np.array([v.toordinal() for v in values],
37733777
dtype=np.int32)
37743778
return IndexCol(converted, 'date', _tables().Time32Col(),
37753779
index_name=index_name)
@@ -3809,7 +3813,12 @@ def _unconvert_index(data, kind, encoding=None):
38093813
index = np.array([datetime.fromtimestamp(v) for v in data],
38103814
dtype=object)
38113815
elif kind == u('date'):
3812-
index = np.array([date.fromtimestamp(v) for v in data], dtype=object)
3816+
try:
3817+
index = np.array(
3818+
[date.fromordinal(v) for v in data], dtype=object)
3819+
except (ValueError):
3820+
index = np.array(
3821+
[date.fromtimestamp(v) for v in self.data], dtype=object)
38133822
elif kind in (u('integer'), u('float')):
38143823
index = np.array(data)
38153824
elif kind in (u('string')):
@@ -4096,10 +4105,12 @@ def stringify(value):
40964105
elif kind == u('timedelta64') or kind == u('timedelta'):
40974106
v = _coerce_scalar_to_timedelta_type(v,unit='s').item()
40984107
return TermValue(int(v), v, kind)
4099-
elif (isinstance(v, datetime) or hasattr(v, 'timetuple')
4100-
or kind == u('date')):
4108+
elif (isinstance(v, datetime) or hasattr(v, 'timetuple')):
41014109
v = time.mktime(v.timetuple())
41024110
return TermValue(v, Timestamp(v), kind)
4111+
elif kind == u('date'):
4112+
v = v.toordinal()
4113+
return TermValue(v, Timestamp.fromordinal(v), kind)
41034114
elif kind == u('integer'):
41044115
v = int(float(v))
41054116
return TermValue(v, v, kind)

pandas/io/tests/test_pytables.py

+41
Original file line numberDiff line numberDiff line change
@@ -1799,6 +1799,47 @@ def compare(a,b):
17991799
result = store.select('df')
18001800
assert_frame_equal(result,df)
18011801

1802+
def test_store_timezone(self):
1803+
# GH2852
1804+
# issue storing datetime.date with a timezone as it resets when read back in a new timezone
1805+
1806+
import platform
1807+
if platform.system() == "Windows":
1808+
raise nose.SkipTest("timezone setting not supported on windows")
1809+
1810+
import datetime
1811+
import time
1812+
import os
1813+
1814+
orig_tz = os.environ.get('TZ')
1815+
1816+
def setTZ(tz):
1817+
if tz is None:
1818+
try:
1819+
del os.environ['TZ']
1820+
except:
1821+
pass
1822+
else:
1823+
os.environ['TZ']=tz
1824+
time.tzset()
1825+
1826+
try:
1827+
1828+
with ensure_clean(self.path) as store:
1829+
1830+
setTZ('EST5EDT')
1831+
today = datetime.date(2013,9,10)
1832+
df = DataFrame([1,2,3], index = [today, today, today])
1833+
store['obj1'] = df
1834+
1835+
setTZ('CST6CDT')
1836+
result = store['obj1']
1837+
1838+
assert_frame_equal(result, df)
1839+
1840+
finally:
1841+
setTZ(orig_tz)
1842+
18021843
def test_append_with_timedelta(self):
18031844
if _np_version_under1p7:
18041845
raise nose.SkipTest("requires numpy >= 1.7")

0 commit comments

Comments
 (0)