diff --git a/doc/source/release.rst b/doc/source/release.rst index ba7993bfed9bd..791fbc2c516b5 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -114,8 +114,10 @@ Improvements to existing features - ``Panel.to_excel()`` now accepts keyword arguments that will be passed to its ``DataFrame``'s ``to_excel()`` methods. (:issue:`4750`) - allow DataFrame constructor to accept more list-like objects, e.g. list of - ``collections.Sequence`` and ``array.Array`` objects (:issue:`3783`,:issue:`42971`) - - DataFrame constructor now accepts a numpy masked record array (:issue:`3478`) + ``collections.Sequence`` and ``array.Array`` objects (:issue:`3783`,:issue:`42971`), + thanks @lgautier + - DataFrame constructor now accepts a numpy masked record array (:issue:`3478`), + thanks @jnothman API Changes ~~~~~~~~~~~ @@ -168,6 +170,8 @@ API Changes with data_columns on the same axis - ``select_as_coordinates`` will now return an ``Int64Index`` of the resultant selection set - support ``timedelta64[ns]`` as a serialization type (:issue:`3577`) + - store ``datetime.date`` objects as ordinals rather than timetuples to avoid timezone issues (:issue:`2852`), + thanks @tavistmorph and @numpand - ``JSON`` - added ``date_unit`` parameter to specify resolution of timestamps. 
Options diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt index d4c1eba1194ac..02548c9af7dc4 100644 --- a/doc/source/v0.13.0.txt +++ b/doc/source/v0.13.0.txt @@ -103,6 +103,8 @@ API changes - add the keyword ``dropna=True`` to ``append`` to change whether ALL nan rows are not written to the store (default is ``True``, ALL nan rows are NOT written), also settable via the option ``io.hdf.dropna_table`` (:issue:`4625`) + - store ``datetime.date`` objects as ordinals rather than timetuples to avoid timezone issues (:issue:`2852`), + thanks @tavistmorph and @numpand - Changes to how ``Index`` and ``MultiIndex`` handle metadata (``levels``, ``labels``, and ``names``) (:issue:`4039`): diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 9b6a230f6a551..c8224f761ce17 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1740,8 +1740,12 @@ def convert(self, values, nan_rep, encoding): elif dtype == u('timedelta64'): self.data = np.asarray(self.data, dtype='m8[ns]') elif dtype == u('date'): - self.data = np.array( - [date.fromtimestamp(v) for v in self.data], dtype=object) + try: + self.data = np.array( + [date.fromordinal(v) for v in self.data], dtype=object) + except (ValueError): + self.data = np.array( + [date.fromtimestamp(v) for v in self.data], dtype=object) elif dtype == u('datetime'): self.data = np.array( [datetime.fromtimestamp(v) for v in self.data], @@ -3769,7 +3773,7 @@ def _convert_index(index, encoding=None): return IndexCol(converted, 'datetime', _tables().Time64Col(), index_name=index_name) elif inferred_type == 'date': - converted = np.array([time.mktime(v.timetuple()) for v in values], + converted = np.array([v.toordinal() for v in values], dtype=np.int32) return IndexCol(converted, 'date', _tables().Time32Col(), index_name=index_name) @@ -3809,7 +3813,12 @@ def _unconvert_index(data, kind, encoding=None): index = np.array([datetime.fromtimestamp(v) for v in data], dtype=object) elif kind == u('date'): - index = 
np.array([date.fromtimestamp(v) for v in data], dtype=object) + try: + index = np.array( + [date.fromordinal(v) for v in data], dtype=object) + except (ValueError): + index = np.array( + [date.fromtimestamp(v) for v in data], dtype=object) elif kind in (u('integer'), u('float')): index = np.array(data) elif kind in (u('string')): @@ -4096,10 +4105,12 @@ def stringify(value): elif kind == u('timedelta64') or kind == u('timedelta'): v = _coerce_scalar_to_timedelta_type(v,unit='s').item() return TermValue(int(v), v, kind) - elif (isinstance(v, datetime) or hasattr(v, 'timetuple') - or kind == u('date')): + elif (isinstance(v, datetime) or hasattr(v, 'timetuple')): v = time.mktime(v.timetuple()) return TermValue(v, Timestamp(v), kind) + elif kind == u('date'): + v = v.toordinal() + return TermValue(v, Timestamp.fromordinal(v), kind) elif kind == u('integer'): v = int(float(v)) return TermValue(v, v, kind) diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 3f4ce72198215..861b4dd7567a0 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -1799,6 +1799,47 @@ def compare(a,b): result = store.select('df') assert_frame_equal(result,df) + def test_store_timezone(self): + # GH2852 + # issue storing datetime.date with a timezone as it resets when read back in a new timezone + + import platform + if platform.system() == "Windows": + raise nose.SkipTest("timezone setting not supported on windows") + + import datetime + import time + import os + + orig_tz = os.environ.get('TZ') + + def setTZ(tz): + if tz is None: + try: + del os.environ['TZ'] + except KeyError: + pass + else: + os.environ['TZ']=tz + time.tzset() + + try: + + with ensure_clean(self.path) as store: + + setTZ('EST5EDT') + today = datetime.date(2013,9,10) + df = DataFrame([1,2,3], index = [today, today, today]) + store['obj1'] = df + + setTZ('CST6CDT') + result = store['obj1'] + + assert_frame_equal(result, df) + + finally: + setTZ(orig_tz) + def 
test_append_with_timedelta(self): if _np_version_under1p7: raise nose.SkipTest("requires numpy >= 1.7")