Skip to content

Commit b8c5c67

Browse files
committed
Merge pull request #4099 from jreback/hdf_tz
BUG: GH4098, HDFStore not recreating a datetime index properly when has a timezone
2 parents cdb3b2c + 134daed commit b8c5c67

File tree

3 files changed

+253
-218
lines changed

3 files changed

+253
-218
lines changed

doc/source/release.rst

+7-7
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ pandas 0.12
5252
- A ``filter`` method on grouped Series or DataFrames returns a subset of
5353
the original (:issue:`3680`, :issue:`919`)
5454
- Access to historical Google Finance data in pandas.io.data (:issue:`3814`)
55-
- DataFrame plotting methods can sample column colors from a Matplotlib
55+
- DataFrame plotting methods can sample column colors from a Matplotlib
5656
colormap via the ``colormap`` keyword. (:issue:`3860`)
5757

5858
**Improvements to existing features**
@@ -63,7 +63,7 @@ pandas 0.12
6363
- ``convert_objects`` now accepts a ``copy`` parameter (defaults to ``True``)
6464
- ``HDFStore``
6565

66-
- will retain index attributes (freq,tz,name) on recreation (:issue:`3499`)
66+
- will retain index attributes (freq,tz,name) on recreation (:issue:`3499`,:issue:`4098`)
6767
- will warn with an ``AttributeConflictWarning`` if you are attempting to append
6868
an index with a different frequency than the existing, or attempting
6969
to append an index with a different name than the existing
@@ -158,7 +158,7 @@ pandas 0.12
158158
- removed ``clipboard`` support to ``pandas.io.clipboard``
159159
- replace top-level and instance methods ``save`` and ``load`` with
160160
top-level ``read_pickle`` and ``to_pickle`` instance method, ``save`` and
161-
``load`` will give deprecation warning.
161+
``load`` will give deprecation warning.
162162
- the ``method`` and ``axis`` arguments of ``DataFrame.replace()`` are
163163
deprecated
164164
- set FutureWarning to require data_source, and to replace year/month with
@@ -215,7 +215,7 @@ pandas 0.12
215215
- Extend ``reindex`` to correctly deal with non-unique indices (:issue:`3679`)
216216
- ``DataFrame.itertuples()`` now works with frames with duplicate column
217217
names (:issue:`3873`)
218-
- Bug in non-unique indexing via ``iloc`` (:issue:`4017`); added ``takeable`` argument to
218+
- Bug in non-unique indexing via ``iloc`` (:issue:`4017`); added ``takeable`` argument to
219219
``reindex`` for location-based taking
220220

221221
- Fixed bug in groupby with empty series referencing a variable before assignment. (:issue:`3510`)
@@ -272,16 +272,16 @@ pandas 0.12
272272
- Correctly parse when passed the ``dtype=str`` (or other variable-len string dtypes)
273273
in ``read_csv`` (:issue:`3795`)
274274
- Fix index name not propagating when using ``loc/ix`` (:issue:`3880`)
275-
- Fix groupby when applying a custom function resulting in a returned DataFrame was
275+
- Fix groupby when applying a custom function resulting in a returned DataFrame was
276276
not converting dtypes (:issue:`3911`)
277277
- Fixed a bug where ``DataFrame.replace`` with a compiled regular expression
278278
in the ``to_replace`` argument wasn't working (:issue:`3907`)
279279
- Fixed ``__truediv__`` in Python 2.7 with ``numexpr`` installed to actually do true division when dividing
280280
two integer arrays with at least 10000 cells total (:issue:`3764`)
281281
- Indexing with a string with seconds resolution not selecting from a time index (:issue:`3925`)
282-
- csv parsers would loop infinitely if ``iterator=True`` but no ``chunksize`` was
282+
- csv parsers would loop infinitely if ``iterator=True`` but no ``chunksize`` was
283283
specified (:issue:`3967`), python parser failing with ``chunksize=1``
284-
- Fix index name not propagating when using ``shift``
284+
- Fix index name not propagating when using ``shift``
285285
- Fixed dropna=False being ignored with multi-index stack (:issue:`3997`)
286286
- Fixed flattening of columns when renaming MultiIndex columns DataFrame (:issue:`4004`)
287287
- Fix ``Series.clip`` for datetime series. NA/NaN threshold values will now throw ValueError (:issue:`3996`)

pandas/io/pytables.py

+44-32
Original file line numberDiff line numberDiff line change
@@ -151,8 +151,8 @@ def _tables():
151151
def h5_open(path, mode):
152152
tables = _tables()
153153
return tables.openFile(path, mode)
154-
155-
154+
155+
156156
@contextmanager
157157
def get_store(path, mode='a', complevel=None, complib=None,
158158
fletcher32=False):
@@ -217,7 +217,7 @@ def read_hdf(path_or_buf, key, **kwargs):
217217

218218
# a passed store; user controls open/close
219219
f(path_or_buf, False)
220-
220+
221221
class HDFStore(object):
222222
"""
223223
dict-like IO interface for storing pandas objects in PyTables
@@ -757,7 +757,7 @@ def get_node(self, key):
757757
def get_storer(self, key):
758758
""" return the storer object for a key, raise if not in the file """
759759
group = self.get_node(key)
760-
if group is None:
760+
if group is None:
761761
return None
762762
s = self._create_storer(group)
763763
s.infer_axes()
@@ -810,9 +810,9 @@ def _create_storer(self, group, value = None, table = False, append = False, **k
810810
""" return a suitable Storer class to operate """
811811

812812
def error(t):
813-
raise TypeError("cannot properly create the storer for: [%s] [group->%s,value->%s,table->%s,append->%s,kwargs->%s]" %
813+
raise TypeError("cannot properly create the storer for: [%s] [group->%s,value->%s,table->%s,append->%s,kwargs->%s]" %
814814
(t,group,type(value),table,append,kwargs))
815-
815+
816816
pt = _ensure_decoded(getattr(group._v_attrs,'pandas_type',None))
817817
tt = _ensure_decoded(getattr(group._v_attrs,'table_type',None))
818818

@@ -863,7 +863,7 @@ def error(t):
863863
tt = u'appendable_ndim'
864864

865865
else:
866-
866+
867867
# distinguish between a frame/table
868868
tt = u'legacy_panel'
869869
try:
@@ -930,7 +930,7 @@ def _read_group(self, group, **kwargs):
930930

931931
class TableIterator(object):
932932
""" define the iteration interface on a table
933-
933+
934934
Parameters
935935
----------
936936
@@ -974,7 +974,7 @@ def __iter__(self):
974974
yield v
975975

976976
self.close()
977-
977+
978978
def close(self):
979979
if self.auto_close:
980980
self.store.close()
@@ -1003,7 +1003,7 @@ class IndexCol(object):
10031003
_info_fields = ['freq','tz','index_name']
10041004

10051005
def __init__(self, values=None, kind=None, typ=None, cname=None, itemsize=None,
1006-
name=None, axis=None, kind_attr=None, pos=None, freq=None, tz=None,
1006+
name=None, axis=None, kind_attr=None, pos=None, freq=None, tz=None,
10071007
index_name=None, **kwargs):
10081008
self.values = values
10091009
self.kind = kind
@@ -1088,21 +1088,27 @@ def convert(self, values, nan_rep, encoding):
10881088
except:
10891089
pass
10901090

1091+
values =_maybe_convert(values, self.kind, encoding)
1092+
10911093
kwargs = dict()
10921094
if self.freq is not None:
10931095
kwargs['freq'] = _ensure_decoded(self.freq)
1094-
if self.tz is not None:
1095-
kwargs['tz'] = _ensure_decoded(self.tz)
10961096
if self.index_name is not None:
10971097
kwargs['name'] = _ensure_decoded(self.index_name)
10981098
try:
1099-
self.values = Index(_maybe_convert(values, self.kind, self.encoding), **kwargs)
1099+
self.values = Index(values, **kwargs)
11001100
except:
11011101

11021102
# if the output freq is different than what we recorded, then infer it
11031103
if 'freq' in kwargs:
11041104
kwargs['freq'] = 'infer'
11051105
self.values = Index(_maybe_convert(values, self.kind, encoding), **kwargs)
1106+
1107+
# set the timezone if indicated
1108+
# we stored in utc, so reverse to local timezone
1109+
if self.tz is not None:
1110+
self.values = self.values.tz_localize('UTC').tz_convert(_ensure_decoded(self.tz))
1111+
11061112
return self
11071113

11081114
def take_data(self):
@@ -1189,7 +1195,7 @@ def update_info(self, info):
11891195
idx = info[self.name]
11901196
except:
11911197
idx = info[self.name] = dict()
1192-
1198+
11931199
existing_value = idx.get(key)
11941200
if key in idx and value is not None and existing_value != value:
11951201

@@ -1235,7 +1241,7 @@ def is_indexed(self):
12351241

12361242
def convert(self, values, nan_rep, encoding):
12371243
""" set the values from this selection: take = take ownership """
1238-
1244+
12391245
self.values = Int64Index(np.arange(self.table.nrows))
12401246
return self
12411247

@@ -1359,7 +1365,13 @@ def set_atom(self, block, existing_col, min_itemsize, nan_rep, info, encoding=No
13591365
"invalid timezone specification")
13601366

13611367
values = index.tz_convert('UTC').values.view('i8')
1362-
self.tz = tz
1368+
1369+
# store a converted timezone
1370+
zone = tslib.get_timezone(index.tz)
1371+
if zone is None:
1372+
zone = tslib.tot_seconds(index.tz.utcoffset())
1373+
self.tz = zone
1374+
13631375
self.update_info(info)
13641376
self.set_atom_datetime64(block, values.reshape(block.values.shape))
13651377

@@ -1398,7 +1410,7 @@ def set_atom_string(self, block, existing_col, min_itemsize, nan_rep, encoding):
13981410
inferred_type = lib.infer_dtype(col.ravel())
13991411
if inferred_type != 'string':
14001412
raise TypeError("Cannot serialize the column [%s] because\n"
1401-
"its data contents are [%s] object dtype" %
1413+
"its data contents are [%s] object dtype" %
14021414
(item,inferred_type))
14031415

14041416

@@ -1607,7 +1619,7 @@ def __repr__(self):
16071619
s = "[%s]" % ','.join([ str(x) for x in s ])
16081620
return "%-12.12s (shape->%s)" % (self.pandas_type,s)
16091621
return self.pandas_type
1610-
1622+
16111623
def __str__(self):
16121624
return self.__repr__()
16131625

@@ -1929,7 +1941,7 @@ def write_array_empty(self, key, value):
19291941
self._handle.createArray(self.group, key, arr)
19301942
getattr(self.group, key)._v_attrs.value_type = str(value.dtype)
19311943
getattr(self.group, key)._v_attrs.shape = value.shape
1932-
1944+
19331945
def write_array(self, key, value, items=None):
19341946
if key in self.group:
19351947
self._handle.removeNode(self.group, key)
@@ -2142,7 +2154,7 @@ def shape(self):
21422154
try:
21432155
ndim = self.ndim
21442156

2145-
# items
2157+
# items
21462158
items = 0
21472159
for i in range(self.nblocks):
21482160
node = getattr(self.group, 'block%d_items' % i)
@@ -2212,7 +2224,7 @@ class PanelStorer(BlockManagerStorer):
22122224
pandas_kind = u'wide'
22132225
obj_type = Panel
22142226
is_shape_reversed = True
2215-
2227+
22162228
def write(self, obj, **kwargs):
22172229
obj._consolidate_inplace()
22182230
return super(PanelStorer, self).write(obj, **kwargs)
@@ -2270,7 +2282,7 @@ def __repr__(self):
22702282
self.ncols,
22712283
','.join([ a.name for a in self.index_axes ]),
22722284
dc)
2273-
2285+
22742286
def __getitem__(self, c):
22752287
""" return the axis for c """
22762288
for a in self.axes:
@@ -2568,7 +2580,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None,
25682580
try:
25692581
axes = _AXES_MAP[type(obj)]
25702582
except:
2571-
raise TypeError("cannot properly create the storer for: [group->%s,value->%s]" %
2583+
raise TypeError("cannot properly create the storer for: [group->%s,value->%s]" %
25722584
(self.group._v_name,type(obj)))
25732585

25742586
# map axes to numbers
@@ -2597,7 +2609,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None,
25972609
# nan_representation
25982610
if nan_rep is None:
25992611
nan_rep = 'nan'
2600-
2612+
26012613
self.nan_rep = nan_rep
26022614

26032615
# create axes to index and non_index
@@ -2665,7 +2677,7 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None,
26652677
name = b.items[0]
26662678
self.data_columns.append(name)
26672679

2668-
# make sure that we match up the existing columns
2680+
# make sure that we match up the existing columns
26692681
# if we have an existing table
26702682
if existing_table is not None and validate:
26712683
try:
@@ -2740,7 +2752,7 @@ def process_filter(field, filt):
27402752
return obj.ix._getitem_axis(takers,axis=axis_number)
27412753

27422754
raise ValueError("cannot find the field [%s] for filtering!" % field)
2743-
2755+
27442756
obj = process_filter(field, filt)
27452757

27462758
return obj
@@ -3053,7 +3065,7 @@ def write_data_chunk(self, indexes, mask, search, values):
30533065
self.table.flush()
30543066
except (Exception), detail:
30553067
raise Exception("tables cannot write this data -> %s" % str(detail))
3056-
3068+
30573069
def delete(self, where=None, **kwargs):
30583070

30593071
# delete all rows (and return the nrows)
@@ -3113,7 +3125,7 @@ class AppendableFrameTable(AppendableTable):
31133125
table_type = u'appendable_frame'
31143126
ndim = 2
31153127
obj_type = DataFrame
3116-
3128+
31173129
@property
31183130
def is_transposed(self):
31193131
return self.index_axes[0].axis == 1
@@ -3266,7 +3278,7 @@ def _convert_index(index, encoding=None):
32663278

32673279
if isinstance(index, DatetimeIndex):
32683280
converted = index.asi8
3269-
return IndexCol(converted, 'datetime64', _tables().Int64Col(),
3281+
return IndexCol(converted, 'datetime64', _tables().Int64Col(),
32703282
freq=getattr(index,'freq',None), tz=getattr(index,'tz',None),
32713283
index_name=index_name)
32723284
elif isinstance(index, (Int64Index, PeriodIndex)):
@@ -3382,7 +3394,7 @@ def _unconvert_string_array(data, nan_rep=None, encoding=None):
33823394

33833395
if nan_rep is None:
33843396
nan_rep = 'nan'
3385-
3397+
33863398
data = lib.string_array_replace_from_nan_rep(data, nan_rep)
33873399
return data.reshape(shape)
33883400

@@ -3421,7 +3433,7 @@ class Term(object):
34213433
value : a value or list of values (required)
34223434
queryables : a kinds map (dict of column name -> kind), or None if column is non-indexable
34233435
encoding : an encoding that will encode the query terms
3424-
3436+
34253437
Returns
34263438
-------
34273439
a Term object
@@ -3582,7 +3594,7 @@ def eval(self):
35823594
if self.is_in_table:
35833595

35843596
self.condition = self.generate(values[0])
3585-
3597+
35863598
else:
35873599

35883600
raise TypeError("passing a filterable condition to a non-table indexer [%s]" % str(self))

0 commit comments

Comments
 (0)