Skip to content

Commit 6a54af8

Browse files
committed
Merge pull request #5507 from jreback/msgpack_bug
BUG: bug in to_msgpack for timezone aware datetime index
2 parents 3239b29 + 693a957 commit 6a54af8

File tree

4 files changed

+54
-26
lines changed

4 files changed

+54
-26
lines changed

doc/source/release.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ Experimental Features
8585
(:issue:`4897`).
8686
- Add msgpack support via ``pd.read_msgpack()`` and ``pd.to_msgpack()`` /
8787
``df.to_msgpack()`` for serialization of arbitrary pandas (and python
88-
objects) in a lightweight portable binary format (:issue:`686`)
88+
objects) in a lightweight portable binary format (:issue:`686`, :issue:`5506`)
8989
- Added PySide support for the qtpandas DataFrameModel and DataFrameWidget.
9090
- Added :mod:`pandas.io.gbq` for reading from (and writing to) Google
9191
BigQuery into a DataFrame. (:issue:`4140`)

pandas/core/generic.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -842,7 +842,7 @@ def to_hdf(self, path_or_buf, key, **kwargs):
842842
from pandas.io import pytables
843843
return pytables.to_hdf(path_or_buf, key, self, **kwargs)
844844

845-
def to_msgpack(self, path_or_buf, **kwargs):
845+
def to_msgpack(self, path_or_buf=None, **kwargs):
846846
"""
847847
msgpack (serialize) object to input file path
848848

pandas/io/packers.py

+19-6
Original file line numberDiff line numberDiff line change
@@ -100,13 +100,14 @@ def to_msgpack(path_or_buf, *args, **kwargs):
100100
def writer(fh):
101101
for a in args:
102102
fh.write(pack(a, **kwargs))
103-
return fh
104103

105104
if isinstance(path_or_buf, compat.string_types):
106105
with open(path_or_buf, mode) as fh:
107106
writer(fh)
108107
elif path_or_buf is None:
109-
return writer(compat.BytesIO())
108+
buf = compat.BytesIO()
109+
writer(buf)
110+
return buf.getvalue()
110111
else:
111112
writer(path_or_buf)
112113

@@ -263,17 +264,23 @@ def encode(obj):
263264
return {'typ': 'period_index',
264265
'klass': obj.__class__.__name__,
265266
'name': getattr(obj, 'name', None),
266-
'freq': obj.freqstr,
267+
'freq': getattr(obj,'freqstr',None),
267268
'dtype': obj.dtype.num,
268269
'data': convert(obj.asi8)}
269270
elif isinstance(obj, DatetimeIndex):
271+
tz = getattr(obj,'tz',None)
272+
273+
# store tz info and data as UTC
274+
if tz is not None:
275+
tz = tz.zone
276+
obj = obj.tz_convert('UTC')
270277
return {'typ': 'datetime_index',
271278
'klass': obj.__class__.__name__,
272279
'name': getattr(obj, 'name', None),
273280
'dtype': obj.dtype.num,
274281
'data': convert(obj.asi8),
275-
'freq': obj.freqstr,
276-
'tz': obj.tz}
282+
'freq': getattr(obj,'freqstr',None),
283+
'tz': tz }
277284
elif isinstance(obj, MultiIndex):
278285
return {'typ': 'multi_index',
279286
'klass': obj.__class__.__name__,
@@ -440,7 +447,13 @@ def decode(obj):
440447
return globals()[obj['klass']](data, name=obj['name'], freq=obj['freq'])
441448
elif typ == 'datetime_index':
442449
data = unconvert(obj['data'], np.int64, obj.get('compress'))
443-
return globals()[obj['klass']](data, freq=obj['freq'], tz=obj['tz'], name=obj['name'])
450+
result = globals()[obj['klass']](data, freq=obj['freq'], name=obj['name'])
451+
tz = obj['tz']
452+
453+
# reverse tz conversion
454+
if tz is not None:
455+
result = result.tz_localize('UTC').tz_convert(tz)
456+
return result
444457
elif typ == 'series':
445458
dtype = dtype_for(obj['dtype'])
446459
index = obj['index']

pandas/io/tests/test_packers.py

+33-18
Original file line numberDiff line numberDiff line change
@@ -61,29 +61,33 @@ def test_string_io(self):
6161

6262
df = DataFrame(np.random.randn(10,2))
6363
s = df.to_msgpack(None)
64-
result = read_msgpack(s.getvalue())
64+
result = read_msgpack(s)
65+
tm.assert_frame_equal(result,df)
66+
67+
s = df.to_msgpack()
68+
result = read_msgpack(s)
69+
tm.assert_frame_equal(result,df)
70+
71+
s = df.to_msgpack()
72+
result = read_msgpack(compat.BytesIO(s))
6573
tm.assert_frame_equal(result,df)
6674

6775
s = to_msgpack(None,df)
68-
result = read_msgpack(s.getvalue())
76+
result = read_msgpack(s)
6977
tm.assert_frame_equal(result, df)
7078

7179
with ensure_clean(self.path) as p:
7280

73-
s = df.to_msgpack(None)
81+
s = df.to_msgpack()
7482
fh = open(p,'wb')
75-
fh.write(s.getvalue())
83+
fh.write(s)
7684
fh.close()
7785
result = read_msgpack(p)
7886
tm.assert_frame_equal(result, df)
7987

8088
def test_iterator_with_string_io(self):
8189

8290
dfs = [ DataFrame(np.random.randn(10,2)) for i in range(5) ]
83-
s = to_msgpack(None,*dfs)
84-
for i, result in enumerate(read_msgpack(s.getvalue(),iterator=True)):
85-
tm.assert_frame_equal(result,dfs[i])
86-
8791
s = to_msgpack(None,*dfs)
8892
for i, result in enumerate(read_msgpack(s,iterator=True)):
8993
tm.assert_frame_equal(result,dfs[i])
@@ -98,7 +102,7 @@ def test_numpy_scalar_float(self):
98102
def test_numpy_scalar_complex(self):
99103
x = np.complex64(np.random.rand() + 1j * np.random.rand())
100104
x_rec = self.encode_decode(x)
101-
tm.assert_almost_equal(x,x_rec)
105+
self.assert_(np.allclose(x, x_rec))
102106

103107
def test_scalar_float(self):
104108
x = np.random.rand()
@@ -108,10 +112,9 @@ def test_scalar_float(self):
108112
def test_scalar_complex(self):
109113
x = np.random.rand() + 1j * np.random.rand()
110114
x_rec = self.encode_decode(x)
111-
tm.assert_almost_equal(x,x_rec)
115+
self.assert_(np.allclose(x, x_rec))
112116

113117
def test_list_numpy_float(self):
114-
raise nose.SkipTest('buggy test')
115118
x = [np.float32(np.random.rand()) for i in range(5)]
116119
x_rec = self.encode_decode(x)
117120
tm.assert_almost_equal(x,x_rec)
@@ -120,13 +123,11 @@ def test_list_numpy_float_complex(self):
120123
if not hasattr(np, 'complex128'):
121124
raise nose.SkipTest('numpy cant handle complex128')
122125

123-
# buggy test
124-
raise nose.SkipTest('buggy test')
125126
x = [np.float32(np.random.rand()) for i in range(5)] + \
126127
[np.complex128(np.random.rand() + 1j * np.random.rand())
127128
for i in range(5)]
128129
x_rec = self.encode_decode(x)
129-
tm.assert_almost_equal(x,x_rec)
130+
self.assert_(np.allclose(x, x_rec))
130131

131132
def test_list_float(self):
132133
x = [np.random.rand() for i in range(5)]
@@ -137,7 +138,7 @@ def test_list_float_complex(self):
137138
x = [np.random.rand() for i in range(5)] + \
138139
[(np.random.rand() + 1j * np.random.rand()) for i in range(5)]
139140
x_rec = self.encode_decode(x)
140-
tm.assert_almost_equal(x,x_rec)
141+
self.assert_(np.allclose(x, x_rec))
141142

142143
def test_dict_float(self):
143144
x = {'foo': 1.0, 'bar': 2.0}
@@ -147,7 +148,8 @@ def test_dict_float(self):
147148
def test_dict_complex(self):
148149
x = {'foo': 1.0 + 1.0j, 'bar': 2.0 + 2.0j}
149150
x_rec = self.encode_decode(x)
150-
tm.assert_almost_equal(x,x_rec)
151+
self.assert_(all(map(lambda x, y: x == y, x.values(), x_rec.values())) and
152+
all(map(lambda x, y: type(x) == type(y), x.values(), x_rec.values())))
151153

152154
def test_dict_numpy_float(self):
153155
x = {'foo': np.float32(1.0), 'bar': np.float32(2.0)}
@@ -158,7 +160,9 @@ def test_dict_numpy_complex(self):
158160
x = {'foo': np.complex128(
159161
1.0 + 1.0j), 'bar': np.complex128(2.0 + 2.0j)}
160162
x_rec = self.encode_decode(x)
161-
tm.assert_almost_equal(x,x_rec)
163+
self.assert_(all(map(lambda x, y: x == y, x.values(), x_rec.values())) and
164+
all(map(lambda x, y: type(x) == type(y), x.values(), x_rec.values())))
165+
162166

163167
def test_numpy_array_float(self):
164168

@@ -173,7 +177,8 @@ def test_numpy_array_float(self):
173177
def test_numpy_array_complex(self):
174178
x = (np.random.rand(5) + 1j * np.random.rand(5)).astype(np.complex128)
175179
x_rec = self.encode_decode(x)
176-
tm.assert_almost_equal(x,x_rec)
180+
self.assert_(all(map(lambda x, y: x == y, x, x_rec)) and
181+
x.dtype == x_rec.dtype)
177182

178183
def test_list_mixed(self):
179184
x = [1.0, np.float32(3.5), np.complex128(4.25), u('foo')]
@@ -235,6 +240,16 @@ def test_basic_index(self):
235240
i_rec = self.encode_decode(i)
236241
self.assert_(i.equals(i_rec))
237242

243+
# datetime with no freq (GH5506)
244+
i = Index([Timestamp('20130101'),Timestamp('20130103')])
245+
i_rec = self.encode_decode(i)
246+
self.assert_(i.equals(i_rec))
247+
248+
# datetime with timezone
249+
i = Index([Timestamp('20130101 9:00:00'),Timestamp('20130103 11:00:00')]).tz_localize('US/Eastern')
250+
i_rec = self.encode_decode(i)
251+
self.assert_(i.equals(i_rec))
252+
238253
def test_multi_index(self):
239254

240255
for s, i in self.mi.items():

0 commit comments

Comments
 (0)