Skip to content

Commit 76d4aab

Browse files
committed
BUG: Properly read Categorical msgpacks
Patches a bug in read_msgpack in which Series categoricals were accidentally being constructed with a non-categorical dtype, resulting in an error. Closes gh-14901.
1 parent f11501a commit 76d4aab

File tree

3 files changed

+16
-9
lines changed

3 files changed

+16
-9
lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,7 @@ Bug Fixes
248248

249249

250250

251+
- Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`)
251252

252253

253254

pandas/io/packers.py

+11-9
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@
7070
move_into_mutable_buffer as _move_into_mutable_buffer,
7171
)
7272

73-
# check whcih compression libs we have installed
73+
# check which compression libs we have installed
7474
try:
7575
import zlib
7676

@@ -591,19 +591,21 @@ def decode(obj):
591591
name=obj[u'name'])
592592

593593
elif typ == u'series':
594+
name = obj[u'name']
595+
index = obj[u'index']
596+
klass = globals()[obj[u'klass']]
597+
594598
dtype = dtype_for(obj[u'dtype'])
595599
pd_dtype = pandas_dtype(dtype)
596-
np_dtype = pandas_dtype(dtype).base
597-
598-
index = obj[u'index']
599-
result = globals()[obj[u'klass']](unconvert(obj[u'data'], dtype,
600-
obj[u'compress']),
601-
index=index,
602-
dtype=np_dtype,
603-
name=obj[u'name'])
604600
tz = getattr(pd_dtype, 'tz', None)
601+
602+
arr = unconvert(obj[u'data'], dtype, obj[u'compress'])
603+
605604
if tz:
605+
result = klass(arr, index=index, dtype=pd_dtype.base, name=name)
606606
result = result.dt.tz_localize('UTC').dt.tz_convert(tz)
607+
else:
608+
result = klass(arr, index=index, dtype=pd_dtype, name=name)
607609
return result
608610

609611
elif typ == u'block_manager':

pandas/io/tests/test_packers.py

+4
Original file line numberDiff line numberDiff line change
@@ -363,13 +363,17 @@ def setUp(self):
363363
'F': [Timestamp('20130102', tz='US/Eastern')] * 2 +
364364
[Timestamp('20130603', tz='CET')] * 3,
365365
'G': [Timestamp('20130102', tz='US/Eastern')] * 5,
366+
'H': Categorical([1, 2, 3, 4, 5]),
367+
'I': Categorical([1, 2, 3, 4, 5], ordered=True),
366368
}
367369

368370
self.d['float'] = Series(data['A'])
369371
self.d['int'] = Series(data['B'])
370372
self.d['mixed'] = Series(data['E'])
371373
self.d['dt_tz_mixed'] = Series(data['F'])
372374
self.d['dt_tz'] = Series(data['G'])
375+
self.d['cat_ordered'] = Series(data['H'])
376+
self.d['cat_unordered'] = Series(data['I'])
373377

374378
def test_basic(self):
375379

0 commit comments

Comments
 (0)