|
49 | 49 | from pandas.compat import u, u_safe
|
50 | 50 | from pandas import (Timestamp, Period, Series, DataFrame, # noqa
|
51 | 51 | Index, MultiIndex, Float64Index, Int64Index,
|
52 |
| - Panel, RangeIndex, PeriodIndex, DatetimeIndex, NaT) |
| 52 | + Panel, RangeIndex, PeriodIndex, DatetimeIndex, NaT, |
| 53 | + Categorical) |
53 | 54 | from pandas.tslib import NaTType
|
54 | 55 | from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel
|
55 | 56 | from pandas.sparse.array import BlockIndex, IntIndex
|
56 | 57 | from pandas.core.generic import NDFrame
|
57 |
| -from pandas.core.common import ( |
58 |
| - PerformanceWarning, |
59 |
| - needs_i8_conversion, |
60 |
| - pandas_dtype, |
61 |
| -) |
| 58 | +from pandas.core.common import (PerformanceWarning, |
| 59 | + is_categorical_dtype, is_object_dtype, |
| 60 | + needs_i8_conversion, pandas_dtype) |
62 | 61 | from pandas.io.common import get_filepath_or_buffer
|
63 | 62 | from pandas.core.internals import BlockManager, make_block
|
64 | 63 | import pandas.core.internals as internals
|
@@ -226,6 +225,7 @@ def read(fh):
|
226 | 225 | # this is platform int, which we need to remap to np.int64
|
227 | 226 | # for compat on windows platforms
|
228 | 227 | 7: np.dtype('int64'),
|
| 228 | + 'category': 'category' |
229 | 229 | }
|
230 | 230 |
|
231 | 231 |
|
@@ -257,14 +257,17 @@ def convert(values):
|
257 | 257 | """ convert the numpy values to a list """
|
258 | 258 |
|
259 | 259 | dtype = values.dtype
|
| 260 | + |
| 261 | + if is_categorical_dtype(values): |
| 262 | + return values |
| 263 | + |
| 264 | + elif is_object_dtype(dtype): |
| 265 | + return values.ravel().tolist() |
| 266 | + |
260 | 267 | if needs_i8_conversion(dtype):
|
261 | 268 | values = values.view('i8')
|
262 | 269 | v = values.ravel()
|
263 | 270 |
|
264 |
| - # convert object |
265 |
| - if dtype == np.object_: |
266 |
| - return v.tolist() |
267 |
| - |
268 | 271 | if compressor == 'zlib':
|
269 | 272 | _check_zlib()
|
270 | 273 |
|
@@ -298,7 +301,10 @@ def unconvert(values, dtype, compress=None):
|
298 | 301 | if as_is_ext:
|
299 | 302 | values = values.data
|
300 | 303 |
|
301 |
| - if dtype == np.object_: |
| 304 | + if is_categorical_dtype(dtype): |
| 305 | + return values |
| 306 | + |
| 307 | + elif is_object_dtype(dtype): |
302 | 308 | return np.array(values, dtype=object)
|
303 | 309 |
|
304 | 310 | dtype = pandas_dtype(dtype).base
|
@@ -393,6 +399,16 @@ def encode(obj):
|
393 | 399 | u'dtype': u(obj.dtype.name),
|
394 | 400 | u'data': convert(obj.values),
|
395 | 401 | u'compress': compressor}
|
| 402 | + |
| 403 | + elif isinstance(obj, Categorical): |
| 404 | + return {u'typ': u'category', |
| 405 | + u'klass': u(obj.__class__.__name__), |
| 406 | + u'name': getattr(obj, 'name', None), |
| 407 | + u'codes': obj.codes, |
| 408 | + u'categories': obj.categories, |
| 409 | + u'ordered': obj.ordered, |
| 410 | + u'compress': compressor} |
| 411 | + |
396 | 412 | elif isinstance(obj, Series):
|
397 | 413 | if isinstance(obj, SparseSeries):
|
398 | 414 | raise NotImplementedError(
|
@@ -576,10 +592,18 @@ def decode(obj):
|
576 | 592 | result = result.tz_localize('UTC').tz_convert(tz)
|
577 | 593 | return result
|
578 | 594 |
|
| 595 | + elif typ == u'category': |
| 596 | + from_codes = globals()[obj[u'klass']].from_codes |
| 597 | + return from_codes(codes=obj[u'codes'], |
| 598 | + categories=obj[u'categories'], |
| 599 | + ordered=obj[u'ordered'], |
| 600 | + name=obj[u'name']) |
| 601 | + |
579 | 602 | elif typ == u'series':
|
580 | 603 | dtype = dtype_for(obj[u'dtype'])
|
581 | 604 | pd_dtype = pandas_dtype(dtype)
|
582 | 605 | np_dtype = pandas_dtype(dtype).base
|
| 606 | + |
583 | 607 | index = obj[u'index']
|
584 | 608 | result = globals()[obj[u'klass']](unconvert(obj[u'data'], dtype,
|
585 | 609 | obj[u'compress']),
|
|
0 commit comments