From 98671ee1a33c44cdee2f7b2dce06a0f7e385ebb6 Mon Sep 17 00:00:00 2001
From: jreback
Date: Fri, 3 May 2013 15:24:01 -0400
Subject: [PATCH 1/5] ENH: support for msgpack serialization/deserialization

DOC: install.rst mention
DOC: added license from msgpack_numpy
PERF: changed Timestamp and DatetimeIndex serialization for speedups
add vb_suite benchmarks
ENH: added to_msgpack method in generic.py, and default import into pandas
TST: allow packers to always be imported, fail on usage with no msgpack installed
---
 LICENSES/MSGPACK_NUMPY_LICENSE  |  33 +++
 doc/source/install.rst          |   1 +
 pandas/__init__.py              |   1 +
 pandas/core/generic.py          |   4 +
 pandas/core/internals.py        |   1 -
 pandas/io/packers.py            | 378 ++++++++++++++++++++++++++++++++
 pandas/io/tests/test_packers.py | 313 ++++++++++++++++++++++++++
 vb_suite/packers.py             |  80 +++++++
 vb_suite/suite.py               |   1 +
 9 files changed, 811 insertions(+), 1 deletion(-)
 create mode 100644 LICENSES/MSGPACK_NUMPY_LICENSE
 create mode 100644 pandas/io/packers.py
 create mode 100644 pandas/io/tests/test_packers.py
 create mode 100644 vb_suite/packers.py

diff --git a/LICENSES/MSGPACK_NUMPY_LICENSE b/LICENSES/MSGPACK_NUMPY_LICENSE
new file mode 100644
index 0000000000000..57ea631f0f66d
--- /dev/null
+++ b/LICENSES/MSGPACK_NUMPY_LICENSE
@@ -0,0 +1,33 @@
+.. -*- rst -*-
+
+License
+=======
+
+Copyright (c) 2013, Lev Givon.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above
+  copyright notice, this list of conditions and the following
+  disclaimer in the documentation and/or other materials provided
+  with the distribution.
+* Neither the name of Lev Givon nor the names of any
+  contributors may be used to endorse or promote products derived
+  from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/doc/source/install.rst b/doc/source/install.rst
index 9d14d1b11c6b1..360ded91c86f0 100644
--- a/doc/source/install.rst
+++ b/doc/source/install.rst
@@ -93,6 +93,7 @@ Optional Dependencies
     version. Version 0.17.1 or higher.
   * `SciPy `__: miscellaneous statistical functions
   * `PyTables `__: necessary for HDF5-based storage
+  * `msgpack `__: necessary for msgpack-based serialization
   * `matplotlib `__: for plotting
   * `statsmodels `__
      * Needed for parts of :mod:`pandas.stats`
diff --git a/pandas/__init__.py b/pandas/__init__.py
index bf5bcc81bc21e..3aee9b2ab67d8 100644
--- a/pandas/__init__.py
+++ b/pandas/__init__.py
@@ -32,6 +32,7 @@
 from pandas.io.parsers import (read_csv, read_table, read_clipboard,
                                read_fwf, to_clipboard, ExcelFile,
                                ExcelWriter)
+from pandas.io.packers import read_msgpack, to_msgpack
 from pandas.io.pytables import HDFStore, Term, get_store, read_hdf
 from pandas.io.html import read_html
 from pandas.util.testing import debug
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index ed90aab715cfd..571ab4fab07ce 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -487,6 +487,10 @@ def to_hdf(self, path_or_buf, key, **kwargs):
         from pandas.io import pytables
         return pytables.to_hdf(path_or_buf, key, self, **kwargs)
 
+    def to_msgpack(self, path_or_buf, **kwargs):
+        from pandas.io import packers
+        return packers.to_msgpack(path_or_buf, self, **kwargs)
+
 # install the indexers
 for _name, _indexer in indexing.get_indexers_list():
     PandasObject._create_indexer(_name,_indexer)
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index 5c0f9253beb62..4628773491d61 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -15,7 +15,6 @@
 from pandas.tslib import Timestamp
 from pandas.util import py3compat
 
-
 class Block(object):
     """
     Canonical n-dimensional unit of homogeneous dtype contained in a pandas
diff --git a/pandas/io/packers.py b/pandas/io/packers.py
new file mode 100644
index 0000000000000..81c85965da4b9
--- /dev/null
+++ b/pandas/io/packers.py
@@ -0,0 +1,378 @@
+"""
+Msgpack serializer support for reading and writing pandas data structures
+to disk
+"""
+
+# portions of the msgpack_numpy package, by Lev Givon, were incorporated
+# into this module (and test_packers.py)
+
+"""
+License
+=======
+
+Copyright (c) 2013, Lev Givon.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+* Redistributions of source code must retain the above copyright
+  notice, this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above
+  copyright notice, this list of conditions and the following
+  disclaimer in the documentation and/or other materials provided
+  with the distribution.
+* Neither the name of Lev Givon nor the names of any
+  contributors may be used to endorse or promote products derived
+  from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +from datetime import datetime +import time +import re +import copy +import itertools +import warnings + +import numpy as np +from pandas import ( + Timestamp, Period, Series, TimeSeries, DataFrame, Panel, Panel4D, + Index, MultiIndex, Int64Index, PeriodIndex, DatetimeIndex, NaT +) +from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel +from pandas.sparse.array import BlockIndex, IntIndex +from pandas.tseries.api import PeriodIndex, DatetimeIndex +from pandas.core.index import Int64Index, _ensure_index +import pandas.core.common as com +from pandas.core.common import needs_i8_conversion +from pandas.core.internals import BlockManager, make_block +import pandas.core.internals as internals + +try: + import msgpack + from msgpack import _packer, _unpacker + _USE_MSGPACK = True +except: + _USE_MSGPACK = False + +def to_msgpack(path, obj, **kwargs): + """ + msgpack (serialize) object to input file path + + Parameters + ---------- + path : string + File path + obj : any object + """ + if not _USE_MSGPACK: + raise Exception("please install msgpack to create msgpack stores!") + f = open(path, 'wb') + try: + f.write(msgpack.packb(obj)) + finally: + f.close() + + +def read_msgpack(path): + """ + Load msgpack pandas object from the specified + file path + + Parameters + ---------- + path : string + File path + + Returns + ------- + obj : type of object stored in file + """ + if not _USE_MSGPACK: + raise Exception("please install msgpack to read msgpack stores!") + with open(path,'rb') as fh: + return msgpack.unpackb(fh.read()) + +dtype_dict = { 'datetime64[ns]' : np.dtype('M8[ns]'), + 'timedelta64[ns]' : np.dtype('m8[ns]') } + +def dtype_for(t): + if t in dtype_dict: + return dtype_dict[t] + return np.typeDict[t] + +c2f_dict = {'complex': np.float64, + 'complex128': np.float64, + 'complex256': np.float128, + 'complex64': np.float32} + +def c2f(r, i, ctype_name): + """ + Convert strings to complex number instance with specified numpy type. 
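+
+    A doctest sketch of the expected behavior (example added for
+    illustration; it assumes numpy's default complex repr):
+
+    >>> c2f('1.0', '2.0', 'complex128')  # illustrative only
+    (1+2j)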
+ """ + + ftype = c2f_dict[ctype_name] + return np.typeDict[ctype_name](ftype(r)+1j*ftype(i)) + +def convert(values): + """ convert the numpy values to a list """ + + dtype = values.dtype + if needs_i8_conversion(dtype): + values = values.view('i8') + return values.ravel().tolist() + + +def encode(obj): + """ + Data encoder + """ + + if isinstance(obj, Index): + if isinstance(obj, PeriodIndex): + return {'typ' : 'period_index', + 'klass' : obj.__class__.__name__, + 'name' : getattr(obj,'name',None), + 'dtype': obj.dtype.name, + 'data': obj.tolist() } + elif isinstance(obj, DatetimeIndex): + return {'typ' : 'datetime_index', + 'klass' : obj.__class__.__name__, + 'name' : getattr(obj,'name',None), + 'dtype': obj.dtype.name, + 'data': obj.values.view('i8').tolist(), + 'freq' : obj.freqstr, + 'tz' : obj.tz} + elif isinstance(obj, MultiIndex): + return {'typ' : 'multi_index', + 'klass' : obj.__class__.__name__, + 'names' : getattr(obj,'names',None), + 'dtype': obj.dtype.name, + 'data': obj.tolist() } + else: + return {'typ' : 'index', + 'klass' : obj.__class__.__name__, + 'name' : getattr(obj,'name',None), + 'dtype': obj.dtype.name, + 'data': obj.tolist() } + elif isinstance(obj, Series): + if isinstance(obj, SparseSeries): + import pdb; pdb.set_trace() + else: + return {'typ' : 'series', + 'klass' : obj.__class__.__name__, + 'name' : getattr(obj,'name',None), + 'index' : obj.index, + 'dtype': obj.dtype.name, + 'data': convert(obj.values) } + elif isinstance(obj, DataFrame): + if isinstance(obj, SparseDataFrame): + import pdb; pdb.set_trace() + else: + + data = obj._data + if not data.is_consolidated(): + data = data.consolidate() + + # the block manager + return {'typ' : 'dataframe', + 'klass' : obj.__class__.__name__, + 'axes' : data.axes, + 'blocks' : [ { 'items' : b.items, + 'values' : convert(b.values), + 'shape' : b.values.shape, + 'dtype' : b.dtype.name, + 'klass' : b.__class__.__name__ + } for b in data.blocks ] } + + elif isinstance(obj, datetime): + if isinstance(obj, Timestamp): + tz = obj.tzinfo + if tz is not None: + tz = tz.zone + offset = obj.offset + if offset is not None: + offset = offset.freqstr + return {'typ' : 'timestamp', + 'value': obj.value, + 'offset' : offset, + 'tz' : tz} + return { 'typ' : 'datetime', + 'data' : obj.isoformat() } + elif isinstance(obj, Period): + return {'typ' : 'period', + 'ordinal' : obj.ordinal, + 'freq' : obj.freq } + elif isinstance(obj, np.ndarray): + return {'typ' : 'ndarray', + 'shape': obj.shape, + 'ndim': obj.ndim, + 'dtype': obj.dtype.name, + 'data': convert(obj)} + elif isinstance(obj, np.number): + if np.iscomplexobj(obj): + return {'typ' : 'np_scalar', + 'sub_typ' : 'np_complex', + 'dtype': obj.dtype.name, + 'real': obj.real.__repr__(), + 'imag': obj.imag.__repr__()} + else: + return {'typ' : 'np_scalar', + 'dtype': obj.dtype.name, + 'data': obj.__repr__()} + elif isinstance(obj, complex): + return {'typ' : 'np_complex', + 'real': obj.real.__repr__(), + 'imag': obj.imag.__repr__()} + else: + import pdb; pdb.set_trace() + return obj + +def decode(obj): + """ + Decoder for deserializing numpy data types. 
+ """ + + typ = obj.get('typ') + if typ is None: + return obj + elif typ == 'timestamp': + return Timestamp(obj['value'],tz=obj['tz'],offset=obj['offset']) + elif typ == 'period': + return Period(ordinal=obj['ordinal'],freq=obj['freq']) + elif typ == 'index': + dtype = dtype_for(obj['dtype']) + data = obj['data'] + return globals()[obj['klass']](data,dtype=dtype,name=obj['name']) + elif typ == 'multi_index': + return globals()[obj['klass']].from_tuples(obj['data'],names=obj['names']) + elif typ == 'period_index': + return globals()[obj['klass']](obj['data'],name=obj['name']) + elif typ == 'datetime_index': + return globals()[obj['klass']](obj['data'],freq=obj['freq'],tz=obj['tz'],name=obj['name']) + elif typ == 'series': + dtype = dtype_for(obj['dtype']) + index = obj['index'] + return globals()[obj['klass']](obj['data'],index=index,dtype=dtype,name=obj['name']) + elif typ == 'dataframe': + axes = obj['axes'] + + def create_block(b): + dtype = dtype_for(b['dtype']) + return make_block(np.array(b['values'],dtype=dtype).reshape(b['shape']),b['items'],axes[0],klass=getattr(internals,b['klass'])) + + blocks = [ create_block(b) for b in obj['blocks'] ] + return globals()[obj['klass']](BlockManager(blocks, axes)) + elif typ == 'datetime': + import pdb; pdb.set_trace() + return datetime.fromtimestamp(obj['data']) + elif typ == 'ndarray': + return np.array(obj['data'], + dtype=np.typeDict[obj['dtype']], + ndmin=obj['ndim']).reshape(obj['shape']) + elif typ == 'np_scalar': + if obj.get('sub_typ') == 'np_complex': + return c2f(obj['real'], obj['imag'], obj['dtype']) + else: + return np.typeDict[obj['dtype']](obj['data']) + elif typ == 'np_complex': + return complex(obj['real']+'+'+obj['imag']+'j') + elif isinstance(obj, (dict,list,set)): + return obj + else: + import pdb; pdb.set_trace() + return obj + +def pack(o, stream, default=encode, + encoding='utf-8', unicode_errors='strict'): + """ + Pack an object and write it to a stream. + """ + + _packer.pack(o, stream, default=default, + encoding=encoding, + unicode_errors=unicode_errors) +def packb(o, default=encode, + encoding='utf-8', unicode_errors='strict', use_single_float=False): + """ + Pack an object and return the packed bytes. + """ + + return _packer.packb(o, default=default, encoding=encoding, + unicode_errors=unicode_errors, + use_single_float=use_single_float) + +def unpack(stream, object_hook=decode, list_hook=None, use_list=None, + encoding='utf-8', unicode_errors='strict', object_pairs_hook=None): + """ + Unpack a packed object from a stream. + """ + + return _unpacker.unpack(stream, object_hook=object_hook, + list_hook=list_hook, use_list=use_list, + encoding=encoding, + unicode_errors=unicode_errors, + object_pairs_hook=object_pairs_hook) +def unpackb(packed, object_hook=decode, + list_hook=None, use_list=None, encoding='utf-8', + unicode_errors='strict', object_pairs_hook=None): + """ + Unpack a packed object. 
+ """ + + return _unpacker.unpackb(packed, object_hook=object_hook, + list_hook=list_hook, + use_list=use_list, encoding=encoding, + unicode_errors=unicode_errors, + object_pairs_hook=object_pairs_hook) + +if _USE_MSGPACK: + + class Packer(_packer.Packer): + def __init__(self, default=encode, + encoding='utf-8', + unicode_errors='strict', + use_single_float=False): + super(Packer, self).__init__(default=default, + encoding=encoding, + unicode_errors=unicode_errors, + use_single_float=use_single_float) + + class Unpacker(_unpacker.Unpacker): + def __init__(self, file_like=None, read_size=0, use_list=None, + object_hook=decode, + object_pairs_hook=None, list_hook=None, encoding='utf-8', + unicode_errors='strict', max_buffer_size=0): + super(Unpacker, self).__init__(file_like=file_like, + read_size=read_size, + use_list=use_list, + object_hook=object_hook, + object_pairs_hook=object_pairs_hook, + list_hook=list_hook, + encoding=encoding, + unicode_errors=unicode_errors, + max_buffer_size=max_buffer_size) + + setattr(msgpack, 'Packer', Packer) + setattr(msgpack, 'Unpacker', Unpacker) + setattr(msgpack, 'load', unpack) + setattr(msgpack, 'loads', unpackb) + setattr(msgpack, 'dump', pack) + setattr(msgpack, 'dumps', packb) + setattr(msgpack, 'pack', pack) + setattr(msgpack, 'packb', packb) + setattr(msgpack, 'unpack', unpack) + setattr(msgpack, 'unpackb', unpackb) diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py new file mode 100644 index 0000000000000..08986cb082131 --- /dev/null +++ b/pandas/io/tests/test_packers.py @@ -0,0 +1,313 @@ +import nose +import unittest +import os +import sys +import warnings + +import datetime +import numpy as np + +from pandas import (Series, DataFrame, Panel, MultiIndex, bdate_range, + date_range, period_range, Index, SparseSeries, SparseDataFrame, + SparsePanel) +import pandas.util.testing as tm +from pandas.util.testing import ensure_clean +from pandas.tests.test_series import assert_series_equal +from pandas.tests.test_frame import assert_frame_equal +from pandas import concat, Timestamp, tslib + +from numpy.testing.decorators import slow +nan = np.nan + +from pandas.io.packers import to_msgpack, read_msgpack, _USE_MSGPACK + +if not _USE_MSGPACK: + raise nose.SkipTest('no msgpack') + +_multiprocess_can_split_ = False + +class Test(unittest.TestCase): + + def setUp(self): + self.path = '__%s__.msg' % tm.rands(10) + + def tearDown(self): + pass + + def encode_decode(self, x): + with ensure_clean(self.path) as p: + to_msgpack(p,x) + return read_msgpack(p) + +class TestNumpy(Test): + + def test_numpy_scalar_float(self): + x = np.float32(np.random.rand()) + x_rec = self.encode_decode(x) + assert x == x_rec and type(x) == type(x_rec) + + def test_numpy_scalar_complex(self): + x = np.complex64(np.random.rand()+1j*np.random.rand()) + x_rec = self.encode_decode(x) + assert x == x_rec and type(x) == type(x_rec) + + def test_scalar_float(self): + x = np.random.rand() + x_rec = self.encode_decode(x) + assert x == x_rec and type(x) == type(x_rec) + + def test_scalar_complex(self): + x = np.random.rand()+1j*np.random.rand() + x_rec = self.encode_decode(x) + assert x == x_rec and type(x) == type(x_rec) + + def test_list_numpy_float(self): + x = [np.float32(np.random.rand()) for i in xrange(5)] + x_rec = self.encode_decode(x) + assert all(map(lambda x,y: x == y, x, x_rec)) and \ + all(map(lambda x,y: type(x) == type(y), x, x_rec)) + + def test_list_numpy_float_complex(self): + x = [np.float32(np.random.rand()) for i in xrange(5)] + \ + 
[np.complex128(np.random.rand()+1j*np.random.rand()) for i in xrange(5)] + x_rec = self.encode_decode(x) + assert all(map(lambda x,y: x == y, x, x_rec)) and \ + all(map(lambda x,y: type(x) == type(y), x, x_rec)) + + def test_list_float(self): + x = [np.random.rand() for i in xrange(5)] + x_rec = self.encode_decode(x) + assert all(map(lambda x,y: x == y, x, x_rec)) and \ + all(map(lambda x,y: type(x) == type(y), x, x_rec)) + + def test_list_float_complex(self): + x = [np.random.rand() for i in xrange(5)] + \ + [(np.random.rand()+1j*np.random.rand()) for i in xrange(5)] + x_rec = self.encode_decode(x) + assert all(map(lambda x,y: x == y, x, x_rec)) and \ + all(map(lambda x,y: type(x) == type(y), x, x_rec)) + + def test_dict_float(self): + x = {'foo': 1.0, 'bar': 2.0} + x_rec = self.encode_decode(x) + assert all(map(lambda x,y: x == y, x.values(), x_rec.values())) and \ + all(map(lambda x,y: type(x) == type(y), x.values(), x_rec.values())) + + def test_dict_complex(self): + x = {'foo': 1.0+1.0j, 'bar': 2.0+2.0j} + x_rec = self.encode_decode(x) + assert all(map(lambda x,y: x == y, x.values(), x_rec.values())) and \ + all(map(lambda x,y: type(x) == type(y), x.values(), x_rec.values())) + + def test_dict_numpy_float(self): + x = {'foo': np.float32(1.0), 'bar': np.float32(2.0)} + x_rec = self.encode_decode(x) + assert all(map(lambda x,y: x == y, x.values(), x_rec.values())) and \ + all(map(lambda x,y: type(x) == type(y), x.values(), x_rec.values())) + + def test_dict_numpy_complex(self): + x = {'foo': np.complex128(1.0+1.0j), 'bar': np.complex128(2.0+2.0j)} + x_rec = self.encode_decode(x) + assert all(map(lambda x,y: x == y, x.values(), x_rec.values())) and \ + all(map(lambda x,y: type(x) == type(y), x.values(), x_rec.values())) + + def test_numpy_array_float(self): + x = np.random.rand(5).astype(np.float32) + x_rec = self.encode_decode(x) + assert all(map(lambda x,y: x == y, x, x_rec)) and \ + x.dtype == x_rec.dtype + def test_numpy_array_complex(self): + x = (np.random.rand(5)+1j*np.random.rand(5)).astype(np.complex128) + x_rec = self.encode_decode(x) + assert all(map(lambda x,y: x == y, x, x_rec)) and \ + x.dtype == x_rec.dtype + + def test_list_mixed(self): + x = [1.0, np.float32(3.5), np.complex128(4.25), u'foo'] + x_rec = self.encode_decode(x) + assert all(map(lambda x,y: x == y, x, x_rec)) and \ + all(map(lambda x,y: type(x) == type(y), x, x_rec)) +class TestBasic(Test): + + def test_timestamp(self): + + for i in [ Timestamp('20130101'), Timestamp('20130101',tz='US/Eastern'), + Timestamp('201301010501') ]: + i_rec = self.encode_decode(i) + self.assert_(i == i_rec) + +class TestIndex(Test): + + def setUp(self): + super(TestIndex, self).setUp() + + self.d = { + 'string' : tm.makeStringIndex(100), + 'date' : tm.makeDateIndex(100), + 'int' : tm.makeIntIndex(100), + 'float' : tm.makeFloatIndex(100), + 'empty' : Index([]), + 'tuple' : Index(zip(['foo', 'bar', 'baz'], [1, 2, 3])), + 'period' : Index(period_range('2012-1-1', freq='M', periods=3)), + 'date2' : Index(date_range('2013-01-1', periods=10)), + 'bdate' : Index(bdate_range('2013-01-02',periods=10)), + } + + self.mi = { + 'reg' : MultiIndex.from_tuples([('bar', 'one'), ('baz', 'two'), ('foo', 'two'), + ('qux', 'one'), ('qux', 'two')], names=['first','second']), + } + + def test_basic_index(self): + + for s, i in self.d.items(): + i_rec = self.encode_decode(i) + self.assert_(i.equals(i_rec)) + + def test_multi_index(self): + + for s, i in self.mi.items(): + i_rec = self.encode_decode(i) + self.assert_(i.equals(i_rec)) + + def 
test_unicode(self): + i = tm.makeUnicodeIndex(100) + i_rec = self.encode_decode(i) + self.assert_(i.equals(i_rec)) + +class TestSeries(Test): + + def setUp(self): + super(TestSeries, self).setUp() + + self.d = {} + + + s = tm.makeStringSeries() + s.name = 'string' + self.d['string'] = s + + s = tm.makeObjectSeries() + s.name = 'object' + self.d['object'] = s + + s = Series(tslib.iNaT, dtype='M8[ns]', index=range(5)) + self.d['date'] = s + + data = { + 'A': [0., 1., 2., 3., np.nan], + 'B': [0, 1, 0, 1, 0], + 'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'], + 'D': date_range('1/1/2009', periods=5), + 'E' : [0., 1, Timestamp('20100101'),'foo',2.], + } + + self.d['float'] = Series(data['A']) + self.d['int'] = Series(data['B']) + self.d['mixed'] = Series(data['E']) + + def test_basic(self): + + for s, i in self.d.items(): + i_rec = self.encode_decode(i) + assert_series_equal(i,i_rec) + +class TestFrame(Test): + + def setUp(self): + super(TestFrame, self).setUp() + + data = { + 'A': [0., 1., 2., 3., np.nan], + 'B': [0, 1, 0, 1, 0], + 'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'], + 'D': date_range('1/1/2009', periods=5), + 'E' : [0., 1, Timestamp('20100101'),'foo',2.], + } + + self.d = { 'float' : DataFrame(dict(A = data['A'], B = Series(data['A']) + 1)), + 'int' : DataFrame(dict(A = data['B'], B = Series(data['B']) + 1)), + 'mixed' : DataFrame(dict([ (k,data[k]) for k in ['A','B','C','D']])) } + + def test_basic(self): + + for s, i in self.d.items(): + i_rec = self.encode_decode(i) + assert_frame_equal(i,i_rec) + + def test_multi(self): + + i_rec = self.encode_decode(self.d) + for k in self.d.keys(): + assert_frame_equal(self.d[k],i_rec[k]) + + l = [ self.d['float'], self.d['float'].A, self.d['float'].B, None ] + l_rec = self.encode_decode(l) + self.assert_(len(l) == len(l_rec)) + assert_frame_equal(l[0],l_rec[0]) + assert_series_equal(l[1],l_rec[1]) + assert_series_equal(l[2],l_rec[2]) + self.assert_(l[3] == l_rec[3]) + +def _create_sp_series(): + + # nan-based + arr = np.arange(15, dtype=float) + index = np.arange(15) + arr[7:12] = nan + arr[-1:] = nan + + date_index = bdate_range('1/1/2011', periods=len(index)) + bseries = SparseSeries(arr, index=index, kind='block') + bseries.name = 'bseries' + return bseries + +def _create_sp_frame(): + + data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], + 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], + 'C': np.arange(10), + 'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]} + + dates = bdate_range('1/1/2011', periods=10) + return SparseDataFrame(data, index=dates) + +def create_data(): + """ create the pickle data """ + + data = { + 'A': [0., 1., 2., 3., np.nan], + 'B': [0, 1, 0, 1, 0], + 'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'], + 'D': date_range('1/1/2009', periods=5), + 'E' : [0., 1, Timestamp('20100101'),'foo',2.], + } + + index = dict(int = Index(np.arange(10)), + date = date_range('20130101',periods=10)) + mi = dict(reg = MultiIndex.from_tuples(zip([['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]), + names=['first', 'second'])) + series = dict(float = Series(data['A']), + int = Series(data['B']), + mixed = Series(data['E'])) + frame = dict(float = DataFrame(dict(A = series['float'], B = series['float'] + 1)), + int = DataFrame(dict(A = series['int'] , B = series['int'] + 1)), + mixed = DataFrame(dict([ (k,data[k]) for k in ['A','B','C','D']]))) + panel = dict(float = Panel(dict(ItemA = frame['float'], ItemB = frame['float']+1))) + + + + return dict( series = series, + frame = frame, + 
panel = panel,
+                 index = index,
+                 mi = mi,
+                 sp_series = dict(float = _create_sp_series()),
+                 sp_frame = dict(float = _create_sp_frame())
+                 )
+
+if __name__ == '__main__':
+    import nose
+    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
+                   exit=False)
diff --git a/vb_suite/packers.py b/vb_suite/packers.py
new file mode 100644
index 0000000000000..6733b5fa6dfb8
--- /dev/null
+++ b/vb_suite/packers.py
@@ -0,0 +1,80 @@
+from vbench.api import Benchmark
+from datetime import datetime
+
+start_date = datetime(2013, 5, 1)
+
+common_setup = """from pandas_vb_common import *
+import os
+from pandas.io import packers
+from pandas.core import common as com
+
+f = '__test__.msg'
+def remove(f):
+    try:
+        os.remove(f)
+    except:
+        pass
+
+"""
+
+#----------------------------------------------------------------------
+# read a pack
+
+setup1 = common_setup + """
+index = date_range('20000101',periods=25000,freq='H')
+df = DataFrame({'float1' : randn(25000),
+                'float2' : randn(25000)},
+               index=index)
+remove(f)
+packers.to_msgpack(f,df)
+"""
+
+read_pack = Benchmark("packers.read_msgpack(f)", setup1,
+                      start_date=start_date)
+
+
+#----------------------------------------------------------------------
+# write to a pack
+
+setup2 = common_setup + """
+index = date_range('20000101',periods=25000,freq='H')
+df = DataFrame({'float1' : randn(25000),
+                'float2' : randn(25000)},
+               index=index)
+remove(f)
+"""
+
+write_pack = Benchmark(
+    "packers.to_msgpack(f,df)", setup2, cleanup="remove(f)",
+    start_date=start_date)
+
+#----------------------------------------------------------------------
+# read a pickle
+
+setup1 = common_setup + """
+index = date_range('20000101',periods=25000,freq='H')
+df = DataFrame({'float1' : randn(25000),
+                'float2' : randn(25000)},
+               index=index)
+remove(f)
+df.save(f)
+"""
+
+read_pickle = Benchmark("com.load(f)", setup1,
+                        start_date=start_date)
+
+
+#----------------------------------------------------------------------
+# write to a pickle
+
+setup2 = common_setup + """
+index = date_range('20000101',periods=25000,freq='H')
+df = DataFrame({'float1' : randn(25000),
+                'float2' : randn(25000)},
+               index=index)
+remove(f)
+"""
+
+write_pickle = Benchmark(
+    "df.save(f)", setup2, cleanup="remove(f)",
+    start_date=start_date)
diff --git a/vb_suite/suite.py b/vb_suite/suite.py
index 905c4371837cc..4ac967dc1664a 100644
--- a/vb_suite/suite.py
+++ b/vb_suite/suite.py
@@ -16,6 +16,7 @@
           'join_merge',
           'miscellaneous',
           'panel_ctor',
+          'packers',
           'parser',
           'reindex',
           'replace',
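
A minimal sketch of the surface area added by this first patch; ``df`` here is
a placeholder DataFrame, not a name defined in the patch itself::

    import pandas as pd

    df.to_msgpack('foo.msg')      # NDFrame method added in pandas/core/generic.py
    pd.read_msgpack('foo.msg')    # module-level reader exported in pandas/__init__.py
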
From 4870ad95b23c7ac77880e3d388deb0ca48c66a4a Mon Sep 17 00:00:00 2001
From: jreback
Date: Fri, 3 May 2013 19:37:05 -0400
Subject: [PATCH 2/5] DOC: added mentions in release notes, v0.11.1, basics

ENH: provide automatic list if multiple args passed to to_msgpack
DOC: changed docs to 0.12
ENH: iterator support for stream unpacking
---
 RELEASE.rst                     |   3 +
 doc/source/basics.rst           |  41 ------------
 doc/source/io.rst               |  73 +++++++++++++++++++++
 doc/source/v0.11.1.txt          |   9 ++-
 doc/source/v0.12.0.txt          |  35 ++++++++++
 doc/source/whatsnew.rst         |   2 +
 pandas/io/packers.py            | 113 +++++++++++++++++---------------
 pandas/io/tests/test_packers.py |  34 ++++++++--
 8 files changed, 207 insertions(+), 103 deletions(-)
 create mode 100644 doc/source/v0.12.0.txt

diff --git a/RELEASE.rst b/RELEASE.rst
index 77e8e85db6a76..cefb18c9f0ddf 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -32,6 +32,8 @@ pandas 0.11.1
   - pd.read_html() can now parse HTML string, files or urls and return dataframes
     courtesy of @cpcloud. (GH3477_)
+  - ``pd.read_msgpack()`` and ``pd.to_msgpack()`` are now a supported method of serialization
+    of arbitrary pandas (and Python) objects in a lightweight, portable binary format (GH686_)
 
 **Improvements to existing features**
 
@@ -75,6 +77,7 @@ pandas 0.11.1
 
 .. _GH3164: https://github.com/pydata/pandas/issues/3164
 .. _GH2786: https://github.com/pydata/pandas/issues/2786
+.. _GH686: https://github.com/pydata/pandas/issues/686
 .. _GH2194: https://github.com/pydata/pandas/issues/2194
 .. _GH3230: https://github.com/pydata/pandas/issues/3230
 .. _GH3251: https://github.com/pydata/pandas/issues/3251
diff --git a/doc/source/basics.rst b/doc/source/basics.rst
index 5739fe0922d6d..c6f036d9541a6 100644
--- a/doc/source/basics.rst
+++ b/doc/source/basics.rst
@@ -1192,47 +1192,6 @@ While float dtypes are unchanged.
    casted
    casted.dtypes
 
-.. _basics.serialize:
-
-Pickling and serialization
---------------------------
-
-All pandas objects are equipped with ``save`` methods which use Python's
-``cPickle`` module to save data structures to disk using the pickle format.
-
-.. ipython:: python
-
-   df
-   df.save('foo.pickle')
-
-The ``load`` function in the ``pandas`` namespace can be used to load any
-pickled pandas object (or any other pickled object) from file:
-
-
-.. ipython:: python
-
-   load('foo.pickle')
-
-There is also a ``save`` function which takes any object as its first argument:
-
-.. ipython:: python
-
-   save(df, 'foo.pickle')
-   load('foo.pickle')
-
-.. ipython:: python
-   :suppress:
-
-   import os
-   os.remove('foo.pickle')
-
-.. warning::
-
-   Loading pickled data received from untrusted sources can be unsafe.
-
-   See: http://docs.python.org/2.7/library/pickle.html
-
-
 Working with package options
 ----------------------------
diff --git a/doc/source/io.rst b/doc/source/io.rst
index 9001ae393d552..1d90cf98067ea 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -981,6 +981,79 @@ one can use the ExcelWriter class, as in the following example:
 
 .. _io.hdf5:
 
+.. _basics.serialize:
+
+Serialization
+-------------
+
+msgpack
+~~~~~~~
+
+Starting in 0.12.0, pandas supports the ``msgpack`` format for
+object serialization. This is a lightweight, portable binary format, similar
+to binary JSON, that is highly space efficient, and provides good performance
+both for writing (serialization) and reading (deserialization).
+
+.. ipython:: python
+
+   df = DataFrame(np.random.rand(5,2),columns=list('AB'))
+   df.to_msgpack('foo.msg')
+   pd.read_msgpack('foo.msg')
+   s = Series(np.random.rand(5),index=date_range('20130101',periods=5))
+
+You can pass a list of objects and you will receive them back on deserialization.
+
+.. ipython:: python
+
+   pd.to_msgpack('foo.msg', df, 'foo', np.array([1,2,3]), s)
+   pd.read_msgpack('foo.msg')
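+
+Note that because the unpacker is constructed with ``use_list=False``, a python
+``list`` nested inside a packed object comes back as a ``tuple``. A small sketch
+of this round-trip oddity (example added for illustration):
+
+.. ipython:: python
+
+   pd.to_msgpack('foo.msg', {'a': [1, 2, 3]})   # the nested list returns as a tuple
+   pd.read_msgpack('foo.msg')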
+
+.. ipython:: python
+   :suppress:
+   :okexcept:
+
+   os.remove('foo.msg')
+
+
+pickling
+~~~~~~~~
+
+All pandas objects are equipped with ``save`` methods which use Python's
+``cPickle`` module to save data structures to disk using the pickle format.
+
+.. ipython:: python
+
+   df
+   df.save('foo.pickle')
+
+The ``load`` function in the ``pandas`` namespace can be used to load any
+pickled pandas object (or any other pickled object) from file:
+
+.. ipython:: python
+
+   load('foo.pickle')
+
+There is also a ``save`` function which takes any object as its first argument:
+
+.. ipython:: python
+
+   save(df, 'foo.pickle')
+   load('foo.pickle')
+
+.. ipython:: python
+   :suppress:
+
+   import os
+   os.remove('foo.pickle')
+
+.. warning::
+
+   Loading pickled data received from untrusted sources can be unsafe.
+
+   See: http://docs.python.org/2.7/library/pickle.html
+
+
 HDF5 (PyTables)
 ---------------
diff --git a/doc/source/v0.11.1.txt b/doc/source/v0.11.1.txt
index 5cfb24d71e19b..d87e9eea8a35e 100644
--- a/doc/source/v0.11.1.txt
+++ b/doc/source/v0.11.1.txt
@@ -1,10 +1,9 @@
-.. _whatsnew_0120:
+.. _whatsnew_0111:
 
-v0.12.0 (??)
+v0.11.1 (??)
 ------------------------
 
-This is a major release from 0.11.0 and includes many new features and
-enhancements along with a large number of bug fixes.
+This is a minor release from 0.11.0 and includes a small number of enhancements and bug fixes.
 
 API changes
 ~~~~~~~~~~~
@@ -12,7 +11,7 @@ API changes
 Enhancements
 ~~~~~~~~~~~~
 
-  - pd.read_html() can now parse HTML string, files or urls and return dataframes
+  - ``pd.read_html()`` can now parse HTML string, files or urls and return dataframes
     courtesy of @cpcloud. (GH3477_)
 
 See the `full release notes
diff --git a/doc/source/v0.12.0.txt b/doc/source/v0.12.0.txt
new file mode 100644
index 0000000000000..c680af0221a38
--- /dev/null
+++ b/doc/source/v0.12.0.txt
@@ -0,0 +1,35 @@
+.. _whatsnew_0120:
+
+v0.12.0 (??????)
+----------------
+
+This is a major release from 0.11.1 and includes many new features and
+enhancements along with a large number of bug fixes. There are also a
+number of important API changes that long-time pandas users should
+pay close attention to.
+
+Enhancements
+~~~~~~~~~~~~
+
+- ``pd.read_msgpack()`` and ``pd.to_msgpack()`` are now a supported method of serialization
+  of arbitrary pandas (and Python) objects in a lightweight, portable binary format
+
+  .. ipython:: python
+
+     df = DataFrame(np.random.rand(5,2),columns=list('AB'))
+     df.to_msgpack('foo.msg')
+     pd.read_msgpack('foo.msg')
+
+     s = Series(np.random.rand(5),index=date_range('20130101',periods=5))
+     pd.to_msgpack('foo.msg', df, s)
+     pd.read_msgpack('foo.msg')
+
+  .. ipython:: python
+     :suppress:
+     :okexcept:
+
+     os.remove('foo.msg')
+
+See the `full release notes
+`__ or issue tracker
+on GitHub for a complete list.
diff --git a/doc/source/whatsnew.rst b/doc/source/whatsnew.rst
index 81bd39dd0e70f..a02e41176ced1 100644
--- a/doc/source/whatsnew.rst
+++ b/doc/source/whatsnew.rst
@@ -18,6 +18,8 @@ These are new features and improvements of note in each release.
 
 .. include:: v0.12.0.txt
 
+.. include:: v0.11.1.txt
+
 .. include:: v0.11.0.txt
 
 .. 
include:: v0.10.1.txt diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 81c85965da4b9..ecd0ee1d02f5f 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -68,7 +68,7 @@ except: _USE_MSGPACK = False -def to_msgpack(path, obj, **kwargs): +def to_msgpack(path, *args, **kwargs): """ msgpack (serialize) object to input file path @@ -76,18 +76,30 @@ def to_msgpack(path, obj, **kwargs): ---------- path : string File path - obj : any object + args : an object or objects to serialize + + append : boolean whether to append to an existing msgpack + (default is False) """ if not _USE_MSGPACK: raise Exception("please install msgpack to create msgpack stores!") - f = open(path, 'wb') + + append = kwargs.get('append') + if append: + f = open(path, 'a+b') + else: + f = open(path, 'wb') try: - f.write(msgpack.packb(obj)) + if len(args) == 1: + f.write(pack(args[0])) + else: + for a in args: + f.write(pack(a)) finally: f.close() -def read_msgpack(path): +def read_msgpack(path, iterator=False, **kwargs): """ Load msgpack pandas object from the specified file path @@ -96,15 +108,24 @@ def read_msgpack(path): ---------- path : string File path + iterator : boolean, if True, return an iterator to the unpacker + (default is False) Returns ------- obj : type of object stored in file + """ if not _USE_MSGPACK: raise Exception("please install msgpack to read msgpack stores!") + if iterator: + return Iterator(path) + with open(path,'rb') as fh: - return msgpack.unpackb(fh.read()) + l = list(unpack(fh)) + if len(l) == 1: + return l[0] + return l dtype_dict = { 'datetime64[ns]' : np.dtype('M8[ns]'), 'timedelta64[ns]' : np.dtype('m8[ns]') } @@ -296,48 +317,29 @@ def create_block(b): import pdb; pdb.set_trace() return obj -def pack(o, stream, default=encode, - encoding='utf-8', unicode_errors='strict'): - """ - Pack an object and write it to a stream. - """ - - _packer.pack(o, stream, default=default, - encoding=encoding, - unicode_errors=unicode_errors) -def packb(o, default=encode, - encoding='utf-8', unicode_errors='strict', use_single_float=False): +def pack(o, default=encode, + encoding='utf-8', unicode_errors='strict', use_single_float=False): """ Pack an object and return the packed bytes. """ - return _packer.packb(o, default=default, encoding=encoding, - unicode_errors=unicode_errors, - use_single_float=use_single_float) - -def unpack(stream, object_hook=decode, list_hook=None, use_list=None, - encoding='utf-8', unicode_errors='strict', object_pairs_hook=None): - """ - Unpack a packed object from a stream. - """ + return Packer(default=default, encoding=encoding, + unicode_errors=unicode_errors, + use_single_float=use_single_float).pack(o) - return _unpacker.unpack(stream, object_hook=object_hook, - list_hook=list_hook, use_list=use_list, - encoding=encoding, - unicode_errors=unicode_errors, - object_pairs_hook=object_pairs_hook) -def unpackb(packed, object_hook=decode, - list_hook=None, use_list=None, encoding='utf-8', - unicode_errors='strict', object_pairs_hook=None): +def unpack(packed, object_hook=decode, + list_hook=None, use_list=False, encoding='utf-8', + unicode_errors='strict', object_pairs_hook=None): """ - Unpack a packed object. 
+    Unpack a packed object, return an iterator
+    Note: packed lists will be returned as tuples
     """
 
-    return _unpacker.unpackb(packed, object_hook=object_hook,
-                             list_hook=list_hook,
-                             use_list=use_list, encoding=encoding,
-                             unicode_errors=unicode_errors,
-                             object_pairs_hook=object_pairs_hook)
+    return Unpacker(packed, object_hook=object_hook,
+                    list_hook=list_hook,
+                    use_list=use_list, encoding=encoding,
+                    unicode_errors=unicode_errors,
+                    object_pairs_hook=object_pairs_hook)
 
 if _USE_MSGPACK:
 
@@ -352,7 +354,7 @@ def __init__(self, default=encode,
                                          use_single_float=use_single_float)
 
     class Unpacker(_unpacker.Unpacker):
-        def __init__(self, file_like=None, read_size=0, use_list=None,
+        def __init__(self, file_like=None, read_size=0, use_list=False,
                      object_hook=decode,
                      object_pairs_hook=None, list_hook=None, encoding='utf-8',
                      unicode_errors='strict', max_buffer_size=0):
@@ -365,14 +367,21 @@ def __init__(self, file_like=None, read_size=0, use_list=None,
                                            encoding=encoding,
                                            unicode_errors=unicode_errors,
                                            max_buffer_size=max_buffer_size)
-
-    setattr(msgpack, 'Packer', Packer)
-    setattr(msgpack, 'Unpacker', Unpacker)
-    setattr(msgpack, 'load', unpack)
-    setattr(msgpack, 'loads', unpackb)
-    setattr(msgpack, 'dump', pack)
-    setattr(msgpack, 'dumps', packb)
-    setattr(msgpack, 'pack', pack)
-    setattr(msgpack, 'packb', packb)
-    setattr(msgpack, 'unpack', unpack)
-    setattr(msgpack, 'unpackb', unpackb)
+
+class Iterator(object):
+    """ manage the unpacking iteration,
+        close the file on completion """
+
+    def __init__(self, path, **kwargs):
+        self.path = path
+        self.kwargs = kwargs
+
+    def __iter__(self):
+
+        # open before the try block so a failed open does not leave
+        # fh unbound when the finally clause runs
+        fh = open(self.path, 'rb')
+        try:
+            for o in unpack(fh):
+                yield o
+        finally:
+            fh.close()
diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py
index 08986cb082131..294b99d20a85b 100644
--- a/pandas/io/tests/test_packers.py
+++ b/pandas/io/tests/test_packers.py
@@ -26,6 +26,19 @@
 
 _multiprocess_can_split_ = False
 
+def check_arbitrary(a, b):
+
+    if isinstance(a,(list,tuple)) and isinstance(b,(list,tuple)):
+        assert(len(a) == len(b))
+        for a_, b_ in zip(a,b):
+            check_arbitrary(a_,b_)
+    elif isinstance(a,DataFrame):
+        assert_frame_equal(a,b)
+    elif isinstance(a,Series):
+        assert_series_equal(a,b)
+    else:
+        assert(a == b)
+
 class Test(unittest.TestCase):
 
     def setUp(self):
@@ -241,13 +254,24 @@ def test_multi(self):
         for k in self.d.keys():
             assert_frame_equal(self.d[k],i_rec[k])
 
+        l = tuple([ self.d['float'], self.d['float'].A, self.d['float'].B, None ])
+        l_rec = self.encode_decode(l)
+        check_arbitrary(l,l_rec)
+
+        # this is an oddity in that packed lists will be returned as tuples
         l = [ self.d['float'], self.d['float'].A, self.d['float'].B, None ]
         l_rec = self.encode_decode(l)
-        self.assert_(len(l) == len(l_rec))
-        assert_frame_equal(l[0],l_rec[0])
-        assert_series_equal(l[1],l_rec[1])
-        assert_series_equal(l[2],l_rec[2])
-        self.assert_(l[3] == l_rec[3])
+        self.assert_(isinstance(l_rec,tuple))
+        check_arbitrary(l,l_rec)
+
+    def test_iterator(self):
+
+        l = [ self.d['float'], self.d['float'].A, self.d['float'].B, None ]
+
+        with ensure_clean(self.path) as path:
+            to_msgpack(path,*l)
+            for i, packed in enumerate(read_msgpack(path, iterator=True)):
+                check_arbitrary(packed,l[i])
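
A minimal usage sketch of the API as of this patch; ``append`` and ``iterator``
are the options introduced here, while ``df`` and ``df2`` are placeholder
objects, not names from the patch::

    from pandas.io.packers import to_msgpack, read_msgpack

    to_msgpack('store.msg', df)                  # write a single object
    to_msgpack('store.msg', df2, append=True)    # append a second object
    for obj in read_msgpack('store.msg', iterator=True):
        print obj                                # objects stream back one at a time
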
From c9a9e3e1e4fefad203ecd25a1dfa31d038f18dd5 Mon Sep 17 00:00:00 2001
From: jreback
Date: Sat, 4 May 2013 11:13:08 -0400
Subject: [PATCH 3/5] ENH: added support for
 Panel,SparseSeries,SparseDataFrame,SparsePanel,IntIndex,BlockIndex
---
 doc/source/io.rst               |  15 ++++
 doc/source/v0.12.0.txt          |   7 ++
 pandas/io/packers.py            |  59 +++++++++++--
 pandas/io/tests/test_packers.py | 145 ++++++++++++++++++--------------
 4 files changed, 156 insertions(+), 70 deletions(-)

diff --git a/doc/source/io.rst b/doc/source/io.rst
index 1d90cf98067ea..9df6f4d3ecca6 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -1008,6 +1008,21 @@ You can pass a list of objects and you will receive them back on deserialization
    pd.to_msgpack('foo.msg', df, 'foo', np.array([1,2,3]), s)
    pd.read_msgpack('foo.msg')
 
+You can pass ``iterator=True`` to iterate over the unpacked results.
+
+.. ipython:: python
+
+   for o in pd.read_msgpack('foo.msg',iterator=True):
+       print o
+
+
+You can pass ``append=True`` to the writer to append to an existing pack.
+
+.. ipython:: python
+
+   df.to_msgpack('foo.msg',append=True)
+   pd.read_msgpack('foo.msg')
+
 .. ipython:: python
    :suppress:
    :okexcept:
diff --git a/doc/source/v0.12.0.txt b/doc/source/v0.12.0.txt
index c680af0221a38..ccb9347135c48 100644
--- a/doc/source/v0.12.0.txt
+++ b/doc/source/v0.12.0.txt
@@ -24,6 +24,13 @@ Enhancements
      pd.to_msgpack('foo.msg', df, s)
      pd.read_msgpack('foo.msg')
 
+  You can pass ``iterator=True`` to iterate over the unpacked results.
+
+  .. ipython:: python
+
+     for o in pd.read_msgpack('foo.msg',iterator=True):
+         print o
+
   .. ipython:: python
      :suppress:
      :okexcept:
diff --git a/pandas/io/packers.py b/pandas/io/packers.py
index ecd0ee1d02f5f..c31b3c4b98719 100644
--- a/pandas/io/packers.py
+++ b/pandas/io/packers.py
@@ -57,6 +57,7 @@
 from pandas.tseries.api import PeriodIndex, DatetimeIndex
 from pandas.core.index import Int64Index, _ensure_index
 import pandas.core.common as com
+from pandas.core.generic import NDFrame
 from pandas.core.common import needs_i8_conversion
 from pandas.core.internals import BlockManager, make_block
 import pandas.core.internals as internals
@@ -162,6 +163,7 @@ def encode(obj):
     Data encoder
     """
 
+    tobj = type(obj)
     if isinstance(obj, Index):
         if isinstance(obj, PeriodIndex):
             return {'typ' : 'period_index',
@@ -191,7 +193,15 @@ def encode(obj):
                     'data': obj.tolist() }
     elif isinstance(obj, Series):
         if isinstance(obj, SparseSeries):
-            import pdb; pdb.set_trace()
+            d = {'typ' : 'sparse_series',
+                 'klass' : obj.__class__.__name__,
+                 'dtype': obj.dtype.name,
+                 'index' : obj.index,
+                 'sp_index' : obj.sp_index,
+                 'sp_values' : convert(obj.sp_values)}
+            for f in ['name','fill_value','kind']:
+                d[f] = getattr(obj,f,None)
+            return d
         else:
             return {'typ' : 'series',
                     'klass' : obj.__class__.__name__,
@@ -199,9 +209,23 @@ def encode(obj):
                     'index' : obj.index,
                     'dtype': obj.dtype.name,
                     'data': convert(obj.values) }
-    elif isinstance(obj, DataFrame):
+    elif issubclass(tobj, NDFrame):
         if isinstance(obj, SparseDataFrame):
-            import pdb; pdb.set_trace()
+            d = {'typ' : 'sparse_dataframe',
+                 'klass' : obj.__class__.__name__,
+                 'columns' : obj.columns }
+            for f in ['default_fill_value','default_kind']:
+                d[f] = getattr(obj,f,None)
+            d['data'] = dict([ (name,ss) for name,ss in obj.iteritems() ])
+            return d
+        elif isinstance(obj, SparsePanel):
+            d = {'typ' : 'sparse_panel',
+                 'klass' : obj.__class__.__name__,
+                 'items' : obj.items }
+            for f in ['default_fill_value','default_kind']:
+                d[f] = getattr(obj,f,None)
+            d['data'] = dict([ (name,df) for name,df in obj.iteritems() ])
+            return d
         else:
 
             data = obj._data
@@ -209,7 +233,7 @@ def encode(obj):
                 data = data.consolidate()
 
             # the block manager
-            return {'typ' : 'dataframe',
+            return {'typ' : 'block_manager',
                     'klass' : obj.__class__.__name__,
                     'axes' : data.axes,
                     'blocks' : [ { 'items' : 
b.items, @@ -237,6 +261,17 @@ def encode(obj): return {'typ' : 'period', 'ordinal' : obj.ordinal, 'freq' : obj.freq } + elif isinstance(obj, BlockIndex): + return { 'typ' : 'block_index', + 'klass' : obj.__class__.__name__, + 'blocs' : obj.blocs, + 'blengths' : obj.blengths, + 'length' : obj.length } + elif isinstance(obj, IntIndex): + return { 'typ' : 'int_index', + 'klass' : obj.__class__.__name__, + 'indices' : obj.indices, + 'length' : obj.length } elif isinstance(obj, np.ndarray): return {'typ' : 'ndarray', 'shape': obj.shape, @@ -288,7 +323,7 @@ def decode(obj): dtype = dtype_for(obj['dtype']) index = obj['index'] return globals()[obj['klass']](obj['data'],index=index,dtype=dtype,name=obj['name']) - elif typ == 'dataframe': + elif typ == 'block_manager': axes = obj['axes'] def create_block(b): @@ -300,6 +335,20 @@ def create_block(b): elif typ == 'datetime': import pdb; pdb.set_trace() return datetime.fromtimestamp(obj['data']) + elif typ == 'sparse_series': + dtype = dtype_for(obj['dtype']) + return globals()[obj['klass']](np.array(obj['sp_values'],dtype=dtype),sparse_index=obj['sp_index'], + index=obj['index'],fill_value=obj['fill_value'],kind=obj['kind'],name=obj['name']) + elif typ == 'sparse_dataframe': + return globals()[obj['klass']](obj['data'], + columns=obj['columns'],default_fill_value=obj['default_fill_value'],default_kind=obj['default_kind']) + elif typ == 'sparse_panel': + return globals()[obj['klass']](obj['data'], + items=obj['items'],default_fill_value=obj['default_fill_value'],default_kind=obj['default_kind']) + elif typ == 'block_index': + return globals()[obj['klass']](obj['length'],obj['blocs'],obj['blengths']) + elif typ == 'int_index': + return globals()[obj['klass']](obj['length'],obj['indices']) elif typ == 'ndarray': return np.array(obj['data'], dtype=np.typeDict[obj['dtype']], diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py index 294b99d20a85b..741df495fad7d 100644 --- a/pandas/io/tests/test_packers.py +++ b/pandas/io/tests/test_packers.py @@ -14,6 +14,10 @@ from pandas.util.testing import ensure_clean from pandas.tests.test_series import assert_series_equal from pandas.tests.test_frame import assert_frame_equal +from pandas.tests.test_panel import assert_panel_equal + +import pandas +from pandas.sparse.tests.test_sparse import assert_sp_series_equal, assert_sp_frame_equal from pandas import concat, Timestamp, tslib from numpy.testing.decorators import slow @@ -32,6 +36,8 @@ def check_arbitrary(a, b): assert(len(a) == len(b)) for a_, b_ in zip(a,b): check_arbitrary(a_,b_) + elif isinstance(a,Panel): + assert_panel_equal(a,b) elif isinstance(a,DataFrame): assert_frame_equal(a,b) elif isinstance(a,Series): @@ -225,10 +231,10 @@ def test_basic(self): i_rec = self.encode_decode(i) assert_series_equal(i,i_rec) -class TestFrame(Test): +class TestNDFrame(Test): def setUp(self): - super(TestFrame, self).setUp() + super(TestNDFrame, self).setUp() data = { 'A': [0., 1., 2., 3., np.nan], @@ -238,98 +244,107 @@ def setUp(self): 'E' : [0., 1, Timestamp('20100101'),'foo',2.], } - self.d = { 'float' : DataFrame(dict(A = data['A'], B = Series(data['A']) + 1)), - 'int' : DataFrame(dict(A = data['B'], B = Series(data['B']) + 1)), - 'mixed' : DataFrame(dict([ (k,data[k]) for k in ['A','B','C','D']])) } + self.frame = { 'float' : DataFrame(dict(A = data['A'], B = Series(data['A']) + 1)), + 'int' : DataFrame(dict(A = data['B'], B = Series(data['B']) + 1)), + 'mixed' : DataFrame(dict([ (k,data[k]) for k in ['A','B','C','D']])) } + + self.panel = { 
'float' : Panel(dict(ItemA = self.frame['float'], ItemB = self.frame['float']+1)) } - def test_basic(self): + def test_basic_frame(self): - for s, i in self.d.items(): + for s, i in self.frame.items(): i_rec = self.encode_decode(i) assert_frame_equal(i,i_rec) + def test_basic_panel(self): + + for s, i in self.panel.items(): + i_rec = self.encode_decode(i) + assert_panel_equal(i,i_rec) + def test_multi(self): - i_rec = self.encode_decode(self.d) - for k in self.d.keys(): - assert_frame_equal(self.d[k],i_rec[k]) + i_rec = self.encode_decode(self.frame) + for k in self.frame.keys(): + assert_frame_equal(self.frame[k],i_rec[k]) - l = tuple([ self.d['float'], self.d['float'].A, self.d['float'].B, None ]) + l = tuple([ self.frame['float'], self.frame['float'].A, self.frame['float'].B, None ]) l_rec = self.encode_decode(l) check_arbitrary(l,l_rec) # this is an oddity in that packed lists will be returned as tuples - l = [ self.d['float'], self.d['float'].A, self.d['float'].B, None ] + l = [ self.frame['float'], self.frame['float'].A, self.frame['float'].B, None ] l_rec = self.encode_decode(l) self.assert_(isinstance(l_rec,tuple)) check_arbitrary(l,l_rec) def test_iterator(self): - l = [ self.d['float'], self.d['float'].A, self.d['float'].B, None ] + l = [ self.frame['float'], self.frame['float'].A, self.frame['float'].B, None ] with ensure_clean(self.path) as path: to_msgpack(path,*l) for i, packed in enumerate(read_msgpack(path, iterator=True)): check_arbitrary(packed,l[i]) -def _create_sp_series(): +class TestSparse(Test): - # nan-based - arr = np.arange(15, dtype=float) - index = np.arange(15) - arr[7:12] = nan - arr[-1:] = nan + def _check_roundtrip(self, obj, comparator, **kwargs): - date_index = bdate_range('1/1/2011', periods=len(index)) - bseries = SparseSeries(arr, index=index, kind='block') - bseries.name = 'bseries' - return bseries + i_rec = self.encode_decode(obj) + comparator(obj,i_rec,**kwargs) -def _create_sp_frame(): + def test_sparse_series(self): - data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], - 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], - 'C': np.arange(10), - 'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]} - - dates = bdate_range('1/1/2011', periods=10) - return SparseDataFrame(data, index=dates) + s = tm.makeStringSeries() + s[3:5] = np.nan + ss = s.to_sparse() + self._check_roundtrip(ss, tm.assert_series_equal, + check_series_type=True) + + ss2 = s.to_sparse(kind='integer') + self._check_roundtrip(ss2, tm.assert_series_equal, + check_series_type=True) + + ss3 = s.to_sparse(fill_value=0) + self._check_roundtrip(ss3, tm.assert_series_equal, + check_series_type=True) + + def test_sparse_frame(self): + + s = tm.makeDataFrame() + s.ix[3:5, 1:3] = np.nan + s.ix[8:10, -2] = np.nan + ss = s.to_sparse() + + self._check_roundtrip(ss, tm.assert_frame_equal, + check_frame_type=True) + + ss2 = s.to_sparse(kind='integer') + self._check_roundtrip(ss2, tm.assert_frame_equal, + check_frame_type=True) + + ss3 = s.to_sparse(fill_value=0) + self._check_roundtrip(ss3, tm.assert_frame_equal, + check_frame_type=True) + + def test_sparse_panel(self): + + items = ['x', 'y', 'z'] + p = Panel(dict((i, tm.makeDataFrame().ix[:2, :2]) for i in items)) + sp = p.to_sparse() + + self._check_roundtrip(sp, tm.assert_panel_equal, + check_panel_type=True) + + sp2 = p.to_sparse(kind='integer') + self._check_roundtrip(sp2, tm.assert_panel_equal, + check_panel_type=True) + + sp3 = p.to_sparse(fill_value=0) + self._check_roundtrip(sp3, tm.assert_panel_equal, + check_panel_type=True) -def create_data(): - """ 
create the pickle data """ - - data = { - 'A': [0., 1., 2., 3., np.nan], - 'B': [0, 1, 0, 1, 0], - 'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'], - 'D': date_range('1/1/2009', periods=5), - 'E' : [0., 1, Timestamp('20100101'),'foo',2.], - } - - index = dict(int = Index(np.arange(10)), - date = date_range('20130101',periods=10)) - mi = dict(reg = MultiIndex.from_tuples(zip([['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], - ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]), - names=['first', 'second'])) - series = dict(float = Series(data['A']), - int = Series(data['B']), - mixed = Series(data['E'])) - frame = dict(float = DataFrame(dict(A = series['float'], B = series['float'] + 1)), - int = DataFrame(dict(A = series['int'] , B = series['int'] + 1)), - mixed = DataFrame(dict([ (k,data[k]) for k in ['A','B','C','D']]))) - panel = dict(float = Panel(dict(ItemA = frame['float'], ItemB = frame['float']+1))) - - - - return dict( series = series, - frame = frame, - panel = panel, - index = index, - mi = mi, - sp_series = dict(float = _create_sp_series()), - sp_frame = dict(float = _create_sp_frame()) - ) if __name__ == '__main__': import nose From a55e7e4e2f977b2bbfb8099c5d5530f157022a70 Mon Sep 17 00:00:00 2001 From: jreback Date: Sat, 4 May 2013 18:43:12 -0400 Subject: [PATCH 4/5] ENH: handle np.datetime64,np.timedelta64,date,timedelta types --- pandas/io/packers.py | 65 ++++++++++++++++++++++----------- pandas/io/tests/test_packers.py | 16 ++++++++ 2 files changed, 60 insertions(+), 21 deletions(-) diff --git a/pandas/io/packers.py b/pandas/io/packers.py index c31b3c4b98719..f90ff34cfde92 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -40,12 +40,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
""" -from datetime import datetime +from datetime import datetime, date, timedelta import time import re import copy import itertools import warnings +from dateutil.parser import parse import numpy as np from pandas import ( @@ -128,8 +129,12 @@ def read_msgpack(path, iterator=False, **kwargs): return l[0] return l -dtype_dict = { 'datetime64[ns]' : np.dtype('M8[ns]'), - 'timedelta64[ns]' : np.dtype('m8[ns]') } +dtype_dict = { 21 : np.dtype('M8[ns]'), + u'datetime64[ns]' : np.dtype('M8[ns]'), + u'datetime64[us]' : np.dtype('M8[us]'), + 22 : np.dtype('m8[ns]'), + u'timedelta64[ns]' : np.dtype('m8[ns]'), + u'timedelta64[us]' : np.dtype('m8[us]') } def dtype_for(t): if t in dtype_dict: @@ -169,13 +174,13 @@ def encode(obj): return {'typ' : 'period_index', 'klass' : obj.__class__.__name__, 'name' : getattr(obj,'name',None), - 'dtype': obj.dtype.name, + 'dtype': obj.dtype.num, 'data': obj.tolist() } elif isinstance(obj, DatetimeIndex): return {'typ' : 'datetime_index', 'klass' : obj.__class__.__name__, 'name' : getattr(obj,'name',None), - 'dtype': obj.dtype.name, + 'dtype': obj.dtype.num, 'data': obj.values.view('i8').tolist(), 'freq' : obj.freqstr, 'tz' : obj.tz} @@ -183,19 +188,19 @@ def encode(obj): return {'typ' : 'multi_index', 'klass' : obj.__class__.__name__, 'names' : getattr(obj,'names',None), - 'dtype': obj.dtype.name, + 'dtype': obj.dtype.num, 'data': obj.tolist() } else: return {'typ' : 'index', 'klass' : obj.__class__.__name__, 'name' : getattr(obj,'name',None), - 'dtype': obj.dtype.name, + 'dtype': obj.dtype.num, 'data': obj.tolist() } elif isinstance(obj, Series): if isinstance(obj, SparseSeries): d = {'typ' : 'sparse_series', 'klass' : obj.__class__.__name__, - 'dtype': obj.dtype.name, + 'dtype': obj.dtype.num, 'index' : obj.index, 'sp_index' : obj.sp_index, 'sp_values' : convert(obj.sp_values)} @@ -207,7 +212,7 @@ def encode(obj): 'klass' : obj.__class__.__name__, 'name' : getattr(obj,'name',None), 'index' : obj.index, - 'dtype': obj.dtype.name, + 'dtype': obj.dtype.num, 'data': convert(obj.values) } elif issubclass(tobj, NDFrame): if isinstance(obj, SparseDataFrame): @@ -239,11 +244,11 @@ def encode(obj): 'blocks' : [ { 'items' : b.items, 'values' : convert(b.values), 'shape' : b.values.shape, - 'dtype' : b.dtype.name, + 'dtype' : b.dtype.num, 'klass' : b.__class__.__name__ } for b in data.blocks ] } - elif isinstance(obj, datetime): + elif isinstance(obj, (datetime,date,timedelta)): if isinstance(obj, Timestamp): tz = obj.tzinfo if tz is not None: @@ -255,8 +260,16 @@ def encode(obj): 'value': obj.value, 'offset' : offset, 'tz' : tz} - return { 'typ' : 'datetime', - 'data' : obj.isoformat() } + elif isinstance(obj, timedelta): + return { 'typ' : 'timedelta', + 'data' : (obj.days,obj.seconds,obj.microseconds) } + elif isinstance(obj, datetime): + return { 'typ' : 'datetime', + 'data' : obj.isoformat() } + elif isinstance(obj, date): + return { 'typ' : 'date', + 'data' : obj.isoformat() } + raise Exception("cannot encode this datetimelike object: %s" % obj) elif isinstance(obj, Period): return {'typ' : 'period', 'ordinal' : obj.ordinal, @@ -276,8 +289,11 @@ def encode(obj): return {'typ' : 'ndarray', 'shape': obj.shape, 'ndim': obj.ndim, - 'dtype': obj.dtype.name, + 'dtype': obj.dtype.num, 'data': convert(obj)} + elif isinstance(obj, np.timedelta64): + return { 'typ' : 'np_timedelta64', + 'data' : obj.view('i8') } elif isinstance(obj, np.number): if np.iscomplexobj(obj): return {'typ' : 'np_scalar', @@ -293,9 +309,8 @@ def encode(obj): return {'typ' : 'np_complex', 'real': 
@@ -333,8 +348,11 @@ def create_block(b):
         blocks = [ create_block(b) for b in obj['blocks'] ]
         return globals()[obj['klass']](BlockManager(blocks, axes))
     elif typ == 'datetime':
-        import pdb; pdb.set_trace()
-        return datetime.fromtimestamp(obj['data'])
+        return parse(obj['data'])
+    elif typ == 'date':
+        return parse(obj['data']).date()
+    elif typ == 'timedelta':
+        return timedelta(*obj['data'])
     elif typ == 'sparse_series':
         dtype = dtype_for(obj['dtype'])
         return globals()[obj['klass']](np.array(obj['sp_values'], dtype=dtype), sparse_index=obj['sp_index'],
@@ -353,17 +371,22 @@ def create_block(b):
         return np.array(obj['data'],
                         dtype=np.typeDict[obj['dtype']],
                         ndmin=obj['ndim']).reshape(obj['shape'])
+    elif typ == 'np_timedelta64':
+        return np.timedelta64(obj['data'])
     elif typ == 'np_scalar':
         if obj.get('sub_typ') == 'np_complex':
             return c2f(obj['real'], obj['imag'], obj['dtype'])
         else:
-            return np.typeDict[obj['dtype']](obj['data'])
+            dtype = dtype_for(obj['dtype'])
+            try:
+                return dtype(obj['data'])
+            except:
+                return dtype.type(obj['data'])
     elif typ == 'np_complex':
         return complex(obj['real'] + '+' + obj['imag'] + 'j')
     elif isinstance(obj, (dict, list, set)):
         return obj
     else:
-        import pdb; pdb.set_trace()
         return obj

 def pack(o, default=encode,
diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py
index 741df495fad7d..f9e25f3956d38 100644
--- a/pandas/io/tests/test_packers.py
+++ b/pandas/io/tests/test_packers.py
@@ -155,6 +155,22 @@ def test_timestamp(self):
         i_rec = self.encode_decode(i)
         self.assert_(i == i_rec)

+    def test_datetimes(self):
+
+        for i in [ datetime.datetime(2013, 1, 1),
+                   datetime.datetime(2013, 1, 1, 5, 1),
+                   datetime.date(2013, 1, 1),
+                   np.datetime64('2013-01-05 2:15') ]:
+            i_rec = self.encode_decode(i)
+            self.assert_(i == i_rec)
+
+    def test_timedeltas(self):
+
+        for i in [ datetime.timedelta(days=1),
+                   datetime.timedelta(days=1, seconds=10),
+                   np.timedelta64(1000000) ]:
+            i_rec = self.encode_decode(i)
+            self.assert_(i == i_rec)
+
+
 class TestIndex(Test):

     def setUp(self):
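Concretely, the new scalar paths round-trip like this (a small sketch calling encode/decode directly rather than through a file; the reprs shown are from Python 2-era datetime):

    import datetime
    import numpy as np
    from pandas.io.packers import encode, decode

    decode(encode(datetime.datetime(2013, 1, 1, 5, 1)))
    # datetime.datetime(2013, 1, 1, 5, 1)   -- rebuilt via dateutil.parser.parse

    decode(encode(datetime.timedelta(days=1, seconds=10)))
    # datetime.timedelta(1, 10)             -- rebuilt from (days, seconds, microseconds)

    decode(encode(np.timedelta64(1000000)))
    # numpy.timedelta64(1000000)            -- stored as its i8 view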
From 5a02cdf0f36fd37853583671ec348882a01b30e2 Mon Sep 17 00:00:00 2001
From: jreback
Date: Sun, 5 May 2013 20:25:45 -0400
Subject: [PATCH 5/5] TST: added compression (zlib/blosc) via big hack

---
 pandas/io/packers.py | 93 +++++++++++++++++++++++++++++++++++++-------
 1 file changed, 78 insertions(+), 15 deletions(-)

diff --git a/pandas/io/packers.py b/pandas/io/packers.py
index f90ff34cfde92..bc32c3c4d4011 100644
--- a/pandas/io/packers.py
+++ b/pandas/io/packers.py
@@ -70,6 +70,18 @@
 except:
     _USE_MSGPACK = False

+import zlib
+
+try:
+    import blosc
+    _BLOSC = True
+except:
+    _BLOSC = False
+
+## until we can pass this into our conversion functions,
+## this is pretty hacky
+compressor = None
+
 def to_msgpack(path, *args, **kwargs):
     """
     msgpack (serialize) object to input file path
@@ -82,10 +94,13 @@ def to_msgpack(path, *args, **kwargs):
     append : boolean whether to append to an existing msgpack
              (default is False)
+    compress : compressor to use ('zlib' or 'blosc'); defaults to None (no compression)
     """
     if not _USE_MSGPACK:
         raise Exception("please install msgpack to create msgpack stores!")

+    global compressor
+    compressor = kwargs.get('compress')
     append = kwargs.get('append')
     if append:
         f = open(path, 'a+b')
@@ -154,14 +169,60 @@ def c2f(r, i, ctype_name):
     ftype = c2f_dict[ctype_name]
     return np.typeDict[ctype_name](ftype(r) + 1j * ftype(i))

+
 def convert(values):
     """ convert the numpy values to a list """

     dtype = values.dtype
     if needs_i8_conversion(dtype):
         values = values.view('i8')
-    return values.ravel().tolist()
+    v = values.ravel()
+
+    if compressor == 'zlib':
+
+        # return string arrays like they are
+        if dtype == np.object_:
+            return v.tolist()
+
+        # convert to a bytes array
+        v = v.tostring()
+        return zlib.compress(v)
+
+    elif compressor == 'blosc' and _BLOSC:
+
+        # return string arrays like they are
+        if dtype == np.object_:
+            return v.tolist()
+
+        # convert to a bytes array
+        v = v.tostring()
+        return blosc.compress(v, typesize=dtype.itemsize)
+
+    # uncompressed, as a plain list
+    return v.tolist()
+
+def unconvert(values, dtype, compress):
+
+    if dtype == np.object_:
+        return np.array(values, dtype=object)
+
+    if compress == 'zlib':
+
+        values = zlib.decompress(values)
+        return np.frombuffer(values, dtype=dtype)
+
+    elif compress == 'blosc':
+
+        if not _BLOSC:
+            raise Exception("cannot uncompress w/o blosc")
+
+        # decompress
+        values = blosc.decompress(values)
+        return np.frombuffer(values, dtype=dtype)
+
+    # uncompressed, from a plain list
+    return np.array(values, dtype=dtype)

 def encode(obj):
     """
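The contract between convert() and unconvert() is simply bytes-in/bytes-out around the raveled values; a self-contained sketch of the zlib path (array contents illustrative):

    import zlib
    import numpy as np

    arr = np.arange(10, dtype='float64')

    # what convert() produces when compressor == 'zlib'
    packed = zlib.compress(arr.tostring())

    # what unconvert() does on the way back
    out = np.frombuffer(zlib.decompress(packed), dtype=np.dtype('float64'))
    assert (out == arr).all()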
@@ -203,7 +264,8 @@ def encode(obj):
                  'dtype': obj.dtype.num,
                  'index' : obj.index,
                  'sp_index' : obj.sp_index,
-                 'sp_values' : convert(obj.sp_values)}
+                 'sp_values' : convert(obj.sp_values),
+                 'compress' : compressor}
             for f in ['name', 'fill_value', 'kind']:
                 d[f] = getattr(obj, f, None)
             return d
@@ -213,7 +275,8 @@ def encode(obj):
                 'name' : getattr(obj, 'name', None),
                 'index' : obj.index,
                 'dtype': obj.dtype.num,
-                'data': convert(obj.values) }
+                'data': convert(obj.values),
+                'compress' : compressor}
     elif issubclass(tobj, NDFrame):
         if isinstance(obj, SparseDataFrame):
             d = {'typ' : 'sparse_dataframe',
@@ -245,7 +308,8 @@ def encode(obj):
                                'values' : convert(b.values),
                                'shape' : b.values.shape,
                                'dtype' : b.dtype.num,
-                               'klass' : b.__class__.__name__
+                               'klass' : b.__class__.__name__,
+                               'compress' : compressor
                              } for b in data.blocks ] }
     elif isinstance(obj, (datetime, date, timedelta)):
@@ -290,7 +354,8 @@ def encode(obj):
                 'shape': obj.shape,
                 'ndim': obj.ndim,
                 'dtype': obj.dtype.num,
-                'data': convert(obj)}
+                'data': convert(obj),
+                'compress' : compressor }
     elif isinstance(obj, np.timedelta64):
         return { 'typ' : 'np_timedelta64',
                  'data' : obj.view('i8') }
@@ -337,13 +402,13 @@ def decode(obj):
     elif typ == 'series':
         dtype = dtype_for(obj['dtype'])
         index = obj['index']
-        return globals()[obj['klass']](obj['data'], index=index, dtype=dtype, name=obj['name'])
+        return globals()[obj['klass']](unconvert(obj['data'], dtype, obj['compress']), index=index, name=obj['name'])
     elif typ == 'block_manager':
         axes = obj['axes']

         def create_block(b):
             dtype = dtype_for(b['dtype'])
-            return make_block(np.array(b['values'], dtype=dtype).reshape(b['shape']), b['items'], axes[0], klass=getattr(internals, b['klass']))
+            return make_block(unconvert(b['values'], dtype, b['compress']).reshape(b['shape']), b['items'], axes[0], klass=getattr(internals, b['klass']))

         blocks = [ create_block(b) for b in obj['blocks'] ]
         return globals()[obj['klass']](BlockManager(blocks, axes))
@@ -355,7 +420,7 @@ def create_block(b):
         return timedelta(*obj['data'])
     elif typ == 'sparse_series':
         dtype = dtype_for(obj['dtype'])
-        return globals()[obj['klass']](np.array(obj['sp_values'], dtype=dtype), sparse_index=obj['sp_index'],
+        return globals()[obj['klass']](unconvert(obj['sp_values'], dtype, obj['compress']), sparse_index=obj['sp_index'],
                                        index=obj['index'], fill_value=obj['fill_value'], kind=obj['kind'], name=obj['name'])
     elif typ == 'sparse_dataframe':
         return globals()[obj['klass']](obj['data'],
@@ -368,9 +433,7 @@ def create_block(b):
     elif typ == 'int_index':
         return globals()[obj['klass']](obj['length'], obj['indices'])
     elif typ == 'ndarray':
-        return np.array(obj['data'],
-                        dtype=np.typeDict[obj['dtype']],
-                        ndmin=obj['ndim']).reshape(obj['shape'])
+        return unconvert(obj['data'], np.typeDict[obj['dtype']], obj['compress']).reshape(obj['shape'])
     elif typ == 'np_timedelta64':
         return np.timedelta64(obj['data'])
     elif typ == 'np_scalar':
@@ -390,7 +453,7 @@ def create_block(b):
         return obj

 def pack(o, default=encode,
-         encoding='utf-8', unicode_errors='strict', use_single_float=False):
+         encoding=None, unicode_errors='strict', use_single_float=False):
     """
     Pack an object and return the packed bytes.
     """
@@ -400,7 +463,7 @@ def pack(o, default=encode,
                   use_single_float=use_single_float).pack(o)

 def unpack(packed, object_hook=decode,
-           list_hook=None, use_list=False, encoding='utf-8',
+           list_hook=None, use_list=False, encoding=None,
            unicode_errors='strict', object_pairs_hook=None):
     """
     Unpack a packed object, return an iterator
@@ -417,7 +480,7 @@ def unpack(packed, object_hook=decode,

 class Packer(_packer.Packer):
     def __init__(self, default=encode,
-                 encoding='utf-8',
+                 encoding=None,
                  unicode_errors='strict',
                  use_single_float=False):
         super(Packer, self).__init__(default=default,
@@ -428,7 +491,7 @@ def __init__(self, default=encode,

 class Unpacker(_unpacker.Unpacker):
     def __init__(self, file_like=None, read_size=0, use_list=False,
                  object_hook=decode,
-                 object_pairs_hook=None, list_hook=None, encoding='utf-8',
+                 object_pairs_hook=None, list_hook=None, encoding=None,
                  unicode_errors='strict', max_buffer_size=0):
         super(Unpacker, self).__init__(file_like=file_like,
                                        read_size=read_size,