diff --git a/LICENSES/MSGPACK_LICENSE b/LICENSES/MSGPACK_LICENSE new file mode 100644 index 0000000000000..ae1b0f2f32f06 --- /dev/null +++ b/LICENSES/MSGPACK_LICENSE @@ -0,0 +1,13 @@ +Copyright (C) 2008-2011 INADA Naoki + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/LICENSES/MSGPACK_NUMPY_LICENSE b/LICENSES/MSGPACK_NUMPY_LICENSE new file mode 100644 index 0000000000000..e570011efac73 --- /dev/null +++ b/LICENSES/MSGPACK_NUMPY_LICENSE @@ -0,0 +1,33 @@ +.. -*- rst -*- + +License +======= + +Copyright (c) 2013, Lev Givon. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. +* Neither the name of Lev Givon nor the names of any + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/doc/source/io.rst b/doc/source/io.rst index 5e04fcff61539..9442f59425106 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -36,6 +36,7 @@ object. * ``read_hdf`` * ``read_sql`` * ``read_json`` + * ``read_msgpack`` (experimental) * ``read_html`` * ``read_stata`` * ``read_clipboard`` @@ -48,6 +49,7 @@ The corresponding ``writer`` functions are object methods that are accessed like * ``to_hdf`` * ``to_sql`` * ``to_json`` + * ``to_msgpack`` (experimental) * ``to_html`` * ``to_stata`` * ``to_clipboard`` @@ -1732,6 +1734,72 @@ module is installed you can use it as a xlsx writer engine as follows: .. _io.hdf5: +Serialization +------------- + +msgpack (experimental) +~~~~~~~~~~~~~~~~~~~~~~ + +.. _io.msgpack: + +.. versionadded:: 0.13.0 + +Starting in 0.13.0, pandas is supporting the ``msgpack`` format for +object serialization. This is a lightweight portable binary format, similar +to binary JSON, that is highly space efficient, and provides good performance +both on the writing (serialization), and reading (deserialization). + +.. warning:: + + This is a very new feature of pandas. We intend to provide certain + optimizations in the io of the ``msgpack`` data. 
Since this is marked + as an EXPERIMENTAL LIBRARY, the storage format may not be stable until a future release. + +.. ipython:: python + + df = DataFrame(np.random.rand(5,2),columns=list('AB')) + df.to_msgpack('foo.msg') + pd.read_msgpack('foo.msg') + s = Series(np.random.rand(5),index=date_range('20130101',periods=5)) + +You can pass several objects at once and you will receive them back on deserialization. + +.. ipython:: python + + pd.to_msgpack('foo.msg', df, 'foo', np.array([1,2,3]), s) + pd.read_msgpack('foo.msg') + +You can pass ``iterator=True`` to iterate over the unpacked results. + +.. ipython:: python + + for o in pd.read_msgpack('foo.msg',iterator=True): + print(o) + +You can pass ``append=True`` to the writer to append to an existing pack. + +.. ipython:: python + + df.to_msgpack('foo.msg',append=True) + pd.read_msgpack('foo.msg') + +Unlike other io methods, ``to_msgpack`` is available both on a per-object basis, +``df.to_msgpack()``, and as the top-level ``pd.to_msgpack(...)``, where you +can pack arbitrary collections of python lists, dicts and scalars, while intermixing +pandas objects. + +.. ipython:: python + + pd.to_msgpack('foo2.msg', { 'dict' : [ { 'df' : df }, { 'string' : 'foo' }, { 'scalar' : 1. }, { 's' : s } ] }) + pd.read_msgpack('foo2.msg') + +.. ipython:: python + :suppress: + :okexcept: + + os.remove('foo.msg') + os.remove('foo2.msg') + HDF5 (PyTables) --------------- diff --git a/doc/source/release.rst b/doc/source/release.rst index 65e6ca0e1d95c..be62ef7d31a0b 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -64,17 +64,19 @@ New features Experimental Features ~~~~~~~~~~~~~~~~~~~~~ -- The new :func:`~pandas.eval` function implements expression evaluation using - ``numexpr`` behind the scenes. This results in large speedups for complicated - expressions involving large DataFrames/Series. -- :class:`~pandas.DataFrame` has a new :meth:`~pandas.DataFrame.eval` that - evaluates an expression in the context of the ``DataFrame``.
-- A :meth:`~pandas.DataFrame.query` method has been added that allows - you to select elements of a ``DataFrame`` using a natural query syntax nearly - identical to Python syntax. -- ``pd.eval`` and friends now evaluate operations involving ``datetime64`` - objects in Python space because ``numexpr`` cannot handle ``NaT`` values - (:issue:`4897`). + - The new :func:`~pandas.eval` function implements expression evaluation using + ``numexpr`` behind the scenes. This results in large speedups for complicated + expressions involving large DataFrames/Series. + - :class:`~pandas.DataFrame` has a new :meth:`~pandas.DataFrame.eval` that + evaluates an expression in the context of the ``DataFrame``. + - A :meth:`~pandas.DataFrame.query` method has been added that allows + you to select elements of a ``DataFrame`` using a natural query syntax nearly + identical to Python syntax. + - ``pd.eval`` and friends now evaluate operations involving ``datetime64`` + objects in Python space because ``numexpr`` cannot handle ``NaT`` values + (:issue:`4897`). + - Add msgpack support via ``pd.read_msgpack()`` and ``pd.to_msgpack()``/``df.to_msgpack()`` for serialization + of arbitrary pandas (and python) objects in a lightweight portable binary format (:issue:`686`) Improvements to existing features ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt index 5ff7038d02e45..98099bac15900 100644 --- a/doc/source/v0.13.0.txt +++ b/doc/source/v0.13.0.txt @@ -464,6 +464,15 @@ Enhancements t = Timestamp('20130101 09:01:02') t + pd.datetools.Nano(123) + - The ``isin`` method plays nicely with boolean indexing. To get the rows where each condition is met: + + .. ipython:: python + + mask = df.isin({'A': [1, 2], 'B': ['e', 'f']}) + df[mask.all(1)] + + See the :ref:`documentation` for more. + .. _whatsnew_0130.experimental: Experimental @@ -553,21 +562,35 @@ Experimental For more details see the :ref:`indexing documentation on query `.
- - DataFrame now has an ``isin`` method that can be used to easily check whether the DataFrame's values are contained in an iterable. Use a dictionary if you'd like to check specific iterables for specific columns or rows. +- ``pd.read_msgpack()`` and ``pd.to_msgpack()`` are now supported methods of serialization + of arbitrary pandas (and python) objects in a lightweight portable binary format. :ref:`See the docs <io.msgpack>` - .. ipython:: python + .. warning:: + + Since this is an EXPERIMENTAL LIBRARY, the storage format may not be stable until a future release. - df = pd.DataFrame({'A': [1, 2, 3], 'B': ['d', 'e', 'f']}) - df.isin({'A': [1, 2], 'B': ['e', 'f']}) + .. ipython:: python - The ``isin`` method plays nicely with boolean indexing. To get the rows where each condition is met: + df = DataFrame(np.random.rand(5,2),columns=list('AB')) + df.to_msgpack('foo.msg') + pd.read_msgpack('foo.msg') - .. ipython:: python + s = Series(np.random.rand(5),index=date_range('20130101',periods=5)) + pd.to_msgpack('foo.msg', df, s) + pd.read_msgpack('foo.msg') - mask = df.isin({'A': [1, 2], 'B': ['e', 'f']}) - df[mask.all(1)] + You can pass ``iterator=True`` to iterate over the unpacked results. + + .. ipython:: python + + for o in pd.read_msgpack('foo.msg',iterator=True): + print(o) + + .. ipython:: python + :suppress: + :okexcept: - See the :ref:`documentation` for more. + os.remove('foo.msg') .. _whatsnew_0130.refactoring: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 835b66512a89e..3142f74f2f5c5 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -805,6 +805,25 @@ def to_hdf(self, path_or_buf, key, **kwargs): from pandas.io import pytables return pytables.to_hdf(path_or_buf, key, self, **kwargs) + def to_msgpack(self, path_or_buf, **kwargs): + """ + msgpack (serialize) object to input file path + + THIS IS AN EXPERIMENTAL LIBRARY and the storage format + may not be stable until a future release.
+ + Parameters + ---------- + path : string File path + args : an object or objects to serialize + append : boolean whether to append to an existing msgpack + (default is False) + compress : type of compressor (zlib or blosc), default to None (no compression) + """ + + from pandas.io import packers + return packers.to_msgpack(path_or_buf, self, **kwargs) + def to_pickle(self, path): """ Pickle (serialize) object to input file path diff --git a/pandas/io/api.py b/pandas/io/api.py index 94deb51ab4b18..dc9ea290eb45e 100644 --- a/pandas/io/api.py +++ b/pandas/io/api.py @@ -11,3 +11,4 @@ from pandas.io.sql import read_sql from pandas.io.stata import read_stata from pandas.io.pickle import read_pickle, to_pickle +from pandas.io.packers import read_msgpack, to_msgpack diff --git a/pandas/io/packers.py b/pandas/io/packers.py new file mode 100644 index 0000000000000..d6aa1ebeb896a --- /dev/null +++ b/pandas/io/packers.py @@ -0,0 +1,534 @@ +""" +Msgpack serializer support for reading and writing pandas data structures +to disk +""" + +# portions of msgpack_numpy package, by Lev Givon were incorporated +# into this module (and tests_packers.py) + +""" +License +======= + +Copyright (c) 2013, Lev Givon. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. +* Neither the name of Lev Givon nor the names of any + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" + +from datetime import datetime, date, timedelta +from dateutil.parser import parse + +import numpy as np +from pandas import compat +from pandas.compat import u +from pandas import ( + Timestamp, Period, Series, DataFrame, Panel, Panel4D, + Index, MultiIndex, Int64Index, PeriodIndex, DatetimeIndex, Float64Index, NaT +) +from pandas.sparse.api import SparseSeries, SparseDataFrame, SparsePanel +from pandas.sparse.array import BlockIndex, IntIndex +from pandas.core.generic import NDFrame +from pandas.core.common import needs_i8_conversion +from pandas.core.internals import BlockManager, make_block +import pandas.core.internals as internals + +from pandas.msgpack import Unpacker as _Unpacker, Packer as _Packer +import zlib + +try: + import blosc + _BLOSC = True +except: + _BLOSC = False + +# until we can pass this into our conversion functions, +# this is pretty hacky +compressor = None + + +def to_msgpack(path, *args, **kwargs): + """ + msgpack (serialize) object to input file path + + THIS IS AN EXPERIMENTAL LIBRARY and the storage format + may not be stable until a future release. 
+ + Parameters + ---------- + path : string File path + args : an object or objects to serialize + append : boolean whether to append to an existing msgpack + (default is False) + compress : type of compressor (zlib or blosc), default to None (no compression) + """ + global compressor + compressor = kwargs.pop('compress', None) + append = kwargs.pop('append', None) + if append: + f = open(path, 'a+b') + else: + f = open(path, 'wb') + try: + for a in args: + f.write(pack(a, **kwargs)) + finally: + f.close() + + +def read_msgpack(path, iterator=False, **kwargs): + """ + Load msgpack pandas object from the specified + file path + + THIS IS AN EXPERIMENTAL LIBRARY and the storage format + may not be stable until a future release. + + Parameters + ---------- + path : string + File path + iterator : boolean, if True, return an iterator to the unpacker + (default is False) + + Returns + ------- + obj : type of object stored in file + + """ + if iterator: + return Iterator(path) + + with open(path, 'rb') as fh: + l = list(unpack(fh)) + if len(l) == 1: + return l[0] + return l + +dtype_dict = {21: np.dtype('M8[ns]'), + u('datetime64[ns]'): np.dtype('M8[ns]'), + u('datetime64[us]'): np.dtype('M8[us]'), + 22: np.dtype('m8[ns]'), + u('timedelta64[ns]'): np.dtype('m8[ns]'), + u('timedelta64[us]'): np.dtype('m8[us]')} + + +def dtype_for(t): + if t in dtype_dict: + return dtype_dict[t] + return np.typeDict[t] + +c2f_dict = {'complex': np.float64, + 'complex128': np.float64, + 'complex64': np.float32} + +# numpy 1.6.1 compat +if hasattr(np, 'float128'): + c2f_dict['complex256'] = np.float128 + + +def c2f(r, i, ctype_name): + """ + Convert strings to complex number instance with specified numpy type. 
+ """ + + ftype = c2f_dict[ctype_name] + return np.typeDict[ctype_name](ftype(r) + 1j * ftype(i)) + + +def convert(values): + """ convert the numpy values to a list """ + + dtype = values.dtype + if needs_i8_conversion(dtype): + values = values.view('i8') + v = values.ravel() + + if compressor == 'zlib': + + # return string arrays like they are + if dtype == np.object_: + return v.tolist() + + # convert to a bytes array + v = v.tostring() + return zlib.compress(v) + + elif compressor == 'blosc' and _BLOSC: + + # return string arrays like they are + if dtype == np.object_: + return v.tolist() + + # convert to a bytes array + v = v.tostring() + return blosc.compress(v, typesize=dtype.itemsize) + + # ndarray (on original dtype) + if dtype == 'float64' or dtype == 'int64': + return v + + # as a list + return v.tolist() + + +def unconvert(values, dtype, compress=None): + + if dtype == np.object_: + return np.array(values, dtype=object) + + if compress == 'zlib': + + values = zlib.decompress(values) + return np.frombuffer(values, dtype=dtype) + + elif compress == 'blosc': + + if not _BLOSC: + raise Exception("cannot uncompress w/o blosc") + + # decompress + values = blosc.decompress(values) + + return np.frombuffer(values, dtype=dtype) + + # as a list + return np.array(values, dtype=dtype) + + +def encode(obj): + """ + Data encoder + """ + + tobj = type(obj) + if isinstance(obj, Index): + if isinstance(obj, PeriodIndex): + return {'typ': 'period_index', + 'klass': obj.__class__.__name__, + 'name': getattr(obj, 'name', None), + 'freq': obj.freqstr, + 'dtype': obj.dtype.num, + 'data': convert(obj.asi8)} + elif isinstance(obj, DatetimeIndex): + return {'typ': 'datetime_index', + 'klass': obj.__class__.__name__, + 'name': getattr(obj, 'name', None), + 'dtype': obj.dtype.num, + 'data': convert(obj.asi8), + 'freq': obj.freqstr, + 'tz': obj.tz} + elif isinstance(obj, MultiIndex): + return {'typ': 'multi_index', + 'klass': obj.__class__.__name__, + 'names': getattr(obj, 'names', 
None), + 'dtype': obj.dtype.num, + 'data': convert(obj.values)} + else: + return {'typ': 'index', + 'klass': obj.__class__.__name__, + 'name': getattr(obj, 'name', None), + 'dtype': obj.dtype.num, + 'data': obj.tolist()} + elif isinstance(obj, Series): + if isinstance(obj, SparseSeries): + d = {'typ': 'sparse_series', + 'klass': obj.__class__.__name__, + 'dtype': obj.dtype.num, + 'index': obj.index, + 'sp_index': obj.sp_index, + 'sp_values': convert(obj.sp_values), + 'compress': compressor} + for f in ['name', 'fill_value', 'kind']: + d[f] = getattr(obj, f, None) + return d + else: + return {'typ': 'series', + 'klass': obj.__class__.__name__, + 'name': getattr(obj, 'name', None), + 'index': obj.index, + 'dtype': obj.dtype.num, + 'data': convert(obj.values), + 'compress': compressor} + elif issubclass(tobj, NDFrame): + if isinstance(obj, SparseDataFrame): + d = {'typ': 'sparse_dataframe', + 'klass': obj.__class__.__name__, + 'columns': obj.columns} + for f in ['default_fill_value', 'default_kind']: + d[f] = getattr(obj, f, None) + d['data'] = dict([(name, ss) + for name, ss in compat.iteritems(obj)]) + return d + elif isinstance(obj, SparsePanel): + d = {'typ': 'sparse_panel', + 'klass': obj.__class__.__name__, + 'items': obj.items} + for f in ['default_fill_value', 'default_kind']: + d[f] = getattr(obj, f, None) + d['data'] = dict([(name, df) + for name, df in compat.iteritems(obj)]) + return d + else: + + data = obj._data + if not data.is_consolidated(): + data = data.consolidate() + + # the block manager + return {'typ': 'block_manager', + 'klass': obj.__class__.__name__, + 'axes': data.axes, + 'blocks': [{'items': b.items, + 'values': convert(b.values), + 'shape': b.values.shape, + 'dtype': b.dtype.num, + 'klass': b.__class__.__name__, + 'compress': compressor + } for b in data.blocks]} + + elif isinstance(obj, (datetime, date, np.datetime64, timedelta, np.timedelta64)): + if isinstance(obj, Timestamp): + tz = obj.tzinfo + if tz is not None: + tz = tz.zone + 
offset = obj.offset + if offset is not None: + offset = offset.freqstr + return {'typ': 'timestamp', + 'value': obj.value, + 'offset': offset, + 'tz': tz} + elif isinstance(obj, np.timedelta64): + return {'typ': 'timedelta64', + 'data': obj.view('i8')} + elif isinstance(obj, timedelta): + return {'typ': 'timedelta', + 'data': (obj.days, obj.seconds, obj.microseconds)} + elif isinstance(obj, np.datetime64): + return {'typ': 'datetime64', + 'data': str(obj)} + elif isinstance(obj, datetime): + return {'typ': 'datetime', + 'data': obj.isoformat()} + elif isinstance(obj, date): + return {'typ': 'date', + 'data': obj.isoformat()} + raise Exception("cannot encode this datetimelike object: %s" % obj) + elif isinstance(obj, Period): + return {'typ': 'period', + 'ordinal': obj.ordinal, + 'freq': obj.freq} + elif isinstance(obj, BlockIndex): + return {'typ': 'block_index', + 'klass': obj.__class__.__name__, + 'blocs': obj.blocs, + 'blengths': obj.blengths, + 'length': obj.length} + elif isinstance(obj, IntIndex): + return {'typ': 'int_index', + 'klass': obj.__class__.__name__, + 'indices': obj.indices, + 'length': obj.length} + elif isinstance(obj, np.ndarray) and obj.dtype not in ['float64', 'int64']: + return {'typ': 'ndarray', + 'shape': obj.shape, + 'ndim': obj.ndim, + 'dtype': obj.dtype.num, + 'data': convert(obj), + 'compress': compressor} + elif isinstance(obj, np.number): + if np.iscomplexobj(obj): + return {'typ': 'np_scalar', + 'sub_typ': 'np_complex', + 'dtype': obj.dtype.name, + 'real': obj.real.__repr__(), + 'imag': obj.imag.__repr__()} + else: + return {'typ': 'np_scalar', + 'dtype': obj.dtype.name, + 'data': obj.__repr__()} + elif isinstance(obj, complex): + return {'typ': 'np_complex', + 'real': obj.real.__repr__(), + 'imag': obj.imag.__repr__()} + + return obj + + +def decode(obj): + """ + Decoder for deserializing numpy data types. 
+ """ + + typ = obj.get('typ') + if typ is None: + return obj + elif typ == 'timestamp': + return Timestamp(obj['value'], tz=obj['tz'], offset=obj['offset']) + elif typ == 'period': + return Period(ordinal=obj['ordinal'], freq=obj['freq']) + elif typ == 'index': + dtype = dtype_for(obj['dtype']) + data = obj['data'] + return globals()[obj['klass']](data, dtype=dtype, name=obj['name']) + elif typ == 'multi_index': + return globals()[obj['klass']].from_tuples(obj['data'], names=obj['names']) + elif typ == 'period_index': + return globals()[obj['klass']](obj['data'], name=obj['name'], freq=obj['freq']) + elif typ == 'datetime_index': + return globals()[obj['klass']](obj['data'], freq=obj['freq'], tz=obj['tz'], name=obj['name']) + elif typ == 'series': + dtype = dtype_for(obj['dtype']) + index = obj['index'] + return globals()[obj['klass']](unconvert(obj['data'], dtype, obj['compress']), index=index, name=obj['name']) + elif typ == 'block_manager': + axes = obj['axes'] + + def create_block(b): + dtype = dtype_for(b['dtype']) + return make_block(unconvert(b['values'], dtype, b['compress']).reshape(b['shape']), b['items'], axes[0], klass=getattr(internals, b['klass'])) + + blocks = [create_block(b) for b in obj['blocks']] + return globals()[obj['klass']](BlockManager(blocks, axes)) + elif typ == 'datetime': + return parse(obj['data']) + elif typ == 'datetime64': + return np.datetime64(parse(obj['data'])) + elif typ == 'date': + return parse(obj['data']).date() + elif typ == 'timedelta': + return timedelta(*obj['data']) + elif typ == 'timedelta64': + return np.timedelta64(int(obj['data'])) + elif typ == 'sparse_series': + dtype = dtype_for(obj['dtype']) + return globals( + )[obj['klass']](unconvert(obj['sp_values'], dtype, obj['compress']), sparse_index=obj['sp_index'], + index=obj['index'], fill_value=obj['fill_value'], kind=obj['kind'], name=obj['name']) + elif typ == 'sparse_dataframe': + return globals()[obj['klass']](obj['data'], + columns=obj['columns'], 
default_fill_value=obj['default_fill_value'], default_kind=obj['default_kind']) + elif typ == 'sparse_panel': + return globals()[obj['klass']](obj['data'], + items=obj['items'], default_fill_value=obj['default_fill_value'], default_kind=obj['default_kind']) + elif typ == 'block_index': + return globals()[obj['klass']](obj['length'], obj['blocs'], obj['blengths']) + elif typ == 'int_index': + return globals()[obj['klass']](obj['length'], obj['indices']) + elif typ == 'ndarray': + return unconvert(obj['data'], dtype_for(obj['dtype']), obj.get('compress')).reshape(obj['shape']) # dtype_for also maps the datetime64/timedelta64 codes + elif typ == 'np_scalar': + if obj.get('sub_typ') == 'np_complex': + return c2f(obj['real'], obj['imag'], obj['dtype']) + else: + dtype = dtype_for(obj['dtype']) + try: + return dtype(obj['data']) + except Exception: + return dtype.type(obj['data']) + elif typ == 'np_complex': + return complex(float(obj['real']), float(obj['imag'])) # joining the two reprs into one string is malformed when imag is negative + elif isinstance(obj, (dict, list, set)): + return obj + else: + return obj + + +def pack(o, default=encode, + encoding='utf-8', unicode_errors='strict', use_single_float=False): + """ + Pack an object and return the packed bytes.
+ + return Packer(default=default, encoding=encoding, + unicode_errors=unicode_errors, + use_single_float=use_single_float).pack(o) + + +def unpack(packed, object_hook=decode, + list_hook=None, use_list=False, encoding='utf-8', + unicode_errors='strict', object_pairs_hook=None): + """ + Unpack a packed object, return an iterator + Note: packed lists will be returned as tuples + """ + + return Unpacker(packed, object_hook=object_hook, + list_hook=list_hook, + use_list=use_list, encoding=encoding, + unicode_errors=unicode_errors, + object_pairs_hook=object_pairs_hook) + + +class Packer(_Packer): + + def __init__(self, default=encode, + encoding='utf-8', + unicode_errors='strict', + use_single_float=False): + super(Packer, self).__init__(default=default, + encoding=encoding, + unicode_errors=unicode_errors, + use_single_float=use_single_float) + + +class Unpacker(_Unpacker): + + def __init__(self, file_like=None, read_size=0, use_list=False, + object_hook=decode, + object_pairs_hook=None, list_hook=None, encoding='utf-8', + unicode_errors='strict', max_buffer_size=0): + super(Unpacker, self).__init__(file_like=file_like, + read_size=read_size, + use_list=use_list, + object_hook=object_hook, + object_pairs_hook=object_pairs_hook, + list_hook=list_hook, + encoding=encoding, + unicode_errors=unicode_errors, + max_buffer_size=max_buffer_size) + + +class Iterator(object): + + """ manage the unpacking iteration, + close the file on completion """ + + def __init__(self, path, **kwargs): + self.path = path + self.kwargs = kwargs + + def __iter__(self): + + fh = open(self.path, 'rb') # opened before the try so a failed open is not masked by a NameError in finally + try: + unpacker = unpack(fh) + for o in unpacker: + yield o + finally: + fh.close() diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py new file mode 100644 index 0000000000000..79b421ff7b047 --- /dev/null +++ b/pandas/io/tests/test_packers.py @@ -0,0 +1,387 @@ +import nose +import unittest + +import datetime +import numpy as np + +from pandas import compat +from
pandas.compat import u +from pandas import (Series, DataFrame, Panel, MultiIndex, bdate_range, + date_range, period_range, Index, SparseSeries, SparseDataFrame, + SparsePanel) +import pandas.util.testing as tm +from pandas.util.testing import ensure_clean +from pandas.tests.test_series import assert_series_equal +from pandas.tests.test_frame import assert_frame_equal +from pandas.tests.test_panel import assert_panel_equal + +import pandas +from pandas.sparse.tests.test_sparse import assert_sp_series_equal, assert_sp_frame_equal +from pandas import Timestamp, tslib + +nan = np.nan + +from pandas.io.packers import to_msgpack, read_msgpack + +_multiprocess_can_split_ = False + + +def check_arbitrary(a, b): + + if isinstance(a, (list, tuple)) and isinstance(b, (list, tuple)): + assert(len(a) == len(b)) + for a_, b_ in zip(a, b): + check_arbitrary(a_, b_) + elif isinstance(a, Panel): + assert_panel_equal(a, b) + elif isinstance(a, DataFrame): + assert_frame_equal(a, b) + elif isinstance(a, Series): + assert_series_equal(a, b) + else: + assert(a == b) + + +class Test(unittest.TestCase): + + def setUp(self): + self.path = '__%s__.msg' % tm.rands(10) + + def tearDown(self): + pass + + def encode_decode(self, x, **kwargs): + with ensure_clean(self.path) as p: + to_msgpack(p, x, **kwargs) + return read_msgpack(p, **kwargs) + + +class TestNumpy(Test): + + def test_numpy_scalar_float(self): + x = np.float32(np.random.rand()) + x_rec = self.encode_decode(x) + self.assert_(np.allclose(x, x_rec) and type(x) == type(x_rec)) + + def test_numpy_scalar_complex(self): + x = np.complex64(np.random.rand() + 1j * np.random.rand()) + x_rec = self.encode_decode(x) + self.assert_(np.allclose(x, x_rec) and type(x) == type(x_rec)) + + def test_scalar_float(self): + x = np.random.rand() + x_rec = self.encode_decode(x) + self.assert_(np.allclose(x, x_rec) and type(x) == type(x_rec)) + + def test_scalar_complex(self): + x = np.random.rand() + 1j * np.random.rand() + x_rec = self.encode_decode(x) 
+ self.assert_(np.allclose(x, x_rec) and type(x) == type(x_rec)) + + def test_list_numpy_float(self): + raise nose.SkipTest('buggy test') + x = [np.float32(np.random.rand()) for i in range(5)] + x_rec = self.encode_decode(x) + self.assert_(all(map(lambda x, y: + x == y, x, x_rec)) and + all(map(lambda x, y: type(x) == type(y), x, x_rec))) + + def test_list_numpy_float_complex(self): + if not hasattr(np, 'complex128'): + raise nose.SkipTest('numpy cant handle complex128') + + # buggy test + raise nose.SkipTest('buggy test') + x = [np.float32(np.random.rand()) for i in range(5)] + \ + [np.complex128(np.random.rand() + 1j * np.random.rand()) + for i in range(5)] + x_rec = self.encode_decode(x) + self.assert_(all(map(lambda x, y: x == y, x, x_rec)) and + all(map(lambda x, y: type(x) == type(y), x, x_rec))) + + def test_list_float(self): + x = [np.random.rand() for i in range(5)] + x_rec = self.encode_decode(x) + self.assert_(all(map(lambda x, y: x == y, x, x_rec)) and + all(map(lambda x, y: type(x) == type(y), x, x_rec))) + + def test_list_float_complex(self): + x = [np.random.rand() for i in range(5)] + \ + [(np.random.rand() + 1j * np.random.rand()) for i in range(5)] + x_rec = self.encode_decode(x) + self.assert_(all(map(lambda x, y: x == y, x, x_rec)) and + all(map(lambda x, y: type(x) == type(y), x, x_rec))) + + def test_dict_float(self): + x = {'foo': 1.0, 'bar': 2.0} + x_rec = self.encode_decode(x) + self.assert_(all(map(lambda x, y: x == y, x.values(), x_rec.values())) and + all(map(lambda x, y: type(x) == type(y), x.values(), x_rec.values()))) + + def test_dict_complex(self): + x = {'foo': 1.0 + 1.0j, 'bar': 2.0 + 2.0j} + x_rec = self.encode_decode(x) + self.assert_(all(map(lambda x, y: x == y, x.values(), x_rec.values())) and + all(map(lambda x, y: type(x) == type(y), x.values(), x_rec.values()))) + + def test_dict_numpy_float(self): + x = {'foo': np.float32(1.0), 'bar': np.float32(2.0)} + x_rec = self.encode_decode(x) + self.assert_(all(map(lambda x, y: x == 
y, x.values(), x_rec.values())) and + all(map(lambda x, y: type(x) == type(y), x.values(), x_rec.values()))) + + def test_dict_numpy_complex(self): + x = {'foo': np.complex128( + 1.0 + 1.0j), 'bar': np.complex128(2.0 + 2.0j)} + x_rec = self.encode_decode(x) + self.assert_(all(map(lambda x, y: x == y, x.values(), x_rec.values())) and + all(map(lambda x, y: type(x) == type(y), x.values(), x_rec.values()))) + + def test_numpy_array_float(self): + x = np.random.rand(5).astype(np.float32) + x_rec = self.encode_decode(x) + self.assert_(all(map(lambda x, y: x == y, x, x_rec)) and + x.dtype == x_rec.dtype) + + def test_numpy_array_complex(self): + x = (np.random.rand(5) + 1j * np.random.rand(5)).astype(np.complex128) + x_rec = self.encode_decode(x) + self.assert_(all(map(lambda x, y: x == y, x, x_rec)) and + x.dtype == x_rec.dtype) + + def test_list_mixed(self): + x = [1.0, np.float32(3.5), np.complex128(4.25), u('foo')] + x_rec = self.encode_decode(x) + self.assert_(all(map(lambda x, y: x == y, x, x_rec)) and + all(map(lambda x, y: type(x) == type(y), x, x_rec))) + + +class TestBasic(Test): + + def test_timestamp(self): + + for i in [Timestamp( + '20130101'), Timestamp('20130101', tz='US/Eastern'), + Timestamp('201301010501')]: + i_rec = self.encode_decode(i) + self.assert_(i == i_rec) + + def test_datetimes(self): + + for i in [datetime.datetime( + 2013, 1, 1), datetime.datetime(2013, 1, 1, 5, 1), + datetime.date(2013, 1, 1), np.datetime64(datetime.datetime(2013, 1, 5, 2, 15))]: + i_rec = self.encode_decode(i) + self.assert_(i == i_rec) + + def test_timedeltas(self): + + for i in [datetime.timedelta(days=1), + datetime.timedelta(days=1, seconds=10), + np.timedelta64(1000000)]: + i_rec = self.encode_decode(i) + self.assert_(i == i_rec) + + +class TestIndex(Test): + + def setUp(self): + super(TestIndex, self).setUp() + + self.d = { + 'string': tm.makeStringIndex(100), + 'date': tm.makeDateIndex(100), + 'int': tm.makeIntIndex(100), + 'float': tm.makeFloatIndex(100), + 
class TestSeries(Test):
    """Round-trip checks for Series holding strings, objects, dates and numbers."""

    def setUp(self):
        super(TestSeries, self).setUp()

        fixtures = {}
        self.d = fixtures

        string_series = tm.makeStringSeries()
        string_series.name = 'string'
        fixtures['string'] = string_series

        object_series = tm.makeObjectSeries()
        object_series.name = 'object'
        fixtures['object'] = object_series

        # datetime64[ns] series filled with the NaT sentinel
        fixtures['date'] = Series(tslib.iNaT, dtype='M8[ns]', index=range(5))

        data = dict(
            A=[0., 1., 2., 3., np.nan],
            B=[0, 1, 0, 1, 0],
            C=['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
            D=date_range('1/1/2009', periods=5),
            E=[0., 1, Timestamp('20100101'), 'foo', 2.],
        )

        for label, column in (('float', 'A'), ('int', 'B'), ('mixed', 'E')):
            fixtures[label] = Series(data[column])

    def test_basic(self):
        for series in self.d.values():
            restored = self.encode_decode(series)
            assert_series_equal(series, restored)
DataFrame(dict(A=data['B'], B=Series(data['B']) + 1)), + 'mixed': DataFrame(dict([(k, data[k]) for k in ['A', 'B', 'C', 'D']]))} + + self.panel = { + 'float': Panel(dict(ItemA=self.frame['float'], ItemB=self.frame['float'] + 1))} + + def test_basic_frame(self): + + for s, i in self.frame.items(): + i_rec = self.encode_decode(i) + assert_frame_equal(i, i_rec) + + def test_basic_panel(self): + + for s, i in self.panel.items(): + i_rec = self.encode_decode(i) + assert_panel_equal(i, i_rec) + + def test_multi(self): + + i_rec = self.encode_decode(self.frame) + for k in self.frame.keys(): + assert_frame_equal(self.frame[k], i_rec[k]) + + l = tuple( + [self.frame['float'], self.frame['float'].A, self.frame['float'].B, None]) + l_rec = self.encode_decode(l) + check_arbitrary(l, l_rec) + + # this is an oddity in that packed lists will be returned as tuples + l = [self.frame['float'], self.frame['float'] + .A, self.frame['float'].B, None] + l_rec = self.encode_decode(l) + self.assert_(isinstance(l_rec, tuple)) + check_arbitrary(l, l_rec) + + def test_iterator(self): + + l = [self.frame['float'], self.frame['float'] + .A, self.frame['float'].B, None] + + with ensure_clean(self.path) as path: + to_msgpack(path, *l) + for i, packed in enumerate(read_msgpack(path, iterator=True)): + check_arbitrary(packed, l[i]) + + +class TestSparse(Test): + + def _check_roundtrip(self, obj, comparator, **kwargs): + + i_rec = self.encode_decode(obj) + comparator(obj, i_rec, **kwargs) + + def test_sparse_series(self): + + s = tm.makeStringSeries() + s[3:5] = np.nan + ss = s.to_sparse() + self._check_roundtrip(ss, tm.assert_series_equal, + check_series_type=True) + + ss2 = s.to_sparse(kind='integer') + self._check_roundtrip(ss2, tm.assert_series_equal, + check_series_type=True) + + ss3 = s.to_sparse(fill_value=0) + self._check_roundtrip(ss3, tm.assert_series_equal, + check_series_type=True) + + def test_sparse_frame(self): + + s = tm.makeDataFrame() + s.ix[3:5, 1:3] = np.nan + s.ix[8:10, -2] = 
class ExtraData(ValueError):
    """Signal that bytes were left over after one complete object was unpacked.

    Attributes
    ----------
    unpacked : the object that was decoded successfully
    extra : the trailing data that followed it
    """

    def __init__(self, unpacked, extra):
        self.unpacked = unpacked
        self.extra = extra

    def __str__(self):
        # spelling fixed: the message previously read "recieved"
        return "unpack(b) received extra data."
cdef class Packer(object):
    """MessagePack Packer

    usage:

        packer = Packer()
        astream.write(packer.pack(a))
        astream.write(packer.pack(b))

    Packer's constructor has some keyword arguments:

    * *default* - Convert user type to builtin type that Packer supports.
      See also simplejson's document.
    * *encoding* - Convert unicode to bytes with this encoding. (default: 'utf-8')
    * *unicode_errors* - Error handler for encoding unicode. (default: 'strict')
    * *use_single_float* - Use single precision float type for float. (default: False)
    * *autoreset* - Reset buffer after each pack and return its content as `bytes`. (default: True).
      If set this to false, use `bytes()` to get content and `.reset()` to clear buffer.
    """
    cdef msgpack_packer pk
    cdef object _default
    # byte-string copies are kept as attributes so the char* pointers below
    # stay valid for the lifetime of the packer
    cdef object _bencoding
    cdef object _berrors
    cdef char *encoding
    cdef char *unicode_errors
    cdef bool use_float
    cdef bint autoreset

    def __cinit__(self):
        # start with a 1 MiB output buffer
        cdef int buf_size = 1024*1024
        # malloc returns void*; the explicit cast is required to assign it
        self.pk.buf = <char*> malloc(buf_size)
        if self.pk.buf == NULL:
            raise MemoryError("Unable to allocate internal buffer.")
        self.pk.buf_size = buf_size
        self.pk.length = 0

    def __init__(self, default=None, encoding='utf-8', unicode_errors='strict',
                 use_single_float=False, bint autoreset=1):
        self.use_float = use_single_float
        self.autoreset = autoreset
        if default is not None:
            if not PyCallable_Check(default):
                raise TypeError("default must be a callable.")
        self._default = default
        if encoding is None:
            # no encoding: _pack refuses unicode objects
            self.encoding = NULL
            self.unicode_errors = NULL
        else:
            if isinstance(encoding, unicode):
                self._bencoding = encoding.encode('ascii')
            else:
                self._bencoding = encoding
            self.encoding = PyBytes_AsString(self._bencoding)
            if isinstance(unicode_errors, unicode):
                self._berrors = unicode_errors.encode('ascii')
            else:
                self._berrors = unicode_errors
            self.unicode_errors = PyBytes_AsString(self._berrors)

    def __dealloc__(self):
        free(self.pk.buf)

    @cython.boundscheck(False)
    @cython.wraparound(False)
    cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1:
        """Append the encoding of `o` to the internal buffer.

        Containers recurse with `nest_limit - 1`; a negative limit raises
        PackValueError.  Returns the status code of the last low-level pack
        call (0 on success).  Raises TypeError for objects that are neither
        supported nor convertible through the `default` callable.
        """
        cdef long long llval
        cdef unsigned long long ullval
        cdef long longval
        cdef float fval
        cdef double dval
        cdef char* rawval
        cdef int ret
        cdef dict d
        cdef object dtype

        cdef int n,i
        cdef double f8val
        cdef int64_t i8val
        cdef ndarray[float64_t,ndim=1] array_double
        cdef ndarray[int64_t,ndim=1] array_int

        if nest_limit < 0:
            raise PackValueError("recursion limit exceeded.")

        if o is None:
            ret = msgpack_pack_nil(&self.pk)
        elif isinstance(o, bool):
            # checked before the integer branches below
            if o:
                ret = msgpack_pack_true(&self.pk)
            else:
                ret = msgpack_pack_false(&self.pk)
        elif PyLong_Check(o):
            # positive longs go through the unsigned 64-bit path
            if o > 0:
                ullval = o
                ret = msgpack_pack_unsigned_long_long(&self.pk, ullval)
            else:
                llval = o
                ret = msgpack_pack_long_long(&self.pk, llval)
        elif PyInt_Check(o):
            longval = o
            ret = msgpack_pack_long(&self.pk, longval)
        elif PyFloat_Check(o):
            if self.use_float:
                fval = o
                ret = msgpack_pack_float(&self.pk, fval)
            else:
                dval = o
                ret = msgpack_pack_double(&self.pk, dval)
        elif PyBytes_Check(o):
            # raw header (length) first, then the payload
            rawval = o
            ret = msgpack_pack_raw(&self.pk, len(o))
            if ret == 0:
                ret = msgpack_pack_raw_body(&self.pk, rawval, len(o))
        elif PyUnicode_Check(o):
            if not self.encoding:
                raise TypeError("Can't encode unicode string: no encoding is specified")
            o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors)
            rawval = o
            ret = msgpack_pack_raw(&self.pk, len(o))
            if ret == 0:
                ret = msgpack_pack_raw_body(&self.pk, rawval, len(o))
        elif PyDict_CheckExact(o):
            d = o
            ret = msgpack_pack_map(&self.pk, len(d))
            if ret == 0:
                for k, v in d.iteritems():
                    ret = self._pack(k, nest_limit-1)
                    if ret != 0: break
                    ret = self._pack(v, nest_limit-1)
                    if ret != 0: break
        elif PyDict_Check(o):
            # dict subclasses: go through the generic items() protocol
            ret = msgpack_pack_map(&self.pk, len(o))
            if ret == 0:
                for k, v in o.items():
                    ret = self._pack(k, nest_limit-1)
                    if ret != 0: break
                    ret = self._pack(v, nest_limit-1)
                    if ret != 0: break
        elif PyTuple_Check(o) or PyList_Check(o):
            ret = msgpack_pack_array(&self.pk, len(o))
            if ret == 0:
                for v in o:
                    ret = self._pack(v, nest_limit-1)
                    if ret != 0: break

        # ndarray support ONLY (and float64/int64) for now
        elif isinstance(o, np.ndarray) and not hasattr(o,'values') and (o.dtype == 'float64' or o.dtype == 'int64'):

            # a 5-entry map: typ, shape, ndim, dtype, data
            ret = msgpack_pack_map(&self.pk, 5)
            if ret != 0: return -1

            dtype = o.dtype
            self.pack_pair('typ', 'ndarray', nest_limit)
            self.pack_pair('shape', o.shape, nest_limit)
            self.pack_pair('ndim', o.ndim, nest_limit)
            self.pack_pair('dtype', dtype.num, nest_limit)

            ret = self._pack('data', nest_limit-1)
            if ret != 0: return ret

            # the values are written as one flat array of the ravelled data
            if dtype == 'float64':
                array_double = o.ravel()
                n = len(array_double)
                ret = msgpack_pack_array(&self.pk, n)
                if ret != 0: return ret

                for i in range(n):

                    f8val = array_double[i]
                    ret = msgpack_pack_double(&self.pk, f8val)
                    if ret != 0: break
            elif dtype == 'int64':
                array_int = o.ravel()
                n = len(array_int)
                ret = msgpack_pack_array(&self.pk, n)
                if ret != 0: return ret

                for i in range(n):

                    i8val = array_int[i]
                    ret = msgpack_pack_long_long(&self.pk, i8val)
                    if ret != 0: break

        elif self._default:
            # let the user callable convert, then pack its result
            o = self._default(o)
            ret = self._pack(o, nest_limit-1)
        else:
            raise TypeError("can't serialize %r" % (o,))
        return ret

    cpdef pack(self, object obj):
        """Pack `obj`.

        With autoreset enabled the packed bytes are returned and the buffer is
        cleared; otherwise the data stays in the buffer and None is returned.
        If packing fails, the buffer is rolled back to its length before the
        call so a later pack() does not emit a truncated object.
        """
        cdef int ret
        cdef size_t start = self.pk.length
        try:
            ret = self._pack(obj, DEFAULT_RECURSE_LIMIT)
            if ret == -1:
                raise MemoryError
            elif ret:  # should not happen.
                raise TypeError
        except BaseException:
            # discard the partially written object, then re-raise
            self.pk.length = start
            raise
        if self.autoreset:
            buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
            self.pk.length = 0
            return buf

    def pack_array_header(self, size_t size):
        """Write only an array header announcing `size` elements."""
        cdef int ret = msgpack_pack_array(&self.pk, size)
        if ret == -1:
            raise MemoryError
        elif ret:  # should not happen
            raise TypeError
        if self.autoreset:
            buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
            self.pk.length = 0
            return buf

    def pack_map_header(self, size_t size):
        """Write only a map header announcing `size` key/value pairs."""
        cdef int ret = msgpack_pack_map(&self.pk, size)
        if ret == -1:
            raise MemoryError
        elif ret:  # should not happen
            raise TypeError
        if self.autoreset:
            buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
            self.pk.length = 0
            return buf

    def pack_map_pairs(self, object pairs):
        """
        Pack *pairs* as msgpack map type.

        *pairs* should be a sequence of pairs.
        (`len(pairs)` and `for k, v in pairs:` should be supported.)
        """
        cdef int ret
        cdef size_t start = self.pk.length
        try:
            ret = msgpack_pack_map(&self.pk, len(pairs))
            if ret == 0:
                for k, v in pairs:
                    ret = self._pack(k)
                    if ret != 0: break
                    ret = self._pack(v)
                    if ret != 0: break
            if ret == -1:
                raise MemoryError
            elif ret:  # should not happen
                raise TypeError
        except BaseException:
            # same rollback as pack(): never leave half a map in the buffer
            self.pk.length = start
            raise
        if self.autoreset:
            buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
            self.pk.length = 0
            return buf

    def reset(self):
        """Clear internal buffer."""
        self.pk.length = 0

    def bytes(self):
        """Return buffer content."""
        return PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)


    cdef inline pack_pair(self, object k, object v, int nest_limit):
        """Pack key `k` then value `v`; raise PackException if either fails."""
        ret = self._pack(k, nest_limit-1)
        if ret != 0: raise PackException("cannot pack : %s" % k)
        ret = self._pack(v, nest_limit-1)
        if ret != 0: raise PackException("cannot pack : %s" % v)
        return ret
cdef inline init_ctx(template_context *ctx,
                     object object_hook, object object_pairs_hook, object list_hook,
                     bint use_list, char* encoding, char* unicode_errors):
    """Initialise an unpack context and install the user options on it.

    `object_hook` and `object_pairs_hook` are mutually exclusive (ValueError);
    every hook that is not None must be callable (TypeError).  Both kinds of
    map hook are stored in `ctx.user.object_hook`; `has_pairs_hook` records
    which calling convention applies.
    """
    template_init(ctx)
    ctx.user.use_list = use_list
    ctx.user.object_hook = ctx.user.list_hook = <PyObject*>NULL

    if object_hook is not None and object_pairs_hook is not None:
        raise ValueError("object_pairs_hook and object_hook are mutually exclusive.")

    # The hooks are stored as raw PyObject* (explicit cast required) and no
    # reference is taken here, so the caller must keep the callables alive
    # for as long as the context is in use.
    if object_hook is not None:
        if not PyCallable_Check(object_hook):
            raise TypeError("object_hook must be a callable.")
        ctx.user.object_hook = <PyObject*>object_hook

    if object_pairs_hook is None:
        ctx.user.has_pairs_hook = False
    else:
        if not PyCallable_Check(object_pairs_hook):
            raise TypeError("object_pairs_hook must be a callable.")
        ctx.user.object_hook = <PyObject*>object_pairs_hook
        ctx.user.has_pairs_hook = True

    if list_hook is not None:
        if not PyCallable_Check(list_hook):
            raise TypeError("list_hook must be a callable.")
        ctx.user.list_hook = <PyObject*>list_hook

    ctx.user.encoding = encoding
    ctx.user.unicode_errors = unicode_errors
cdef class Unpacker(object):
    """
    Streaming unpacker.

    `file_like` is a file-like object having `.read(n)` method.
    When `Unpacker` initialized with `file_like`, unpacker reads serialized data
    from it and `.feed()` method is not usable.

    `read_size` is used as `file_like.read(read_size)`.
    (default: min(1024**2, max_buffer_size))

    If `use_list` is true (default), msgpack list is deserialized to Python list.
    Otherwise, it is deserialized to Python tuple.

    `object_hook` is same to simplejson. If it is not None, it should be callable
    and Unpacker calls it with a dict argument after deserializing a map.

    `object_pairs_hook` is same to simplejson. If it is not None, it should be callable
    and Unpacker calls it with a list of key-value pairs after deserializing a map.

    `encoding` is encoding used for decoding msgpack bytes. If it is None (default),
    msgpack bytes is deserialized to Python bytes.

    `unicode_errors` is used for decoding bytes.

    `max_buffer_size` limits size of data waiting unpacked.
    0 means system's INT_MAX (default).
    Raises `BufferFull` exception when it is insufficient.
    You should set this parameter when unpacking data from untrusted source.

    example of streaming deserialize from file-like object::

        unpacker = Unpacker(file_like)
        for o in unpacker:
            do_something(o)

    example of streaming deserialize from socket::

        unpacker = Unpacker()
        while 1:
            buf = sock.recv(1024**2)
            if not buf:
                break
            unpacker.feed(buf)
            for o in unpacker:
                do_something(o)
    """
    cdef template_context ctx
    cdef char* buf
    # buf_head..buf_tail is the window of buffered bytes not yet consumed
    cdef size_t buf_size, buf_head, buf_tail
    cdef object file_like
    cdef object file_like_read
    cdef Py_ssize_t read_size
    # the context only holds raw pointers to the hooks, so owning references
    # are kept here to keep the callables alive as long as the unpacker
    cdef object object_hook, object_pairs_hook, list_hook
    cdef object encoding, unicode_errors
    cdef size_t max_buffer_size

    def __cinit__(self):
        self.buf = NULL

    def __dealloc__(self):
        free(self.buf)
        self.buf = NULL

    def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1,
                 object object_hook=None, object object_pairs_hook=None, object list_hook=None,
                 encoding=None, unicode_errors='strict', int max_buffer_size=0,
                 ):
        cdef char *cenc=NULL, *cerr=NULL

        self.file_like = file_like
        if file_like:
            self.file_like_read = file_like.read
            if not PyCallable_Check(self.file_like_read):
                raise ValueError("`file_like.read` must be a callable.")
        if not max_buffer_size:
            max_buffer_size = INT_MAX
        if read_size > max_buffer_size:
            raise ValueError("read_size should be less or equal to max_buffer_size")
        if not read_size:
            read_size = min(max_buffer_size, 1024**2)
        self.max_buffer_size = max_buffer_size
        self.read_size = read_size
        # malloc returns void*; the explicit cast is required to assign it
        self.buf = <char*>malloc(read_size)
        if self.buf == NULL:
            raise MemoryError("Unable to allocate internal buffer.")
        self.buf_size = read_size
        self.buf_head = 0
        self.buf_tail = 0

        # the encoded byte strings are stored on self so that the char*
        # pointers passed to the context remain valid
        if encoding is not None:
            if isinstance(encoding, unicode):
                encoding = encoding.encode('ascii')
            self.encoding = encoding
            cenc = PyBytes_AsString(encoding)

        if unicode_errors is not None:
            if isinstance(unicode_errors, unicode):
                unicode_errors = unicode_errors.encode('ascii')
            self.unicode_errors = unicode_errors
            cerr = PyBytes_AsString(unicode_errors)

        # retain the hooks before handing their pointers to the context
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        self.list_hook = list_hook

        init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, use_list, cenc, cerr)

    def feed(self, object next_bytes):
        """Append `next_bytes` to internal buffer."""
        cdef char* buf
        cdef Py_ssize_t buf_len
        if self.file_like is not None:
            raise TypeError(
                    "unpacker.feed() is not be able to use with `file_like`.")
        PyObject_AsReadBuffer(next_bytes, <const_void_ptr*>&buf, &buf_len)
        self.append_buffer(buf, buf_len)

    cdef append_buffer(self, void* _buf, Py_ssize_t _buf_len):
        """Copy `_buf_len` bytes from `_buf` behind the buffered data.

        Unconsumed bytes are first moved to the front when that makes enough
        room; otherwise a larger buffer is allocated.  Raises BufferFull when
        the required size exceeds `max_buffer_size`.
        """
        cdef:
            char* buf = self.buf
            char* new_buf
            size_t head = self.buf_head
            size_t tail = self.buf_tail
            size_t buf_size = self.buf_size
            size_t new_size

        if tail + _buf_len > buf_size:
            if ((tail - head) + _buf_len) <= buf_size:
                # move to front.
                memmove(buf, buf + head, tail - head)
                tail -= head
                head = 0
            else:
                # expand buffer.
                new_size = (tail-head) + _buf_len
                if new_size > self.max_buffer_size:
                    raise BufferFull
                # over-allocate (2x) but never beyond the configured maximum
                new_size = min(new_size*2, self.max_buffer_size)
                new_buf = <char*>malloc(new_size)
                if new_buf == NULL:
                    # self.buf still holds old buffer and will be freed during
                    # obj destruction
                    raise MemoryError("Unable to enlarge internal buffer.")
                memcpy(new_buf, buf + head, tail - head)
                free(buf)

                buf = new_buf
                buf_size = new_size
                tail -= head
                head = 0

        memcpy(buf + tail, <char*>(_buf), _buf_len)
        self.buf = buf
        self.buf_head = head
        self.buf_size = buf_size
        self.buf_tail = tail + _buf_len

    cdef read_from_file(self):
        """Read the next chunk from `file_like` into the buffer.

        An empty read clears `self.file_like`, which `_unpack` treats as the
        end of the input.
        """
        next_bytes = self.file_like_read(
                min(self.read_size,
                    self.max_buffer_size - (self.buf_tail - self.buf_head)
                    ))
        if next_bytes:
            self.append_buffer(PyBytes_AsString(next_bytes), PyBytes_Size(next_bytes))
        else:
            self.file_like = None

    cdef object _unpack(self, execute_fn execute, object write_bytes, bint iter=0):
        """Run `execute` over the buffer until it yields one result.

        When more data is needed it is read from `file_like` if available;
        otherwise StopIteration (when `iter` is true) or OutOfData is raised.
        Any other status from `execute` raises ValueError.
        """
        cdef int ret
        cdef object obj
        cdef size_t prev_head
        while 1:
            prev_head = self.buf_head
            ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head)
            if write_bytes is not None:
                # report the raw bytes consumed by this step
                write_bytes(PyBytes_FromStringAndSize(self.buf + prev_head, self.buf_head - prev_head))

            if ret == 1:
                obj = template_data(&self.ctx)
                template_init(&self.ctx)
                return obj
            elif ret == 0:
                if self.file_like is not None:
                    self.read_from_file()
                    continue
                if iter:
                    raise StopIteration("No more data to unpack.")
                else:
                    raise OutOfData("No more data to unpack.")
            else:
                raise ValueError("Unpack failed: error = %d" % (ret,))

    def read_bytes(self, Py_ssize_t nbytes):
        """read a specified number of raw bytes from the stream"""
        cdef size_t nread
        # serve from the internal buffer first, then from the file if any
        nread = min(self.buf_tail - self.buf_head, nbytes)
        ret = PyBytes_FromStringAndSize(self.buf + self.buf_head, nread)
        self.buf_head += nread
        if len(ret) < nbytes and self.file_like is not None:
            ret += self.file_like.read(nbytes - len(ret))
        return ret

    def unpack(self, object write_bytes=None):
        """
        unpack one object

        If write_bytes is not None, it will be called with parts of the raw
        message as it is unpacked.

        Raises `OutOfData` when there are no more bytes to unpack.
        """
        return self._unpack(template_construct, write_bytes)

    def skip(self, object write_bytes=None):
        """
        read and ignore one object, returning None

        If write_bytes is not None, it will be called with parts of the raw
        message as it is unpacked.

        Raises `OutOfData` when there are no more bytes to unpack.
        """
        return self._unpack(template_skip, write_bytes)

    def read_array_header(self, object write_bytes=None):
        """assuming the next object is an array, return its size n, such that
        the next n unpack() calls will iterate over its contents.

        Raises `OutOfData` when there are no more bytes to unpack.
        """
        return self._unpack(read_array_header, write_bytes)

    def read_map_header(self, object write_bytes=None):
        """assuming the next object is a map, return its size n, such that the
        next n * 2 unpack() calls will iterate over its key-value pairs.

        Raises `OutOfData` when there are no more bytes to unpack.
        """
        return self._unpack(read_map_header, write_bytes)

    def __iter__(self):
        return self

    def __next__(self):
        return self._unpack(template_construct, None, 1)

    # for debug.
    #def _buf(self):
    #    return PyString_FromStringAndSize(self.buf, self.buf_tail)

    #def _off(self):
    #    return self.buf_head
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include "sysdep.h" +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _MSC_VER +#define inline __inline +#endif + +typedef struct msgpack_packer { + char *buf; + size_t length; + size_t buf_size; +} msgpack_packer; + +typedef struct Packer Packer; + +static inline int msgpack_pack_short(msgpack_packer* pk, short d); +static inline int msgpack_pack_int(msgpack_packer* pk, int d); +static inline int msgpack_pack_long(msgpack_packer* pk, long d); +static inline int msgpack_pack_long_long(msgpack_packer* pk, long long d); +static inline int msgpack_pack_unsigned_short(msgpack_packer* pk, unsigned short d); +static inline int msgpack_pack_unsigned_int(msgpack_packer* pk, unsigned int d); +static inline int msgpack_pack_unsigned_long(msgpack_packer* pk, unsigned long d); +static inline int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d); + +static inline int msgpack_pack_uint8(msgpack_packer* pk, uint8_t d); +static inline int msgpack_pack_uint16(msgpack_packer* pk, uint16_t d); +static inline int msgpack_pack_uint32(msgpack_packer* pk, uint32_t d); +static inline int msgpack_pack_uint64(msgpack_packer* pk, uint64_t d); +static inline int msgpack_pack_int8(msgpack_packer* pk, int8_t d); +static inline int msgpack_pack_int16(msgpack_packer* pk, int16_t d); +static inline int msgpack_pack_int32(msgpack_packer* pk, int32_t d); +static inline int msgpack_pack_int64(msgpack_packer* pk, int64_t d); + +static inline int msgpack_pack_float(msgpack_packer* pk, float d); +static inline 
int msgpack_pack_double(msgpack_packer* pk, double d); + +static inline int msgpack_pack_nil(msgpack_packer* pk); +static inline int msgpack_pack_true(msgpack_packer* pk); +static inline int msgpack_pack_false(msgpack_packer* pk); + +static inline int msgpack_pack_array(msgpack_packer* pk, unsigned int n); + +static inline int msgpack_pack_map(msgpack_packer* pk, unsigned int n); + +static inline int msgpack_pack_raw(msgpack_packer* pk, size_t l); +static inline int msgpack_pack_raw_body(msgpack_packer* pk, const void* b, size_t l); +/* Append l bytes from data to pk->buf, growing the buffer with realloc() when it is too small. Returns 0 on success and -1 when the required size does not fit in size_t or realloc() fails; pk is only written on success, so after a failure it still describes the old buffer. */ +static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_t l) +{ + char* buf = pk->buf; + size_t bs = pk->buf_size; + size_t len = pk->length; + if (l > (size_t)-1 - len) return -1; /* len + l would wrap and defeat the capacity check below */ + if (len + l > bs) { + bs = (len + l > (size_t)-1 / 2) ? len + l : (len + l) * 2; /* over-allocate 2x, but never let the doubling wrap size_t */ + buf = (char*)realloc(buf, bs); + if (!buf) return -1; + } + memcpy(buf + len, data, l); + len += l; + + pk->buf = buf; + pk->buf_size = bs; + pk->length = len; + return 0; +} +/* Glue consumed by pack_template.h: each generated msgpack_pack_* function is static inline int and emits its bytes by returning the result of msgpack_pack_write(). */ +#define msgpack_pack_inline_func(name) \ + static inline int msgpack_pack ## name + +#define msgpack_pack_inline_func_cint(name) \ + static inline int msgpack_pack ## name + +#define msgpack_pack_user msgpack_packer* + +#define msgpack_pack_append_buffer(user, buf, len) \ + return msgpack_pack_write(user, (const char*)buf, len) + +#include "pack_template.h" + +#ifdef __cplusplus +} +#endif diff --git a/pandas/src/msgpack/pack_template.h b/pandas/src/msgpack/pack_template.h new file mode 100644 index 0000000000000..65c959dd8ce63 --- /dev/null +++ b/pandas/src/msgpack/pack_template.h @@ -0,0 +1,771 @@ +/* + * MessagePack packing routine template + * + * Copyright (C) 2008-2010 FURUHASHI Sadayuki + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#if defined(__LITTLE_ENDIAN__) +#define TAKE8_8(d) ((uint8_t*)&d)[0] +#define TAKE8_16(d) ((uint8_t*)&d)[0] +#define TAKE8_32(d) ((uint8_t*)&d)[0] +#define TAKE8_64(d) ((uint8_t*)&d)[0] +#elif defined(__BIG_ENDIAN__) +#define TAKE8_8(d) ((uint8_t*)&d)[0] +#define TAKE8_16(d) ((uint8_t*)&d)[1] +#define TAKE8_32(d) ((uint8_t*)&d)[3] +#define TAKE8_64(d) ((uint8_t*)&d)[7] +#endif + +#ifndef msgpack_pack_inline_func +#error msgpack_pack_inline_func template is not defined +#endif + +#ifndef msgpack_pack_user +#error msgpack_pack_user type is not defined +#endif + +#ifndef msgpack_pack_append_buffer +#error msgpack_pack_append_buffer callback is not defined +#endif + + +/* + * Integer + */ + +#define msgpack_pack_real_uint8(x, d) \ +do { \ + if(d < (1<<7)) { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \ + } else { \ + /* unsigned 8 */ \ + unsigned char buf[2] = {0xcc, TAKE8_8(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } \ +} while(0) + +#define msgpack_pack_real_uint16(x, d) \ +do { \ + if(d < (1<<7)) { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_16(d), 1); \ + } else if(d < (1<<8)) { \ + /* unsigned 8 */ \ + unsigned char buf[2] = {0xcc, TAKE8_16(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } else { \ + /* unsigned 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } \ +} while(0) + +#define msgpack_pack_real_uint32(x, d) \ +do { \ + if(d < (1<<8)) { \ + if(d < (1<<7)) { \ + /* fixnum */ \ + 
msgpack_pack_append_buffer(x, &TAKE8_32(d), 1); \ + } else { \ + /* unsigned 8 */ \ + unsigned char buf[2] = {0xcc, TAKE8_32(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } \ + } else { \ + if(d < (1<<16)) { \ + /* unsigned 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } else { \ + /* unsigned 32 */ \ + unsigned char buf[5]; \ + buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \ + msgpack_pack_append_buffer(x, buf, 5); \ + } \ + } \ +} while(0) + +#define msgpack_pack_real_uint64(x, d) \ +do { \ + if(d < (1ULL<<8)) { \ + if(d < (1ULL<<7)) { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_64(d), 1); \ + } else { \ + /* unsigned 8 */ \ + unsigned char buf[2] = {0xcc, TAKE8_64(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } \ + } else { \ + if(d < (1ULL<<16)) { \ + /* unsigned 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } else if(d < (1ULL<<32)) { \ + /* unsigned 32 */ \ + unsigned char buf[5]; \ + buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \ + msgpack_pack_append_buffer(x, buf, 5); \ + } else { \ + /* unsigned 64 */ \ + unsigned char buf[9]; \ + buf[0] = 0xcf; _msgpack_store64(&buf[1], d); \ + msgpack_pack_append_buffer(x, buf, 9); \ + } \ + } \ +} while(0) + +#define msgpack_pack_real_int8(x, d) \ +do { \ + if(d < -(1<<5)) { \ + /* signed 8 */ \ + unsigned char buf[2] = {0xd0, TAKE8_8(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } else { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \ + } \ +} while(0) + +#define msgpack_pack_real_int16(x, d) \ +do { \ + if(d < -(1<<5)) { \ + if(d < -(1<<7)) { \ + /* signed 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xd1; _msgpack_store16(&buf[1], (int16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } else { \ + /* signed 8 */ \ + unsigned char buf[2] = {0xd0, TAKE8_16(d)}; 
\ + msgpack_pack_append_buffer(x, buf, 2); \ + } \ + } else if(d < (1<<7)) { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_16(d), 1); \ + } else { \ + if(d < (1<<8)) { \ + /* unsigned 8 */ \ + unsigned char buf[2] = {0xcc, TAKE8_16(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } else { \ + /* unsigned 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } \ + } \ +} while(0) + +#define msgpack_pack_real_int32(x, d) \ +do { \ + if(d < -(1<<5)) { \ + if(d < -(1<<15)) { \ + /* signed 32 */ \ + unsigned char buf[5]; \ + buf[0] = 0xd2; _msgpack_store32(&buf[1], (int32_t)d); \ + msgpack_pack_append_buffer(x, buf, 5); \ + } else if(d < -(1<<7)) { \ + /* signed 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xd1; _msgpack_store16(&buf[1], (int16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } else { \ + /* signed 8 */ \ + unsigned char buf[2] = {0xd0, TAKE8_32(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } \ + } else if(d < (1<<7)) { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_32(d), 1); \ + } else { \ + if(d < (1<<8)) { \ + /* unsigned 8 */ \ + unsigned char buf[2] = {0xcc, TAKE8_32(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } else if(d < (1<<16)) { \ + /* unsigned 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } else { \ + /* unsigned 32 */ \ + unsigned char buf[5]; \ + buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \ + msgpack_pack_append_buffer(x, buf, 5); \ + } \ + } \ +} while(0) + +#define msgpack_pack_real_int64(x, d) \ +do { \ + if(d < -(1LL<<5)) { \ + if(d < -(1LL<<15)) { \ + if(d < -(1LL<<31)) { \ + /* signed 64 */ \ + unsigned char buf[9]; \ + buf[0] = 0xd3; _msgpack_store64(&buf[1], d); \ + msgpack_pack_append_buffer(x, buf, 9); \ + } else { \ + /* signed 32 */ \ + unsigned char buf[5]; \ + buf[0] = 0xd2; 
_msgpack_store32(&buf[1], (int32_t)d); \ + msgpack_pack_append_buffer(x, buf, 5); \ + } \ + } else { \ + if(d < -(1<<7)) { \ + /* signed 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xd1; _msgpack_store16(&buf[1], (int16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } else { \ + /* signed 8 */ \ + unsigned char buf[2] = {0xd0, TAKE8_64(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } \ + } \ + } else if(d < (1<<7)) { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_64(d), 1); \ + } else { \ + if(d < (1LL<<16)) { \ + if(d < (1<<8)) { \ + /* unsigned 8 */ \ + unsigned char buf[2] = {0xcc, TAKE8_64(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } else { \ + /* unsigned 16 */ \ + unsigned char buf[3]; \ + buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ + msgpack_pack_append_buffer(x, buf, 3); \ + } \ + } else { \ + if(d < (1LL<<32)) { \ + /* unsigned 32 */ \ + unsigned char buf[5]; \ + buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \ + msgpack_pack_append_buffer(x, buf, 5); \ + } else { \ + /* unsigned 64 */ \ + unsigned char buf[9]; \ + buf[0] = 0xcf; _msgpack_store64(&buf[1], d); \ + msgpack_pack_append_buffer(x, buf, 9); \ + } \ + } \ + } \ +} while(0) + + +#ifdef msgpack_pack_inline_func_fixint + +msgpack_pack_inline_func_fixint(_uint8)(msgpack_pack_user x, uint8_t d) +{ + unsigned char buf[2] = {0xcc, TAKE8_8(d)}; + msgpack_pack_append_buffer(x, buf, 2); +} + +msgpack_pack_inline_func_fixint(_uint16)(msgpack_pack_user x, uint16_t d) +{ + unsigned char buf[3]; + buf[0] = 0xcd; _msgpack_store16(&buf[1], d); + msgpack_pack_append_buffer(x, buf, 3); +} + +msgpack_pack_inline_func_fixint(_uint32)(msgpack_pack_user x, uint32_t d) +{ + unsigned char buf[5]; + buf[0] = 0xce; _msgpack_store32(&buf[1], d); + msgpack_pack_append_buffer(x, buf, 5); +} + +msgpack_pack_inline_func_fixint(_uint64)(msgpack_pack_user x, uint64_t d) +{ + unsigned char buf[9]; + buf[0] = 0xcf; _msgpack_store64(&buf[1], d); + msgpack_pack_append_buffer(x, 
buf, 9); +} + +msgpack_pack_inline_func_fixint(_int8)(msgpack_pack_user x, int8_t d) +{ + unsigned char buf[2] = {0xd0, TAKE8_8(d)}; + msgpack_pack_append_buffer(x, buf, 2); +} + +msgpack_pack_inline_func_fixint(_int16)(msgpack_pack_user x, int16_t d) +{ + unsigned char buf[3]; + buf[0] = 0xd1; _msgpack_store16(&buf[1], d); + msgpack_pack_append_buffer(x, buf, 3); +} + +msgpack_pack_inline_func_fixint(_int32)(msgpack_pack_user x, int32_t d) +{ + unsigned char buf[5]; + buf[0] = 0xd2; _msgpack_store32(&buf[1], d); + msgpack_pack_append_buffer(x, buf, 5); +} + +msgpack_pack_inline_func_fixint(_int64)(msgpack_pack_user x, int64_t d) +{ + unsigned char buf[9]; + buf[0] = 0xd3; _msgpack_store64(&buf[1], d); + msgpack_pack_append_buffer(x, buf, 9); +} + +#undef msgpack_pack_inline_func_fixint +#endif + + +msgpack_pack_inline_func(_uint8)(msgpack_pack_user x, uint8_t d) +{ + msgpack_pack_real_uint8(x, d); +} + +msgpack_pack_inline_func(_uint16)(msgpack_pack_user x, uint16_t d) +{ + msgpack_pack_real_uint16(x, d); +} + +msgpack_pack_inline_func(_uint32)(msgpack_pack_user x, uint32_t d) +{ + msgpack_pack_real_uint32(x, d); +} + +msgpack_pack_inline_func(_uint64)(msgpack_pack_user x, uint64_t d) +{ + msgpack_pack_real_uint64(x, d); +} + +msgpack_pack_inline_func(_int8)(msgpack_pack_user x, int8_t d) +{ + msgpack_pack_real_int8(x, d); +} + +msgpack_pack_inline_func(_int16)(msgpack_pack_user x, int16_t d) +{ + msgpack_pack_real_int16(x, d); +} + +msgpack_pack_inline_func(_int32)(msgpack_pack_user x, int32_t d) +{ + msgpack_pack_real_int32(x, d); +} + +msgpack_pack_inline_func(_int64)(msgpack_pack_user x, int64_t d) +{ + msgpack_pack_real_int64(x, d); +} + + +#ifdef msgpack_pack_inline_func_cint + +msgpack_pack_inline_func_cint(_short)(msgpack_pack_user x, short d) +{ +#if defined(SIZEOF_SHORT) +#if SIZEOF_SHORT == 2 + msgpack_pack_real_int16(x, d); +#elif SIZEOF_SHORT == 4 + msgpack_pack_real_int32(x, d); +#else + msgpack_pack_real_int64(x, d); +#endif + +#elif 
defined(SHRT_MAX) +#if SHRT_MAX == 0x7fff + msgpack_pack_real_int16(x, d); +#elif SHRT_MAX == 0x7fffffff + msgpack_pack_real_int32(x, d); +#else + msgpack_pack_real_int64(x, d); +#endif + +#else +if(sizeof(short) == 2) { + msgpack_pack_real_int16(x, d); +} else if(sizeof(short) == 4) { + msgpack_pack_real_int32(x, d); +} else { + msgpack_pack_real_int64(x, d); +} +#endif +} + +msgpack_pack_inline_func_cint(_int)(msgpack_pack_user x, int d) +{ +#if defined(SIZEOF_INT) +#if SIZEOF_INT == 2 + msgpack_pack_real_int16(x, d); +#elif SIZEOF_INT == 4 + msgpack_pack_real_int32(x, d); +#else + msgpack_pack_real_int64(x, d); +#endif + +#elif defined(INT_MAX) +#if INT_MAX == 0x7fff + msgpack_pack_real_int16(x, d); +#elif INT_MAX == 0x7fffffff + msgpack_pack_real_int32(x, d); +#else + msgpack_pack_real_int64(x, d); +#endif + +#else +if(sizeof(int) == 2) { + msgpack_pack_real_int16(x, d); +} else if(sizeof(int) == 4) { + msgpack_pack_real_int32(x, d); +} else { + msgpack_pack_real_int64(x, d); +} +#endif +} + +msgpack_pack_inline_func_cint(_long)(msgpack_pack_user x, long d) +{ +#if defined(SIZEOF_LONG) +#if SIZEOF_LONG == 2 + msgpack_pack_real_int16(x, d); +#elif SIZEOF_LONG == 4 + msgpack_pack_real_int32(x, d); +#else + msgpack_pack_real_int64(x, d); +#endif + +#elif defined(LONG_MAX) +#if LONG_MAX == 0x7fffL + msgpack_pack_real_int16(x, d); +#elif LONG_MAX == 0x7fffffffL + msgpack_pack_real_int32(x, d); +#else + msgpack_pack_real_int64(x, d); +#endif + +#else +if(sizeof(long) == 2) { + msgpack_pack_real_int16(x, d); +} else if(sizeof(long) == 4) { + msgpack_pack_real_int32(x, d); +} else { + msgpack_pack_real_int64(x, d); +} +#endif +} + +msgpack_pack_inline_func_cint(_long_long)(msgpack_pack_user x, long long d) +{ +#if defined(SIZEOF_LONG_LONG) +#if SIZEOF_LONG_LONG == 2 + msgpack_pack_real_int16(x, d); +#elif SIZEOF_LONG_LONG == 4 + msgpack_pack_real_int32(x, d); +#else + msgpack_pack_real_int64(x, d); +#endif + +#elif defined(LLONG_MAX) +#if LLONG_MAX == 0x7fffL + 
msgpack_pack_real_int16(x, d); +#elif LLONG_MAX == 0x7fffffffL + msgpack_pack_real_int32(x, d); +#else + msgpack_pack_real_int64(x, d); +#endif + +#else +if(sizeof(long long) == 2) { + msgpack_pack_real_int16(x, d); +} else if(sizeof(long long) == 4) { + msgpack_pack_real_int32(x, d); +} else { + msgpack_pack_real_int64(x, d); +} +#endif +} + +msgpack_pack_inline_func_cint(_unsigned_short)(msgpack_pack_user x, unsigned short d) +{ +#if defined(SIZEOF_SHORT) +#if SIZEOF_SHORT == 2 + msgpack_pack_real_uint16(x, d); +#elif SIZEOF_SHORT == 4 + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#elif defined(USHRT_MAX) +#if USHRT_MAX == 0xffffU + msgpack_pack_real_uint16(x, d); +#elif USHRT_MAX == 0xffffffffU + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#else +if(sizeof(unsigned short) == 2) { + msgpack_pack_real_uint16(x, d); +} else if(sizeof(unsigned short) == 4) { + msgpack_pack_real_uint32(x, d); +} else { + msgpack_pack_real_uint64(x, d); +} +#endif +} + +msgpack_pack_inline_func_cint(_unsigned_int)(msgpack_pack_user x, unsigned int d) +{ +#if defined(SIZEOF_INT) +#if SIZEOF_INT == 2 + msgpack_pack_real_uint16(x, d); +#elif SIZEOF_INT == 4 + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#elif defined(UINT_MAX) +#if UINT_MAX == 0xffffU + msgpack_pack_real_uint16(x, d); +#elif UINT_MAX == 0xffffffffU + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#else +if(sizeof(unsigned int) == 2) { + msgpack_pack_real_uint16(x, d); +} else if(sizeof(unsigned int) == 4) { + msgpack_pack_real_uint32(x, d); +} else { + msgpack_pack_real_uint64(x, d); +} +#endif +} + +msgpack_pack_inline_func_cint(_unsigned_long)(msgpack_pack_user x, unsigned long d) +{ +#if defined(SIZEOF_LONG) +#if SIZEOF_LONG == 2 + msgpack_pack_real_uint16(x, d); +#elif SIZEOF_LONG == 4 + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); 
+#endif + +#elif defined(ULONG_MAX) +#if ULONG_MAX == 0xffffUL + msgpack_pack_real_uint16(x, d); +#elif ULONG_MAX == 0xffffffffUL + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#else +if(sizeof(unsigned long) == 2) { + msgpack_pack_real_uint16(x, d); +} else if(sizeof(unsigned long) == 4) { + msgpack_pack_real_uint32(x, d); +} else { + msgpack_pack_real_uint64(x, d); +} +#endif +} + +msgpack_pack_inline_func_cint(_unsigned_long_long)(msgpack_pack_user x, unsigned long long d) +{ +#if defined(SIZEOF_LONG_LONG) +#if SIZEOF_LONG_LONG == 2 + msgpack_pack_real_uint16(x, d); +#elif SIZEOF_LONG_LONG == 4 + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#elif defined(ULLONG_MAX) +#if ULLONG_MAX == 0xffffUL + msgpack_pack_real_uint16(x, d); +#elif ULLONG_MAX == 0xffffffffUL + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#else +if(sizeof(unsigned long long) == 2) { + msgpack_pack_real_uint16(x, d); +} else if(sizeof(unsigned long long) == 4) { + msgpack_pack_real_uint32(x, d); +} else { + msgpack_pack_real_uint64(x, d); +} +#endif +} + +#undef msgpack_pack_inline_func_cint +#endif + + + +/* + * Float: marker byte 0xca (float) or 0xcb (double), then the value's bit pattern written by _msgpack_store32/64 + */ + +msgpack_pack_inline_func(_float)(msgpack_pack_user x, float d) +{ + union { float f; uint32_t i; } mem; + unsigned char buf[5]; /* all declarations precede the first statement so the body is valid C89 (e.g. MSVC compiling as C) */ + mem.f = d; /* read the float's bits back as a uint32_t through the union */ + buf[0] = 0xca; _msgpack_store32(&buf[1], mem.i); + msgpack_pack_append_buffer(x, buf, 5); +} + +msgpack_pack_inline_func(_double)(msgpack_pack_user x, double d) +{ + union { double f; uint64_t i; } mem; + unsigned char buf[9]; /* declared before the first statement for C89 compilers, as in the float packer */ + mem.f = d; + buf[0] = 0xcb; +#if defined(__arm__) && !(__ARM_EABI__) // arm-oabi + // https://github.com/msgpack/msgpack-perl/pull/1 + mem.i = (mem.i & 0xFFFFFFFFUL) << 32UL | (mem.i >> 32UL); /* exchange the two 32-bit halves of the double */ +#endif + _msgpack_store64(&buf[1], mem.i); + msgpack_pack_append_buffer(x, buf, 9); +} + + +/* + * Nil + */ + +msgpack_pack_inline_func(_nil)(msgpack_pack_user x) +{ + static const unsigned char d = 
0xc0; + msgpack_pack_append_buffer(x, &d, 1); +} + + +/* + * Boolean + */ + +msgpack_pack_inline_func(_true)(msgpack_pack_user x) +{ + static const unsigned char d = 0xc3; + msgpack_pack_append_buffer(x, &d, 1); +} + +msgpack_pack_inline_func(_false)(msgpack_pack_user x) +{ + static const unsigned char d = 0xc2; + msgpack_pack_append_buffer(x, &d, 1); +} + + +/* + * Array + */ + +msgpack_pack_inline_func(_array)(msgpack_pack_user x, unsigned int n) +{ + if(n < 16) { + unsigned char d = 0x90 | n; + msgpack_pack_append_buffer(x, &d, 1); + } else if(n < 65536) { + unsigned char buf[3]; + buf[0] = 0xdc; _msgpack_store16(&buf[1], (uint16_t)n); + msgpack_pack_append_buffer(x, buf, 3); + } else { + unsigned char buf[5]; + buf[0] = 0xdd; _msgpack_store32(&buf[1], (uint32_t)n); + msgpack_pack_append_buffer(x, buf, 5); + } +} + + +/* + * Map + */ + +msgpack_pack_inline_func(_map)(msgpack_pack_user x, unsigned int n) +{ + if(n < 16) { + unsigned char d = 0x80 | n; + msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); + } else if(n < 65536) { + unsigned char buf[3]; + buf[0] = 0xde; _msgpack_store16(&buf[1], (uint16_t)n); + msgpack_pack_append_buffer(x, buf, 3); + } else { + unsigned char buf[5]; + buf[0] = 0xdf; _msgpack_store32(&buf[1], (uint32_t)n); + msgpack_pack_append_buffer(x, buf, 5); + } +} + + +/* + * Raw + */ + +msgpack_pack_inline_func(_raw)(msgpack_pack_user x, size_t l) +{ + if(l < 32) { + unsigned char d = 0xa0 | (uint8_t)l; + msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); + } else if(l < 65536) { + unsigned char buf[3]; + buf[0] = 0xda; _msgpack_store16(&buf[1], (uint16_t)l); + msgpack_pack_append_buffer(x, buf, 3); + } else { + unsigned char buf[5]; + buf[0] = 0xdb; _msgpack_store32(&buf[1], (uint32_t)l); + msgpack_pack_append_buffer(x, buf, 5); + } +} + +msgpack_pack_inline_func(_raw_body)(msgpack_pack_user x, const void* b, size_t l) +{ + msgpack_pack_append_buffer(x, (const unsigned char*)b, l); +} + +#undef msgpack_pack_inline_func +#undef msgpack_pack_user 
+#undef msgpack_pack_append_buffer + +#undef TAKE8_8 +#undef TAKE8_16 +#undef TAKE8_32 +#undef TAKE8_64 + +#undef msgpack_pack_real_uint8 +#undef msgpack_pack_real_uint16 +#undef msgpack_pack_real_uint32 +#undef msgpack_pack_real_uint64 +#undef msgpack_pack_real_int8 +#undef msgpack_pack_real_int16 +#undef msgpack_pack_real_int32 +#undef msgpack_pack_real_int64 + diff --git a/pandas/src/msgpack/sysdep.h b/pandas/src/msgpack/sysdep.h new file mode 100644 index 0000000000000..4fedbd8ba472f --- /dev/null +++ b/pandas/src/msgpack/sysdep.h @@ -0,0 +1,195 @@ +/* + * MessagePack system dependencies + * + * Copyright (C) 2008-2010 FURUHASHI Sadayuki + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MSGPACK_SYSDEP_H__ +#define MSGPACK_SYSDEP_H__ + +#include +#include +#if defined(_MSC_VER) && _MSC_VER < 1600 +typedef __int8 int8_t; +typedef unsigned __int8 uint8_t; +typedef __int16 int16_t; +typedef unsigned __int16 uint16_t; +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +#elif defined(_MSC_VER) // && _MSC_VER >= 1600 +#include +#else +#include +#include +#endif + +#ifdef _WIN32 +#define _msgpack_atomic_counter_header +typedef long _msgpack_atomic_counter_t; +#define _msgpack_sync_decr_and_fetch(ptr) InterlockedDecrement(ptr) +#define _msgpack_sync_incr_and_fetch(ptr) InterlockedIncrement(ptr) +#elif defined(__GNUC__) && ((__GNUC__*10 + __GNUC_MINOR__) < 41) +#define _msgpack_atomic_counter_header "gcc_atomic.h" +#else +typedef unsigned int _msgpack_atomic_counter_t; +#define _msgpack_sync_decr_and_fetch(ptr) __sync_sub_and_fetch(ptr, 1) +#define _msgpack_sync_incr_and_fetch(ptr) __sync_add_and_fetch(ptr, 1) +#endif + +#ifdef _WIN32 + +#ifdef __cplusplus +/* numeric_limits::min,max */ +#ifdef max +#undef max +#endif +#ifdef min +#undef min +#endif +#endif + +#else +#include /* __BYTE_ORDER */ +#endif + +#if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define __LITTLE_ENDIAN__ +#elif __BYTE_ORDER == __BIG_ENDIAN +#define __BIG_ENDIAN__ +#elif _WIN32 +#define __LITTLE_ENDIAN__ +#endif +#endif + + +#ifdef __LITTLE_ENDIAN__ + +#ifdef _WIN32 +# if defined(ntohs) +# define _msgpack_be16(x) ntohs(x) +# elif defined(_byteswap_ushort) || (defined(_MSC_VER) && _MSC_VER >= 1400) +# define _msgpack_be16(x) ((uint16_t)_byteswap_ushort((unsigned short)x)) +# else +# define _msgpack_be16(x) ( \ + ((((uint16_t)x) << 8) ) | \ + ((((uint16_t)x) >> 8) ) ) +# endif +#else +# define _msgpack_be16(x) ntohs(x) +#endif + +#ifdef _WIN32 +# if defined(ntohl) +# define _msgpack_be32(x) ntohl(x) +# elif defined(_byteswap_ulong) || 
(defined(_MSC_VER) && _MSC_VER >= 1400) +# define _msgpack_be32(x) ((uint32_t)_byteswap_ulong((unsigned long)x)) +# else +# define _msgpack_be32(x) \ + ( ((((uint32_t)x) << 24) ) | \ + ((((uint32_t)x) << 8) & 0x00ff0000U ) | \ + ((((uint32_t)x) >> 8) & 0x0000ff00U ) | \ + ((((uint32_t)x) >> 24) ) ) +# endif +#else +# define _msgpack_be32(x) ntohl(x) +#endif + +#if defined(_byteswap_uint64) || (defined(_MSC_VER) && _MSC_VER >= 1400) +# define _msgpack_be64(x) (_byteswap_uint64(x)) +#elif defined(bswap_64) +# define _msgpack_be64(x) bswap_64(x) +#elif defined(__DARWIN_OSSwapInt64) +# define _msgpack_be64(x) __DARWIN_OSSwapInt64(x) +#else +#define _msgpack_be64(x) \ + ( ((((uint64_t)x) << 56) ) | \ + ((((uint64_t)x) << 40) & 0x00ff000000000000ULL ) | \ + ((((uint64_t)x) << 24) & 0x0000ff0000000000ULL ) | \ + ((((uint64_t)x) << 8) & 0x000000ff00000000ULL ) | \ + ((((uint64_t)x) >> 8) & 0x00000000ff000000ULL ) | \ + ((((uint64_t)x) >> 24) & 0x0000000000ff0000ULL ) | \ + ((((uint64_t)x) >> 40) & 0x000000000000ff00ULL ) | \ + ((((uint64_t)x) >> 56) ) ) +#endif + +#define _msgpack_load16(cast, from) ((cast)( \ + (((uint16_t)((uint8_t*)(from))[0]) << 8) | \ + (((uint16_t)((uint8_t*)(from))[1]) ) )) + +#define _msgpack_load32(cast, from) ((cast)( \ + (((uint32_t)((uint8_t*)(from))[0]) << 24) | \ + (((uint32_t)((uint8_t*)(from))[1]) << 16) | \ + (((uint32_t)((uint8_t*)(from))[2]) << 8) | \ + (((uint32_t)((uint8_t*)(from))[3]) ) )) + +#define _msgpack_load64(cast, from) ((cast)( \ + (((uint64_t)((uint8_t*)(from))[0]) << 56) | \ + (((uint64_t)((uint8_t*)(from))[1]) << 48) | \ + (((uint64_t)((uint8_t*)(from))[2]) << 40) | \ + (((uint64_t)((uint8_t*)(from))[3]) << 32) | \ + (((uint64_t)((uint8_t*)(from))[4]) << 24) | \ + (((uint64_t)((uint8_t*)(from))[5]) << 16) | \ + (((uint64_t)((uint8_t*)(from))[6]) << 8) | \ + (((uint64_t)((uint8_t*)(from))[7]) ) )) + +#else + +#define _msgpack_be16(x) (x) +#define _msgpack_be32(x) (x) +#define _msgpack_be64(x) (x) + +#define 
_msgpack_load16(cast, from) ((cast)( \ + (((uint16_t)((uint8_t*)(from))[0]) << 8) | \ + (((uint16_t)((uint8_t*)(from))[1]) ) )) +/* `from` is parenthesised in every load macro so the uint8_t* cast applies to the whole argument expression, matching the little-endian definitions above */ +#define _msgpack_load32(cast, from) ((cast)( \ + (((uint32_t)((uint8_t*)(from))[0]) << 24) | \ + (((uint32_t)((uint8_t*)(from))[1]) << 16) | \ + (((uint32_t)((uint8_t*)(from))[2]) << 8) | \ + (((uint32_t)((uint8_t*)(from))[3]) ) )) + +#define _msgpack_load64(cast, from) ((cast)( \ + (((uint64_t)((uint8_t*)(from))[0]) << 56) | \ + (((uint64_t)((uint8_t*)(from))[1]) << 48) | \ + (((uint64_t)((uint8_t*)(from))[2]) << 40) | \ + (((uint64_t)((uint8_t*)(from))[3]) << 32) | \ + (((uint64_t)((uint8_t*)(from))[4]) << 24) | \ + (((uint64_t)((uint8_t*)(from))[5]) << 16) | \ + (((uint64_t)((uint8_t*)(from))[6]) << 8) | \ + (((uint64_t)((uint8_t*)(from))[7]) ) )) +#endif + +/* Store num at `to` in big-endian byte order: _msgpack_beNN converts from host order and memcpy performs the write, so `to` needs no particular alignment */ +#define _msgpack_store16(to, num) \ + do { uint16_t val = _msgpack_be16(num); memcpy(to, &val, 2); } while(0) +#define _msgpack_store32(to, num) \ + do { uint32_t val = _msgpack_be32(num); memcpy(to, &val, 4); } while(0) +#define _msgpack_store64(to, num) \ + do { uint64_t val = _msgpack_be64(num); memcpy(to, &val, 8); } while(0) + +/* +#define _msgpack_load16(cast, from) \ + ({ cast val; memcpy(&val, (char*)from, 2); _msgpack_be16(val); }) +#define _msgpack_load32(cast, from) \ + ({ cast val; memcpy(&val, (char*)from, 4); _msgpack_be32(val); }) +#define _msgpack_load64(cast, from) \ + ({ cast val; memcpy(&val, (char*)from, 8); _msgpack_be64(val); }) +*/ + + +#endif /* msgpack/sysdep.h */ + diff --git a/pandas/src/msgpack/unpack.h b/pandas/src/msgpack/unpack.h new file mode 100644 index 0000000000000..3dc88e5fbded0 --- /dev/null +++ b/pandas/src/msgpack/unpack.h @@ -0,0 +1,235 @@ +/* + * MessagePack for Python unpacking routine + * + * Copyright (C) 2009 Naoki INADA + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define MSGPACK_EMBED_STACK_SIZE (1024) +#include "unpack_define.h" +/* Per-unpacker options consulted by the template_callback_* functions in this header. */ +typedef struct unpack_user { + int use_list; /* non-zero: arrays are built as PyList objects, zero: as PyTuple objects */ + PyObject *object_hook; /* if set, called with each finished map container; its result replaces the container */ + bool has_pairs_hook; /* true: a map is collected as a list of (key, value) tuples instead of a dict */ + PyObject *list_hook; /* if set, called with each finished array; its result replaces the array */ + const char *encoding; /* if set, raw data is decoded with PyUnicode_Decode; otherwise it becomes a bytes object */ + const char *unicode_errors; /* passed as the errors argument of PyUnicode_Decode */ +} unpack_user; + +/* Naming glue required by unpack_template.h: generated structs and functions get the template prefix, callbacks resolve to the template_callback_* functions below. */ +#define msgpack_unpack_struct(name) \ + struct template ## name + +#define msgpack_unpack_func(ret, name) \ + static inline ret template ## name + +#define msgpack_unpack_callback(name) \ + template_callback ## name + +#define msgpack_unpack_object PyObject* + +#define msgpack_unpack_user unpack_user + +typedef int (*execute_fn)(msgpack_unpack_struct(_context)* ctx, const char* data, size_t len, size_t* off); + +struct template_context; +typedef struct template_context template_context; +/* Root-object callback: always returns NULL. */ +static inline msgpack_unpack_object template_callback_root(unpack_user* u) +{ + return NULL; +} +/* Scalar callbacks: wrap d in a new Python object, store it in *o and return 0; return -1 when the object could not be created. */ +static inline int template_callback_uint16(unpack_user* u, uint16_t d, msgpack_unpack_object* o) +{ + PyObject *p = PyInt_FromLong((long)d); + if (!p) + return -1; + *o = p; + return 0; +} +static inline int template_callback_uint8(unpack_user* u, uint8_t d, msgpack_unpack_object* o) +{ + return template_callback_uint16(u, d, o); +} + + +static inline int template_callback_uint32(unpack_user* u, uint32_t d, msgpack_unpack_object* o) +{ + PyObject *p; + if (d > LONG_MAX) { /* does not fit a C long: build the value from an unsigned long instead */ + p = PyLong_FromUnsignedLong((unsigned long)d); + } else { + p = PyInt_FromLong((long)d); + } + if (!p) + return -1; + *o = p; + return 0; +} + +static inline int template_callback_uint64(unpack_user* u, uint64_t d, 
msgpack_unpack_object* o) +{ + PyObject *p = PyLong_FromUnsignedLongLong(d); + if (!p) + return -1; + *o = p; + return 0; +} + +static inline int template_callback_int32(unpack_user* u, int32_t d, msgpack_unpack_object* o) +{ + PyObject *p = PyInt_FromLong(d); + if (!p) + return -1; + *o = p; + return 0; +} +/* The 8- and 16-bit signed callbacks forward to the int32 callback. */ +static inline int template_callback_int16(unpack_user* u, int16_t d, msgpack_unpack_object* o) +{ + return template_callback_int32(u, d, o); +} + +static inline int template_callback_int8(unpack_user* u, int8_t d, msgpack_unpack_object* o) +{ + return template_callback_int32(u, d, o); +} + +static inline int template_callback_int64(unpack_user* u, int64_t d, msgpack_unpack_object* o) +{ + PyObject *p = PyLong_FromLongLong(d); + if (!p) + return -1; + *o = p; + return 0; +} + +static inline int template_callback_double(unpack_user* u, double d, msgpack_unpack_object* o) +{ + PyObject *p = PyFloat_FromDouble(d); + if (!p) + return -1; + *o = p; + return 0; +} +/* float values are converted to double and produce the same Python float type. */ +static inline int template_callback_float(unpack_user* u, float d, msgpack_unpack_object* o) +{ + return template_callback_double(u, d, o); +} +/* nil/true/false: take a new reference to the matching Python singleton and store it in *o; these cannot fail. */ +static inline int template_callback_nil(unpack_user* u, msgpack_unpack_object* o) +{ Py_INCREF(Py_None); *o = Py_None; return 0; } + +static inline int template_callback_true(unpack_user* u, msgpack_unpack_object* o) +{ Py_INCREF(Py_True); *o = Py_True; return 0; } + +static inline int template_callback_false(unpack_user* u, msgpack_unpack_object* o) +{ Py_INCREF(Py_False); *o = Py_False; return 0; } +/* Allocate the container for an n-element array: a list when u->use_list is set, a tuple otherwise. Returns -1 if allocation fails. */ +static inline int template_callback_array(unpack_user* u, unsigned int n, msgpack_unpack_object* o) +{ + PyObject *p = u->use_list ? 
PyList_New(n) : PyTuple_New(n); + + if (!p) + return -1; + *o = p; + return 0; +} +/* Store o in slot `current` of the array container *c. No Py_DECREF follows: the *_SET_ITEM macros take over the reference that was passed in. */ +static inline int template_callback_array_item(unpack_user* u, unsigned int current, msgpack_unpack_object* c, msgpack_unpack_object o) +{ + if (u->use_list) + PyList_SET_ITEM(*c, current, o); + else + PyTuple_SET_ITEM(*c, current, o); + return 0; +} +/* Called when an array is complete: if a list_hook is configured, replace *c with the hook's result (the original container is released); returns -1 if the hook call fails. */ +static inline int template_callback_array_end(unpack_user* u, msgpack_unpack_object* c) +{ + if (u->list_hook) { + PyObject *new_c = PyEval_CallFunction(u->list_hook, "(O)", *c); + if (!new_c) + return -1; + Py_DECREF(*c); + *c = new_c; + } + return 0; +} +/* Allocate the container for a map: an n-slot list when pairs are being collected, otherwise an empty dict. */ +static inline int template_callback_map(unpack_user* u, unsigned int n, msgpack_unpack_object* o) +{ + PyObject *p; + if (u->has_pairs_hook) { + p = PyList_New(n); // Or use tuple? + } + else { + p = PyDict_New(); + } + if (!p) + return -1; + *o = p; + return 0; +} +/* Add one key/value pair to the map container *c. PyTuple_Pack and PyDict_SetItem take their own references, so the references passed in as k and v are dropped on success. NOTE(review): on the two failure paths k and v are not released here - confirm the caller in unpack_template.h drops them. */ +static inline int template_callback_map_item(unpack_user* u, unsigned int current, msgpack_unpack_object* c, msgpack_unpack_object k, msgpack_unpack_object v) +{ + if (u->has_pairs_hook) { + msgpack_unpack_object item = PyTuple_Pack(2, k, v); + if (!item) + return -1; + Py_DECREF(k); + Py_DECREF(v); + PyList_SET_ITEM(*c, current, item); + return 0; + } + else if (PyDict_SetItem(*c, k, v) == 0) { + Py_DECREF(k); + Py_DECREF(v); + return 0; + } + return -1; +} +/* Called when a map is complete: if an object_hook is configured, replace *c with the hook's result (the original container is released); returns -1 if the hook call fails. */ +static inline int template_callback_map_end(unpack_user* u, msgpack_unpack_object* c) +{ + if (u->object_hook) { + PyObject *new_c = PyEval_CallFunction(u->object_hook, "(O)", *c); + if (!new_c) + return -1; + + Py_DECREF(*c); + *c = new_c; + } + return 0; +} +/* Convert the l raw bytes at p into a Python object: decoded text when u->encoding is set, a bytes object otherwise. The parameter b is not used here. */ +static inline int template_callback_raw(unpack_user* u, const char* b, const char* p, unsigned int l, msgpack_unpack_object* o) +{ + PyObject *py; + if(u->encoding) { + py = PyUnicode_Decode(p, l, u->encoding, u->unicode_errors); + } else { + py = PyBytes_FromStringAndSize(p, l); + } + if (!py) + return -1; + *o = py; + return 0; +} + +#include "unpack_template.h" diff --git 
a/pandas/src/msgpack/unpack_define.h b/pandas/src/msgpack/unpack_define.h new file mode 100644 index 0000000000000..959d3519e7b5c --- /dev/null +++ b/pandas/src/msgpack/unpack_define.h @@ -0,0 +1,93 @@ +/* + * MessagePack unpacking routine template + * + * Copyright (C) 2008-2010 FURUHASHI Sadayuki + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MSGPACK_UNPACK_DEFINE_H__ +#define MSGPACK_UNPACK_DEFINE_H__ + +#include "msgpack/sysdep.h" +#include <stdlib.h> +#include <string.h> +#include <assert.h> +#include <stdio.h> + +#ifdef __cplusplus +extern "C" { +#endif + + +#ifndef MSGPACK_EMBED_STACK_SIZE +#define MSGPACK_EMBED_STACK_SIZE 32 +#endif + + +typedef enum { + CS_HEADER = 0x00, // nil + + //CS_ = 0x01, + //CS_ = 0x02, // false + //CS_ = 0x03, // true + + //CS_ = 0x04, + //CS_ = 0x05, + //CS_ = 0x06, + //CS_ = 0x07, + + //CS_ = 0x08, + //CS_ = 0x09, + CS_FLOAT = 0x0a, + CS_DOUBLE = 0x0b, + CS_UINT_8 = 0x0c, + CS_UINT_16 = 0x0d, + CS_UINT_32 = 0x0e, + CS_UINT_64 = 0x0f, + CS_INT_8 = 0x10, + CS_INT_16 = 0x11, + CS_INT_32 = 0x12, + CS_INT_64 = 0x13, + + //CS_ = 0x14, + //CS_ = 0x15, + //CS_BIG_INT_16 = 0x16, + //CS_BIG_INT_32 = 0x17, + //CS_BIG_FLOAT_16 = 0x18, + //CS_BIG_FLOAT_32 = 0x19, + CS_RAW_16 = 0x1a, + CS_RAW_32 = 0x1b, + CS_ARRAY_16 = 0x1c, + CS_ARRAY_32 = 0x1d, + CS_MAP_16 = 0x1e, + CS_MAP_32 = 0x1f, + + //ACS_BIG_INT_VALUE, + //ACS_BIG_FLOAT_VALUE, + ACS_RAW_VALUE, +} msgpack_unpack_state; + + +typedef enum { + CT_ARRAY_ITEM, + CT_MAP_KEY, + CT_MAP_VALUE, +} 
msgpack_container_type; + + +#ifdef __cplusplus +} +#endif + +#endif /* msgpack/unpack_define.h */ + diff --git a/pandas/src/msgpack/unpack_template.h b/pandas/src/msgpack/unpack_template.h new file mode 100644 index 0000000000000..83b6918dc6686 --- /dev/null +++ b/pandas/src/msgpack/unpack_template.h @@ -0,0 +1,492 @@ +/* + * MessagePack unpacking routine template + * + * Copyright (C) 2008-2010 FURUHASHI Sadayuki + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef msgpack_unpack_func +#error msgpack_unpack_func template is not defined +#endif + +#ifndef msgpack_unpack_callback +#error msgpack_unpack_callback template is not defined +#endif + +#ifndef msgpack_unpack_struct +#error msgpack_unpack_struct template is not defined +#endif + +#ifndef msgpack_unpack_struct_decl +#define msgpack_unpack_struct_decl(name) msgpack_unpack_struct(name) +#endif + +#ifndef msgpack_unpack_object +#error msgpack_unpack_object type is not defined +#endif + +#ifndef msgpack_unpack_user +#error msgpack_unpack_user type is not defined +#endif + +#ifndef USE_CASE_RANGE +#if !defined(_MSC_VER) +#define USE_CASE_RANGE +#endif +#endif + +msgpack_unpack_struct_decl(_stack) { + msgpack_unpack_object obj; + size_t size; + size_t count; + unsigned int ct; + msgpack_unpack_object map_key; +}; + +msgpack_unpack_struct_decl(_context) { + msgpack_unpack_user user; + unsigned int cs; + unsigned int trail; + unsigned int top; + /* + msgpack_unpack_struct(_stack)* stack; + 
unsigned int stack_size; + msgpack_unpack_struct(_stack) embed_stack[MSGPACK_EMBED_STACK_SIZE]; + */ + msgpack_unpack_struct(_stack) stack[MSGPACK_EMBED_STACK_SIZE]; +}; + + +msgpack_unpack_func(void, _init)(msgpack_unpack_struct(_context)* ctx) +{ + ctx->cs = CS_HEADER; + ctx->trail = 0; + ctx->top = 0; + /* + ctx->stack = ctx->embed_stack; + ctx->stack_size = MSGPACK_EMBED_STACK_SIZE; + */ + ctx->stack[0].obj = msgpack_unpack_callback(_root)(&ctx->user); +} + +/* +msgpack_unpack_func(void, _destroy)(msgpack_unpack_struct(_context)* ctx) +{ + if(ctx->stack_size != MSGPACK_EMBED_STACK_SIZE) { + free(ctx->stack); + } +} +*/ + +msgpack_unpack_func(msgpack_unpack_object, _data)(msgpack_unpack_struct(_context)* ctx) +{ + return (ctx)->stack[0].obj; +} + + +template <bool construct> +msgpack_unpack_func(int, _execute)(msgpack_unpack_struct(_context)* ctx, const char* data, size_t len, size_t* off) +{ + assert(len >= *off); + + const unsigned char* p = (unsigned char*)data + *off; + const unsigned char* const pe = (unsigned char*)data + len; + const void* n = NULL; + + unsigned int trail = ctx->trail; + unsigned int cs = ctx->cs; + unsigned int top = ctx->top; + msgpack_unpack_struct(_stack)* stack = ctx->stack; + /* + unsigned int stack_size = ctx->stack_size; + */ + msgpack_unpack_user* user = &ctx->user; + + msgpack_unpack_object obj; + msgpack_unpack_struct(_stack)* c = NULL; + + int ret; + +#define construct_cb(name) \ + construct && msgpack_unpack_callback(name) + +#define push_simple_value(func) \ + if(construct_cb(func)(user, &obj) < 0) { goto _failed; } \ + goto _push +#define push_fixed_value(func, arg) \ + if(construct_cb(func)(user, arg, &obj) < 0) { goto _failed; } \ + goto _push +#define push_variable_value(func, base, pos, len) \ + if(construct_cb(func)(user, \ + (const char*)base, (const char*)pos, len, &obj) < 0) { goto _failed; } \ + goto _push + +#define again_fixed_trail(_cs, trail_len) \ + trail = trail_len; \ + cs = _cs; \ + goto _fixed_trail_again +#define 
again_fixed_trail_if_zero(_cs, trail_len, ifzero) \ + trail = trail_len; \ + if(trail == 0) { goto ifzero; } \ + cs = _cs; \ + goto _fixed_trail_again + +#define start_container(func, count_, ct_) \ + if(top >= MSGPACK_EMBED_STACK_SIZE) { goto _failed; } /* FIXME */ \ + if(construct_cb(func)(user, count_, &stack[top].obj) < 0) { goto _failed; } \ + if((count_) == 0) { obj = stack[top].obj; \ + if (construct_cb(func##_end)(user, &obj) < 0) { goto _failed; } \ + goto _push; } \ + stack[top].ct = ct_; \ + stack[top].size = count_; \ + stack[top].count = 0; \ + ++top; \ + /*printf("container %d count %d stack %d\n",stack[top].obj,count_,top);*/ \ + /*printf("stack push %d\n", top);*/ \ + /* FIXME \ + if(top >= stack_size) { \ + if(stack_size == MSGPACK_EMBED_STACK_SIZE) { \ + size_t csize = sizeof(msgpack_unpack_struct(_stack)) * MSGPACK_EMBED_STACK_SIZE; \ + size_t nsize = csize * 2; \ + msgpack_unpack_struct(_stack)* tmp = (msgpack_unpack_struct(_stack)*)malloc(nsize); \ + if(tmp == NULL) { goto _failed; } \ + memcpy(tmp, ctx->stack, csize); \ + ctx->stack = stack = tmp; \ + ctx->stack_size = stack_size = MSGPACK_EMBED_STACK_SIZE * 2; \ + } else { \ + size_t nsize = sizeof(msgpack_unpack_struct(_stack)) * ctx->stack_size * 2; \ + msgpack_unpack_struct(_stack)* tmp = (msgpack_unpack_struct(_stack)*)realloc(ctx->stack, nsize); \ + if(tmp == NULL) { goto _failed; } \ + ctx->stack = stack = tmp; \ + ctx->stack_size = stack_size = stack_size * 2; \ + } \ + } \ + */ \ + goto _header_again + +#define NEXT_CS(p) \ + ((unsigned int)*p & 0x1f) + +#ifdef USE_CASE_RANGE +#define SWITCH_RANGE_BEGIN switch(*p) { +#define SWITCH_RANGE(FROM, TO) case FROM ... 
TO: +#define SWITCH_RANGE_DEFAULT default: +#define SWITCH_RANGE_END } +#else +#define SWITCH_RANGE_BEGIN { if(0) { +#define SWITCH_RANGE(FROM, TO) } else if(FROM <= *p && *p <= TO) { +#define SWITCH_RANGE_DEFAULT } else { +#define SWITCH_RANGE_END } } +#endif + + if(p == pe) { goto _out; } + do { + switch(cs) { + case CS_HEADER: + SWITCH_RANGE_BEGIN + SWITCH_RANGE(0x00, 0x7f) // Positive Fixnum + push_fixed_value(_uint8, *(uint8_t*)p); + SWITCH_RANGE(0xe0, 0xff) // Negative Fixnum + push_fixed_value(_int8, *(int8_t*)p); + SWITCH_RANGE(0xc0, 0xdf) // Variable + switch(*p) { + case 0xc0: // nil + push_simple_value(_nil); + //case 0xc1: // string + // again_terminal_trail(NEXT_CS(p), p+1); + case 0xc2: // false + push_simple_value(_false); + case 0xc3: // true + push_simple_value(_true); + //case 0xc4: + //case 0xc5: + //case 0xc6: + //case 0xc7: + //case 0xc8: + //case 0xc9: + case 0xca: // float + case 0xcb: // double + case 0xcc: // unsigned int 8 + case 0xcd: // unsigned int 16 + case 0xce: // unsigned int 32 + case 0xcf: // unsigned int 64 + case 0xd0: // signed int 8 + case 0xd1: // signed int 16 + case 0xd2: // signed int 32 + case 0xd3: // signed int 64 + again_fixed_trail(NEXT_CS(p), 1 << (((unsigned int)*p) & 0x03)); + //case 0xd4: + //case 0xd5: + //case 0xd6: // big integer 16 + //case 0xd7: // big integer 32 + //case 0xd8: // big float 16 + //case 0xd9: // big float 32 + case 0xda: // raw 16 + case 0xdb: // raw 32 + case 0xdc: // array 16 + case 0xdd: // array 32 + case 0xde: // map 16 + case 0xdf: // map 32 + again_fixed_trail(NEXT_CS(p), 2 << (((unsigned int)*p) & 0x01)); + default: + goto _failed; + } + SWITCH_RANGE(0xa0, 0xbf) // FixRaw + again_fixed_trail_if_zero(ACS_RAW_VALUE, ((unsigned int)*p & 0x1f), _raw_zero); + SWITCH_RANGE(0x90, 0x9f) // FixArray + start_container(_array, ((unsigned int)*p) & 0x0f, CT_ARRAY_ITEM); + SWITCH_RANGE(0x80, 0x8f) // FixMap + start_container(_map, ((unsigned int)*p) & 0x0f, CT_MAP_KEY); + + SWITCH_RANGE_DEFAULT + 
goto _failed; + SWITCH_RANGE_END + // end CS_HEADER + + + _fixed_trail_again: + ++p; + + default: + if((size_t)(pe - p) < trail) { goto _out; } + n = p; p += trail - 1; + switch(cs) { + //case CS_ + //case CS_ + case CS_FLOAT: { + union { uint32_t i; float f; } mem; + mem.i = _msgpack_load32(uint32_t,n); + push_fixed_value(_float, mem.f); } + case CS_DOUBLE: { + union { uint64_t i; double f; } mem; + mem.i = _msgpack_load64(uint64_t,n); +#if defined(__arm__) && !(__ARM_EABI__) // arm-oabi + // https://github.com/msgpack/msgpack-perl/pull/1 + mem.i = (mem.i & 0xFFFFFFFFUL) << 32UL | (mem.i >> 32UL); +#endif + push_fixed_value(_double, mem.f); } + case CS_UINT_8: + push_fixed_value(_uint8, *(uint8_t*)n); + case CS_UINT_16: + push_fixed_value(_uint16, _msgpack_load16(uint16_t,n)); + case CS_UINT_32: + push_fixed_value(_uint32, _msgpack_load32(uint32_t,n)); + case CS_UINT_64: + push_fixed_value(_uint64, _msgpack_load64(uint64_t,n)); + + case CS_INT_8: + push_fixed_value(_int8, *(int8_t*)n); + case CS_INT_16: + push_fixed_value(_int16, _msgpack_load16(int16_t,n)); + case CS_INT_32: + push_fixed_value(_int32, _msgpack_load32(int32_t,n)); + case CS_INT_64: + push_fixed_value(_int64, _msgpack_load64(int64_t,n)); + + //case CS_ + //case CS_ + //case CS_BIG_INT_16: + // again_fixed_trail_if_zero(ACS_BIG_INT_VALUE, _msgpack_load16(uint16_t,n), _big_int_zero); + //case CS_BIG_INT_32: + // again_fixed_trail_if_zero(ACS_BIG_INT_VALUE, _msgpack_load32(uint32_t,n), _big_int_zero); + //case ACS_BIG_INT_VALUE: + //_big_int_zero: + // // FIXME + // push_variable_value(_big_int, data, n, trail); + + //case CS_BIG_FLOAT_16: + // again_fixed_trail_if_zero(ACS_BIG_FLOAT_VALUE, _msgpack_load16(uint16_t,n), _big_float_zero); + //case CS_BIG_FLOAT_32: + // again_fixed_trail_if_zero(ACS_BIG_FLOAT_VALUE, _msgpack_load32(uint32_t,n), _big_float_zero); + //case ACS_BIG_FLOAT_VALUE: + //_big_float_zero: + // // FIXME + // push_variable_value(_big_float, data, n, trail); + + case CS_RAW_16: + 
again_fixed_trail_if_zero(ACS_RAW_VALUE, _msgpack_load16(uint16_t,n), _raw_zero); + case CS_RAW_32: + again_fixed_trail_if_zero(ACS_RAW_VALUE, _msgpack_load32(uint32_t,n), _raw_zero); + case ACS_RAW_VALUE: + _raw_zero: + push_variable_value(_raw, data, n, trail); + + case CS_ARRAY_16: + start_container(_array, _msgpack_load16(uint16_t,n), CT_ARRAY_ITEM); + case CS_ARRAY_32: + /* FIXME security guard */ + start_container(_array, _msgpack_load32(uint32_t,n), CT_ARRAY_ITEM); + + case CS_MAP_16: + start_container(_map, _msgpack_load16(uint16_t,n), CT_MAP_KEY); + case CS_MAP_32: + /* FIXME security guard */ + start_container(_map, _msgpack_load32(uint32_t,n), CT_MAP_KEY); + + default: + goto _failed; + } + } + +_push: + if(top == 0) { goto _finish; } + c = &stack[top-1]; + switch(c->ct) { + case CT_ARRAY_ITEM: + if(construct_cb(_array_item)(user, c->count, &c->obj, obj) < 0) { goto _failed; } + if(++c->count == c->size) { + obj = c->obj; + if (construct_cb(_array_end)(user, &obj) < 0) { goto _failed; } + --top; + /*printf("stack pop %d\n", top);*/ + goto _push; + } + goto _header_again; + case CT_MAP_KEY: + c->map_key = obj; + c->ct = CT_MAP_VALUE; + goto _header_again; + case CT_MAP_VALUE: + if(construct_cb(_map_item)(user, c->count, &c->obj, c->map_key, obj) < 0) { goto _failed; } + if(++c->count == c->size) { + obj = c->obj; + if (construct_cb(_map_end)(user, &obj) < 0) { goto _failed; } + --top; + /*printf("stack pop %d\n", top);*/ + goto _push; + } + c->ct = CT_MAP_KEY; + goto _header_again; + + default: + goto _failed; + } + +_header_again: + cs = CS_HEADER; + ++p; + } while(p != pe); + goto _out; + + +_finish: + if (!construct) + msgpack_unpack_callback(_nil)(user, &obj); + stack[0].obj = obj; + ++p; + ret = 1; + /*printf("-- finish --\n"); */ + goto _end; + +_failed: + /*printf("** FAILED **\n"); */ + ret = -1; + goto _end; + +_out: + ret = 0; + goto _end; + +_end: + ctx->cs = cs; + ctx->trail = trail; + ctx->top = top; + *off = p - (const unsigned char*)data; + 
+ return ret; +#undef construct_cb +} + +#undef SWITCH_RANGE_BEGIN +#undef SWITCH_RANGE +#undef SWITCH_RANGE_DEFAULT +#undef SWITCH_RANGE_END +#undef push_simple_value +#undef push_fixed_value +#undef push_variable_value +#undef again_fixed_trail +#undef again_fixed_trail_if_zero +#undef start_container + +template <unsigned int fixed_offset, unsigned int var_offset> +msgpack_unpack_func(int, _container_header)(msgpack_unpack_struct(_context)* ctx, const char* data, size_t len, size_t* off) +{ + assert(len >= *off); + uint32_t size; + const unsigned char *const p = (unsigned char*)data + *off; + +#define inc_offset(inc) \ + if (len - *off < inc) \ + return 0; \ + *off += inc; + + switch (*p) { + case var_offset: + inc_offset(3); + size = _msgpack_load16(uint16_t, p + 1); + break; + case var_offset + 1: + inc_offset(5); + size = _msgpack_load32(uint32_t, p + 1); + break; +#ifdef USE_CASE_RANGE + case fixed_offset + 0x0 ... fixed_offset + 0xf: +#else + case fixed_offset + 0x0: + case fixed_offset + 0x1: + case fixed_offset + 0x2: + case fixed_offset + 0x3: + case fixed_offset + 0x4: + case fixed_offset + 0x5: + case fixed_offset + 0x6: + case fixed_offset + 0x7: + case fixed_offset + 0x8: + case fixed_offset + 0x9: + case fixed_offset + 0xa: + case fixed_offset + 0xb: + case fixed_offset + 0xc: + case fixed_offset + 0xd: + case fixed_offset + 0xe: + case fixed_offset + 0xf: +#endif + ++*off; + size = ((unsigned int)*p) & 0x0f; + break; + default: + PyErr_SetString(PyExc_ValueError, "Unexpected type header on stream"); + return -1; + } + msgpack_unpack_callback(_uint32)(&ctx->user, size, &ctx->stack[0].obj); + return 1; +} + +#undef SWITCH_RANGE_BEGIN +#undef SWITCH_RANGE +#undef SWITCH_RANGE_DEFAULT +#undef SWITCH_RANGE_END + +static const execute_fn template_construct = &template_execute<true>; +static const execute_fn template_skip = &template_execute<false>; +static const execute_fn read_array_header = &template_container_header<0x90, 0xdc>; +static const execute_fn read_map_header = &template_container_header<0x80, 0xde>; + 
+#undef msgpack_unpack_func +#undef msgpack_unpack_callback +#undef msgpack_unpack_struct +#undef msgpack_unpack_object +#undef msgpack_unpack_user + +#undef NEXT_CS + +/* vim: set ts=4 sw=4 noexpandtab */ diff --git a/pandas/tests/test_msgpack/__init__.py b/pandas/tests/test_msgpack/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/pandas/tests/test_msgpack/test_buffer.py b/pandas/tests/test_msgpack/test_buffer.py new file mode 100644 index 0000000000000..940b65406103e --- /dev/null +++ b/pandas/tests/test_msgpack/test_buffer.py @@ -0,0 +1,12 @@ +#!/usr/bin/env python +# coding: utf-8 + +from pandas.msgpack import packb, unpackb + + +def test_unpack_buffer(): + from array import array + buf = array('b') + buf.fromstring(packb(('foo', 'bar'))) + obj = unpackb(buf, use_list=1) + assert [b'foo', b'bar'] == obj diff --git a/pandas/tests/test_msgpack/test_case.py b/pandas/tests/test_msgpack/test_case.py new file mode 100644 index 0000000000000..e78456b2ddb62 --- /dev/null +++ b/pandas/tests/test_msgpack/test_case.py @@ -0,0 +1,101 @@ +#!/usr/bin/env python +# coding: utf-8 + +from pandas.msgpack import packb, unpackb + + +def check(length, obj): + v = packb(obj) + assert len(v) == length, \ + "%r length should be %r but get %r" % (obj, length, len(v)) + assert unpackb(v, use_list=0) == obj + +def test_1(): + for o in [None, True, False, 0, 1, (1 << 6), (1 << 7) - 1, -1, + -((1<<5)-1), -(1<<5)]: + check(1, o) + +def test_2(): + for o in [1 << 7, (1 << 8) - 1, + -((1<<5)+1), -(1<<7) + ]: + check(2, o) + +def test_3(): + for o in [1 << 8, (1 << 16) - 1, + -((1<<7)+1), -(1<<15)]: + check(3, o) + +def test_5(): + for o in [1 << 16, (1 << 32) - 1, + -((1<<15)+1), -(1<<31)]: + check(5, o) + +def test_9(): + for o in [1 << 32, (1 << 64) - 1, + -((1<<31)+1), -(1<<63), + 1.0, 0.1, -0.1, -1.0]: + check(9, o) + + +def check_raw(overhead, num): + check(num + overhead, b" " * num) + +def test_fixraw(): + check_raw(1, 0) + check_raw(1, (1<<5) - 1) + 
+def test_raw16(): + check_raw(3, 1<<5) + check_raw(3, (1<<16) - 1) + +def test_raw32(): + check_raw(5, 1<<16) + + +def check_array(overhead, num): + check(num + overhead, (None,) * num) + +def test_fixarray(): + check_array(1, 0) + check_array(1, (1 << 4) - 1) + +def test_array16(): + check_array(3, 1 << 4) + check_array(3, (1<<16)-1) + +def test_array32(): + check_array(5, (1<<16)) + + +def match(obj, buf): + assert packb(obj) == buf + assert unpackb(buf, use_list=0) == obj + +def test_match(): + cases = [ + (None, b'\xc0'), + (False, b'\xc2'), + (True, b'\xc3'), + (0, b'\x00'), + (127, b'\x7f'), + (128, b'\xcc\x80'), + (256, b'\xcd\x01\x00'), + (-1, b'\xff'), + (-33, b'\xd0\xdf'), + (-129, b'\xd1\xff\x7f'), + ({1:1}, b'\x81\x01\x01'), + (1.0, b"\xcb\x3f\xf0\x00\x00\x00\x00\x00\x00"), + ((), b'\x90'), + (tuple(range(15)),b"\x9f\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e"), + (tuple(range(16)),b"\xdc\x00\x10\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"), + ({}, b'\x80'), + (dict([(x,x) for x in range(15)]), b'\x8f\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e'), + (dict([(x,x) for x in range(16)]), b'\xde\x00\x10\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e\x0f\x0f'), + ] + + for v, p in cases: + match(v, p) + +def test_unicode(): + assert unpackb(packb('foobar'), use_list=1) == b'foobar' diff --git a/pandas/tests/test_msgpack/test_except.py b/pandas/tests/test_msgpack/test_except.py new file mode 100644 index 0000000000000..a0239336ca20d --- /dev/null +++ b/pandas/tests/test_msgpack/test_except.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# coding: utf-8 + +import unittest +import nose + +import datetime +from pandas.msgpack import packb, unpackb + +class DummyException(Exception): + pass + +class TestExceptions(unittest.TestCase): + + def test_raise_on_find_unsupported_value(self): + import datetime + 
self.assertRaises(TypeError, packb, datetime.datetime.now()) + + def test_raise_from_object_hook(self): + def hook(obj): + raise DummyException + self.assertRaises(DummyException, unpackb, packb({}), object_hook=hook) + self.assertRaises(DummyException, unpackb, packb({'fizz': 'buzz'}), object_hook=hook) + self.assertRaises(DummyException, unpackb, packb({'fizz': 'buzz'}), object_pairs_hook=hook) + self.assertRaises(DummyException, unpackb, packb({'fizz': {'buzz': 'spam'}}), object_hook=hook) + self.assertRaises(DummyException, unpackb, packb({'fizz': {'buzz': 'spam'}}), object_pairs_hook=hook) + + def test_invalidvalue(self): + self.assertRaises(ValueError, unpackb, b'\xd9\x97#DL_') diff --git a/pandas/tests/test_msgpack/test_format.py b/pandas/tests/test_msgpack/test_format.py new file mode 100644 index 0000000000000..a3a3afd046ce2 --- /dev/null +++ b/pandas/tests/test_msgpack/test_format.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python +# coding: utf-8 + +from pandas.msgpack import unpackb + +def check(src, should, use_list=0): + assert unpackb(src, use_list=use_list) == should + +def testSimpleValue(): + check(b"\x93\xc0\xc2\xc3", + (None, False, True,)) + +def testFixnum(): + check(b"\x92\x93\x00\x40\x7f\x93\xe0\xf0\xff", + ((0,64,127,), (-32,-16,-1,),) + ) + +def testFixArray(): + check(b"\x92\x90\x91\x91\xc0", + ((),((None,),),), + ) + +def testFixRaw(): + check(b"\x94\xa0\xa1a\xa2bc\xa3def", + (b"", b"a", b"bc", b"def",), + ) + +def testFixMap(): + check( + b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80", + {False: {None: None}, True:{None:{}}}, + ) + +def testUnsignedInt(): + check( + b"\x99\xcc\x00\xcc\x80\xcc\xff\xcd\x00\x00\xcd\x80\x00" + b"\xcd\xff\xff\xce\x00\x00\x00\x00\xce\x80\x00\x00\x00" + b"\xce\xff\xff\xff\xff", + (0, 128, 255, 0, 32768, 65535, 0, 2147483648, 4294967295,), + ) + +def testSignedInt(): + check(b"\x99\xd0\x00\xd0\x80\xd0\xff\xd1\x00\x00\xd1\x80\x00" + b"\xd1\xff\xff\xd2\x00\x00\x00\x00\xd2\x80\x00\x00\x00" + b"\xd2\xff\xff\xff\xff", + (0, -128, 
-1, 0, -32768, -1, 0, -2147483648, -1,)) + +def testRaw(): + check(b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00" + b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab", + (b"", b"a", b"ab", b"", b"a", b"ab")) + +def testArray(): + check(b"\x96\xdc\x00\x00\xdc\x00\x01\xc0\xdc\x00\x02\xc2\xc3\xdd\x00" + b"\x00\x00\x00\xdd\x00\x00\x00\x01\xc0\xdd\x00\x00\x00\x02" + b"\xc2\xc3", + ((), (None,), (False,True), (), (None,), (False,True)) + ) + +def testMap(): + check( + b"\x96" + b"\xde\x00\x00" + b"\xde\x00\x01\xc0\xc2" + b"\xde\x00\x02\xc0\xc2\xc3\xc2" + b"\xdf\x00\x00\x00\x00" + b"\xdf\x00\x00\x00\x01\xc0\xc2" + b"\xdf\x00\x00\x00\x02\xc0\xc2\xc3\xc2", + ({}, {None: False}, {True: False, None: False}, {}, + {None: False}, {True: False, None: False})) diff --git a/pandas/tests/test_msgpack/test_obj.py b/pandas/tests/test_msgpack/test_obj.py new file mode 100644 index 0000000000000..4a018bc8b87f1 --- /dev/null +++ b/pandas/tests/test_msgpack/test_obj.py @@ -0,0 +1,71 @@ +# coding: utf-8 + +import unittest +import nose + +import datetime +from pandas.msgpack import packb, unpackb + +class DecodeError(Exception): + pass + +class TestObj(unittest.TestCase): + + def _arr_to_str(self, arr): + return ''.join(str(c) for c in arr) + + def bad_complex_decoder(self, o): + raise DecodeError("Ooops!") + + def _decode_complex(self, obj): + if b'__complex__' in obj: + return complex(obj[b'real'], obj[b'imag']) + return obj + + def _encode_complex(self, obj): + if isinstance(obj, complex): + return {b'__complex__': True, b'real': 1, b'imag': 2} + return obj + + def test_encode_hook(self): + packed = packb([3, 1+2j], default=self._encode_complex) + unpacked = unpackb(packed, use_list=1) + assert unpacked[1] == {b'__complex__': True, b'real': 1, b'imag': 2} + + def test_decode_hook(self): + packed = packb([3, {b'__complex__': True, b'real': 1, b'imag': 2}]) + unpacked = unpackb(packed, object_hook=self._decode_complex, use_list=1) + assert unpacked[1] == 1+2j + + def 
test_decode_pairs_hook(self): + packed = packb([3, {1: 2, 3: 4}]) + prod_sum = 1 * 2 + 3 * 4 + unpacked = unpackb(packed, object_pairs_hook=lambda l: sum(k * v for k, v in l), use_list=1) + assert unpacked[1] == prod_sum + + def test_only_one_obj_hook(self): + self.assertRaises(ValueError, unpackb, b'', object_hook=lambda x: x, object_pairs_hook=lambda x: x) + + def test_bad_hook(self): + def f(): + packed = packb([3, 1+2j], default=lambda o: o) + unpacked = unpackb(packed, use_list=1) + self.assertRaises(ValueError, f) + + def test_array_hook(self): + packed = packb([1,2,3]) + unpacked = unpackb(packed, list_hook=self._arr_to_str, use_list=1) + assert unpacked == '123' + + def test_an_exception_in_objecthook1(self): + def f(): + packed = packb({1: {'__complex__': True, 'real': 1, 'imag': 2}}) + unpackb(packed, object_hook=self.bad_complex_decoder) + self.assertRaises(DecodeError, f) + + + def test_an_exception_in_objecthook2(self): + def f(): + packed = packb({1: [{'__complex__': True, 'real': 1, 'imag': 2}]}) + unpackb(packed, list_hook=self.bad_complex_decoder, use_list=1) + self.assertRaises(DecodeError, f) diff --git a/pandas/tests/test_msgpack/test_pack.py b/pandas/tests/test_msgpack/test_pack.py new file mode 100644 index 0000000000000..22df6df5e2e45 --- /dev/null +++ b/pandas/tests/test_msgpack/test_pack.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python +# coding: utf-8 + +import unittest +import nose + +import struct +from pandas import compat +from pandas.compat import u, OrderedDict +from pandas.msgpack import packb, unpackb, Unpacker, Packer + +class TestPack(unittest.TestCase): + + def check(self, data, use_list=False): + re = unpackb(packb(data), use_list=use_list) + assert re == data + + def testPack(self): + test_data = [ + 0, 1, 127, 128, 255, 256, 65535, 65536, + -1, -32, -33, -128, -129, -32768, -32769, + 1.0, + b"", b"a", b"a"*31, b"a"*32, + None, True, False, + (), ((),), ((), None,), + {None: 0}, + (1<<23), + ] + for td in test_data: + 
self.check(td) + + def testPackUnicode(self): + test_data = [ + u(""), u("abcd"), [u("defgh")], u("Русский текст"), + ] + for td in test_data: + re = unpackb(packb(td, encoding='utf-8'), use_list=1, encoding='utf-8') + assert re == td + packer = Packer(encoding='utf-8') + data = packer.pack(td) + re = Unpacker(compat.BytesIO(data), encoding='utf-8', use_list=1).unpack() + assert re == td + + def testPackUTF32(self): + test_data = [ + compat.u(""), + compat.u("abcd"), + [compat.u("defgh")], + compat.u("Русский текст"), + ] + for td in test_data: + re = unpackb(packb(td, encoding='utf-32'), use_list=1, encoding='utf-32') + assert re == td + + def testPackBytes(self): + test_data = [ + b"", b"abcd", (b"defgh",), + ] + for td in test_data: + self.check(td) + + def testIgnoreUnicodeErrors(self): + re = unpackb(packb(b'abc\xeddef'), encoding='utf-8', unicode_errors='ignore', use_list=1) + assert re == "abcdef" + + def testStrictUnicodeUnpack(self): + self.assertRaises(UnicodeDecodeError, unpackb, packb(b'abc\xeddef'), encoding='utf-8', use_list=1) + + def testStrictUnicodePack(self): + self.assertRaises(UnicodeEncodeError, packb, compat.u("abc\xeddef"), encoding='ascii', unicode_errors='strict') + + def testIgnoreErrorsPack(self): + re = unpackb(packb(compat.u("abcФФФdef"), encoding='ascii', unicode_errors='ignore'), encoding='utf-8', use_list=1) + assert re == compat.u("abcdef") + + def testNoEncoding(self): + self.assertRaises(TypeError, packb, compat.u("abc"), encoding=None) + + def testDecodeBinary(self): + re = unpackb(packb("abc"), encoding=None, use_list=1) + assert re == b"abc" + + def testPackFloat(self): + assert packb(1.0, use_single_float=True) == b'\xca' + struct.pack('>f', 1.0) + assert packb(1.0, use_single_float=False) == b'\xcb' + struct.pack('>d', 1.0) + + def testArraySize(self, sizes=[0, 5, 50, 1000]): + bio = compat.BytesIO() + packer = Packer() + for size in sizes: + bio.write(packer.pack_array_header(size)) + for i in range(size): + 
bio.write(packer.pack(i)) + + bio.seek(0) + unpacker = Unpacker(bio, use_list=1) + for size in sizes: + assert unpacker.unpack() == list(range(size)) + + def test_manualreset(self, sizes=[0, 5, 50, 1000]): + packer = Packer(autoreset=False) + for size in sizes: + packer.pack_array_header(size) + for i in range(size): + packer.pack(i) + + bio = compat.BytesIO(packer.bytes()) + unpacker = Unpacker(bio, use_list=1) + for size in sizes: + assert unpacker.unpack() == list(range(size)) + + packer.reset() + assert packer.bytes() == b'' + + def testMapSize(self, sizes=[0, 5, 50, 1000]): + bio = compat.BytesIO() + packer = Packer() + for size in sizes: + bio.write(packer.pack_map_header(size)) + for i in range(size): + bio.write(packer.pack(i)) # key + bio.write(packer.pack(i * 2)) # value + + bio.seek(0) + unpacker = Unpacker(bio) + for size in sizes: + assert unpacker.unpack() == dict((i, i * 2) for i in range(size)) + + + def test_odict(self): + seq = [(b'one', 1), (b'two', 2), (b'three', 3), (b'four', 4)] + od = OrderedDict(seq) + assert unpackb(packb(od), use_list=1) == dict(seq) + def pair_hook(seq): + return list(seq) + assert unpackb(packb(od), object_pairs_hook=pair_hook, use_list=1) == seq + + + def test_pairlist(self): + pairlist = [(b'a', 1), (2, b'b'), (b'foo', b'bar')] + packer = Packer() + packed = packer.pack_map_pairs(pairlist) + unpacked = unpackb(packed, object_pairs_hook=list) + assert pairlist == unpacked diff --git a/pandas/tests/test_msgpack/test_read_size.py b/pandas/tests/test_msgpack/test_read_size.py new file mode 100644 index 0000000000000..db3e1deb04f8f --- /dev/null +++ b/pandas/tests/test_msgpack/test_read_size.py @@ -0,0 +1,65 @@ +"""Test Unpacker's read_array_header and read_map_header methods""" +from pandas.msgpack import packb, Unpacker, OutOfData +UnexpectedTypeException = ValueError + +def test_read_array_header(): + unpacker = Unpacker() + unpacker.feed(packb(['a', 'b', 'c'])) + assert unpacker.read_array_header() == 3 + assert 
unpacker.unpack() == b'a' + assert unpacker.unpack() == b'b' + assert unpacker.unpack() == b'c' + try: + unpacker.unpack() + assert 0, 'should raise exception' + except OutOfData: + assert 1, 'okay' + + +def test_read_map_header(): + unpacker = Unpacker() + unpacker.feed(packb({'a': 'A'})) + assert unpacker.read_map_header() == 1 + assert unpacker.unpack() == B'a' + assert unpacker.unpack() == B'A' + try: + unpacker.unpack() + assert 0, 'should raise exception' + except OutOfData: + assert 1, 'okay' + +def test_incorrect_type_array(): + unpacker = Unpacker() + unpacker.feed(packb(1)) + try: + unpacker.read_array_header() + assert 0, 'should raise exception' + except UnexpectedTypeException: + assert 1, 'okay' + +def test_incorrect_type_map(): + unpacker = Unpacker() + unpacker.feed(packb(1)) + try: + unpacker.read_map_header() + assert 0, 'should raise exception' + except UnexpectedTypeException: + assert 1, 'okay' + +def test_correct_type_nested_array(): + unpacker = Unpacker() + unpacker.feed(packb({'a': ['b', 'c', 'd']})) + try: + unpacker.read_array_header() + assert 0, 'should raise exception' + except UnexpectedTypeException: + assert 1, 'okay' + +def test_incorrect_type_nested_map(): + unpacker = Unpacker() + unpacker.feed(packb([{'a': 'b'}])) + try: + unpacker.read_map_header() + assert 0, 'should raise exception' + except UnexpectedTypeException: + assert 1, 'okay' diff --git a/pandas/tests/test_msgpack/test_seq.py b/pandas/tests/test_msgpack/test_seq.py new file mode 100644 index 0000000000000..e5ee68c4cab84 --- /dev/null +++ b/pandas/tests/test_msgpack/test_seq.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python +# coding: utf-8 + +from pandas import compat +from pandas.compat import u +import pandas.msgpack as msgpack + +binarydata = [chr(i) for i in range(256)] +binarydata = "".join(binarydata) +if compat.PY3: + binarydata = binarydata.encode('utf-8') + +def gen_binary_data(idx): + data = binarydata[:idx % 300] + return data + +def 
test_exceeding_unpacker_read_size(): + dumpf = compat.BytesIO() + + packer = msgpack.Packer() + + NUMBER_OF_STRINGS = 6 + read_size = 16 + # 5 ok for read_size=16, while 6 glibc detected *** python: double free or corruption (fasttop): + # 20 ok for read_size=256, while 25 segfaults / glibc detected *** python: double free or corruption (!prev) + # 40 ok for read_size=1024, while 50 introduces errors + # 7000 ok for read_size=1024*1024, while 8000 leads to glibc detected *** python: double free or corruption (!prev): + + for idx in range(NUMBER_OF_STRINGS): + data = gen_binary_data(idx) + dumpf.write(packer.pack(data)) + + f = compat.BytesIO(dumpf.getvalue()) + dumpf.close() + + unpacker = msgpack.Unpacker(f, read_size=read_size, use_list=1) + + read_count = 0 + for idx, o in enumerate(unpacker): + assert type(o) == bytes + assert o == gen_binary_data(idx) + read_count += 1 + + assert read_count == NUMBER_OF_STRINGS diff --git a/pandas/tests/test_msgpack/test_sequnpack.py b/pandas/tests/test_msgpack/test_sequnpack.py new file mode 100644 index 0000000000000..4c3ad363e5b6e --- /dev/null +++ b/pandas/tests/test_msgpack/test_sequnpack.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python +# coding: utf-8 + +import unittest +import nose + +from pandas import compat +from pandas.msgpack import Unpacker, BufferFull +from pandas.msgpack import OutOfData + +class TestPack(unittest.TestCase): + + def test_partialdata(self): + unpacker = Unpacker() + unpacker.feed(b'\xa5') + self.assertRaises(StopIteration, next, iter(unpacker)) + unpacker.feed(b'h') + self.assertRaises(StopIteration, next, iter(unpacker)) + unpacker.feed(b'a') + self.assertRaises(StopIteration, next, iter(unpacker)) + unpacker.feed(b'l') + self.assertRaises(StopIteration, next, iter(unpacker)) + unpacker.feed(b'l') + self.assertRaises(StopIteration, next, iter(unpacker)) + unpacker.feed(b'o') + assert next(iter(unpacker)) == b'hallo' + + def test_foobar(self): + unpacker = Unpacker(read_size=3, use_list=1) + 
unpacker.feed(b'foobar') + assert unpacker.unpack() == ord(b'f') + assert unpacker.unpack() == ord(b'o') + assert unpacker.unpack() == ord(b'o') + assert unpacker.unpack() == ord(b'b') + assert unpacker.unpack() == ord(b'a') + assert unpacker.unpack() == ord(b'r') + self.assertRaises(OutOfData, unpacker.unpack) + + unpacker.feed(b'foo') + unpacker.feed(b'bar') + + k = 0 + for o, e in zip(unpacker, 'foobarbaz'): + assert o == ord(e) + k += 1 + assert k == len(b'foobar') + + def test_foobar_skip(self): + unpacker = Unpacker(read_size=3, use_list=1) + unpacker.feed(b'foobar') + assert unpacker.unpack() == ord(b'f') + unpacker.skip() + assert unpacker.unpack() == ord(b'o') + unpacker.skip() + assert unpacker.unpack() == ord(b'a') + unpacker.skip() + self.assertRaises(OutOfData, unpacker.unpack) + + def test_maxbuffersize(self): + self.assertRaises(ValueError, Unpacker, read_size=5, max_buffer_size=3) + unpacker = Unpacker(read_size=3, max_buffer_size=3, use_list=1) + unpacker.feed(b'fo') + self.assertRaises(BufferFull, unpacker.feed, b'ob') + unpacker.feed(b'o') + assert ord('f') == next(unpacker) + unpacker.feed(b'b') + assert ord('o') == next(unpacker) + assert ord('o') == next(unpacker) + assert ord('b') == next(unpacker) + + def test_readbytes(self): + unpacker = Unpacker(read_size=3) + unpacker.feed(b'foobar') + assert unpacker.unpack() == ord(b'f') + assert unpacker.read_bytes(3) == b'oob' + assert unpacker.unpack() == ord(b'a') + assert unpacker.unpack() == ord(b'r') + + # Test buffer refill + unpacker = Unpacker(compat.BytesIO(b'foobar'), read_size=3) + assert unpacker.unpack() == ord(b'f') + assert unpacker.read_bytes(3) == b'oob' + assert unpacker.unpack() == ord(b'a') + assert unpacker.unpack() == ord(b'r') diff --git a/pandas/tests/test_msgpack/test_subtype.py b/pandas/tests/test_msgpack/test_subtype.py new file mode 100644 index 0000000000000..0934b31cebeda --- /dev/null +++ b/pandas/tests/test_msgpack/test_subtype.py @@ -0,0 +1,21 @@ +#!/usr/bin/env 
python +# coding: utf-8 + +from pandas.msgpack import packb, unpackb +from collections import namedtuple + +class MyList(list): + pass + +class MyDict(dict): + pass + +class MyTuple(tuple): + pass + +MyNamedTuple = namedtuple('MyNamedTuple', 'x y') + +def test_types(): + assert packb(MyDict()) == packb(dict()) + assert packb(MyList()) == packb(list()) + assert packb(MyNamedTuple(1, 2)) == packb((1, 2)) diff --git a/pandas/tests/test_msgpack/test_unpack_raw.py b/pandas/tests/test_msgpack/test_unpack_raw.py new file mode 100644 index 0000000000000..0e96a79cf190a --- /dev/null +++ b/pandas/tests/test_msgpack/test_unpack_raw.py @@ -0,0 +1,28 @@ +"""Tests for cases where the user seeks to obtain packed msgpack objects""" + +from pandas import compat +from pandas.msgpack import Unpacker, packb + +def test_write_bytes(): + unpacker = Unpacker() + unpacker.feed(b'abc') + f = compat.BytesIO() + assert unpacker.unpack(f.write) == ord('a') + assert f.getvalue() == b'a' + f = compat.BytesIO() + assert unpacker.skip(f.write) is None + assert f.getvalue() == b'b' + f = compat.BytesIO() + assert unpacker.skip() is None + assert f.getvalue() == b'' + + +def test_write_bytes_multi_buffer(): + long_val = (5) * 100 + expected = packb(long_val) + unpacker = Unpacker(compat.BytesIO(expected), read_size=3, max_buffer_size=3) + + f = compat.BytesIO() + unpacked = unpacker.unpack(f.write) + assert unpacked == long_val + assert f.getvalue() == expected diff --git a/setup.py b/setup.py index ffd6089bdc88d..c326d14f552e0 100755 --- a/setup.py +++ b/setup.py @@ -464,6 +464,23 @@ def pxd(name): extensions.extend([sparse_ext]) +#---------------------------------------------------------------------- +# msgpack stuff here + +if sys.byteorder == 'big': + macros = [('__BIG_ENDIAN__', '1')] +else: + macros = [('__LITTLE_ENDIAN__', '1')] + +msgpack_ext = Extension('pandas.msgpack', + sources = [srcpath('msgpack', + suffix=suffix, subdir='')], + language='c++', + include_dirs=common_include, + 
define_macros=macros) + +extensions.append(msgpack_ext) + # if not ISRELEASED: # extensions.extend([sandbox_ext]) @@ -517,6 +534,7 @@ def pxd(name): 'pandas.stats', 'pandas.util', 'pandas.tests', + 'pandas.tests.test_msgpack', 'pandas.tools', 'pandas.tools.tests', 'pandas.tseries', diff --git a/vb_suite/packers.py b/vb_suite/packers.py new file mode 100644 index 0000000000000..9af6a6b1b0c4e --- /dev/null +++ b/vb_suite/packers.py @@ -0,0 +1,94 @@ +from vbench.api import Benchmark +from datetime import datetime + +start_date = datetime(2013, 5, 1) + +common_setup = """from pandas_vb_common import * +import os +import pandas as pd +from pandas.core import common as com + +f = '__test__.msg' +def remove(f): + try: + os.remove(f) + except: + pass + +index = date_range('20000101',periods=50000,freq='H') +df = DataFrame({'float1' : randn(50000), + 'float2' : randn(50000)}, + index=index) +remove(f) +""" + +#---------------------------------------------------------------------- +# msgpack + +setup = common_setup + """ +df.to_msgpack(f) +""" + +packers_read_pack = Benchmark("pd.read_msgpack(f)", setup, start_date=start_date) + +setup = common_setup + """ +""" + +packers_write_pack = Benchmark("df.to_msgpack(f)", setup, cleanup="remove(f)", start_date=start_date) + +#---------------------------------------------------------------------- +# pickle + +setup = common_setup + """ +df.to_pickle(f) +""" + +packers_read_pickle = Benchmark("pd.read_pickle(f)", setup, start_date=start_date) + +setup = common_setup + """ +""" + +packers_write_pickle = Benchmark("df.to_pickle(f)", setup, cleanup="remove(f)", start_date=start_date) + +#---------------------------------------------------------------------- +# csv + +setup = common_setup + """ +df.to_csv(f) +""" + +packers_read_csv = Benchmark("pd.read_csv(f)", setup, start_date=start_date) + +setup = common_setup + """ +""" + +packers_write_csv = Benchmark("df.to_csv(f)", setup, cleanup="remove(f)", start_date=start_date) + 
+#---------------------------------------------------------------------- +# hdf store + +setup = common_setup + """ +df.to_hdf(f,'df') +""" + +packers_read_hdf_store = Benchmark("pd.read_hdf(f,'df')", setup, start_date=start_date) + +setup = common_setup + """ +""" + +packers_write_hdf_store = Benchmark("df.to_hdf(f,'df')", setup, cleanup="remove(f)", start_date=start_date) + +#---------------------------------------------------------------------- +# hdf table + +setup = common_setup + """ +df.to_hdf(f,'df',table=True) +""" + +packers_read_hdf_table = Benchmark("pd.read_hdf(f,'df')", setup, start_date=start_date) + +setup = common_setup + """ +""" + +packers_write_hdf_table = Benchmark("df.to_hdf(f,'df',table=True)", setup, cleanup="remove(f)", start_date=start_date) + diff --git a/vb_suite/suite.py b/vb_suite/suite.py index 57920fcbf7c19..e5002ef78ab9b 100644 --- a/vb_suite/suite.py +++ b/vb_suite/suite.py @@ -16,6 +16,7 @@ 'join_merge', 'miscellaneous', 'panel_ctor', + 'packers', 'parser', 'plotting', 'reindex',