From 7f044b7d03a1639f3173e5ec7574ff699144eb0a Mon Sep 17 00:00:00 2001 From: Ka Wo Chen Date: Sun, 10 Jan 2016 18:01:00 -0500 Subject: [PATCH] BUG: GH11880 where __contains__ fails in unpacked DataFrame with object columns --- doc/source/whatsnew/v0.18.0.txt | 1 + pandas/core/window.py | 1 + pandas/hashtable.pyx | 4 +- pandas/io/tests/test_packers.py | 74 ++++++++++++++++++++++++++++++--- 4 files changed, 72 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index 4ce2ce5b69cb4..3496e9eea834c 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -463,6 +463,7 @@ Bug Fixes - Bug in ``pd.read_clipboard`` and ``pd.to_clipboard`` functions not supporting Unicode; upgrade included ``pyperclip`` to v1.5.15 (:issue:`9263`) - Bug in ``DataFrame.query`` containing an assignment (:issue:`8664`) +- Bug in ``from_msgpack`` where ``__contains__()`` fails for columns of the unpacked ``DataFrame``, if the ``DataFrame`` has object columns. 
(:issue:`11880`) - Bug in timezone info lost when broadcasting scalar datetime to ``DataFrame`` (:issue:`11682`) diff --git a/pandas/core/window.py b/pandas/core/window.py index 1e5816e898baa..ce8fda9e932bc 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -965,6 +965,7 @@ def corr(self, other=None, pairwise=None, **kwargs): Use a standard estimation bias correction """ + class EWM(_Rolling): r""" Provides exponential weighted functions diff --git a/pandas/hashtable.pyx b/pandas/hashtable.pyx index 58e9d64921e0d..a5fcbd3f2d0f1 100644 --- a/pandas/hashtable.pyx +++ b/pandas/hashtable.pyx @@ -342,7 +342,7 @@ cdef class Int64HashTable(HashTable): self.table.vals[k] = values[i] @cython.boundscheck(False) - def map_locations(self, int64_t[:] values): + def map_locations(self, ndarray[int64_t, ndim=1] values): cdef: Py_ssize_t i, n = len(values) int ret = 0 @@ -570,7 +570,7 @@ cdef class Float64HashTable(HashTable): return np.asarray(labels) @cython.boundscheck(False) - def map_locations(self, float64_t[:] values): + def map_locations(self, ndarray[float64_t, ndim=1] values): cdef: Py_ssize_t i, n = len(values) int ret = 0 diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py index d6a9feb1bd8f4..61b24c858b60d 100644 --- a/pandas/io/tests/test_packers.py +++ b/pandas/io/tests/test_packers.py @@ -9,8 +9,8 @@ from pandas import compat from pandas.compat import u from pandas import (Series, DataFrame, Panel, MultiIndex, bdate_range, - date_range, period_range, Index, SparseSeries, SparseDataFrame, - SparsePanel) + date_range, period_range, Index) +from pandas.io.packers import to_msgpack, read_msgpack import pandas.util.testing as tm from pandas.util.testing import (ensure_clean, assert_index_equal, assert_series_equal, @@ -23,7 +23,19 @@ nan = np.nan -from pandas.io.packers import to_msgpack, read_msgpack +try: + import blosc # NOQA +except ImportError: + _BLOSC_INSTALLED = False +else: + _BLOSC_INSTALLED = True + +try: + import 
zlib # NOQA +except ImportError: + _ZLIB_INSTALLED = False +else: + _ZLIB_INSTALLED = True _multiprocess_can_split_ = False @@ -483,6 +495,14 @@ class TestCompression(TestPackers): """ def setUp(self): + try: + from sqlalchemy import create_engine + self._create_sql_engine = create_engine + except ImportError: + self._SQLALCHEMY_INSTALLED = False + else: + self._SQLALCHEMY_INSTALLED = True + super(TestCompression, self).setUp() data = { 'A': np.arange(1000, dtype=np.float64), @@ -508,14 +528,56 @@ def test_compression_zlib(self): assert_frame_equal(self.frame[k], i_rec[k]) def test_compression_blosc(self): - try: - import blosc - except ImportError: + if not _BLOSC_INSTALLED: raise nose.SkipTest('no blosc') i_rec = self.encode_decode(self.frame, compress='blosc') for k in self.frame.keys(): assert_frame_equal(self.frame[k], i_rec[k]) + def test_readonly_axis_blosc(self): + # GH11880 + if not _BLOSC_INSTALLED: + raise nose.SkipTest('no blosc') + df1 = DataFrame({'A': list('abcd')}) + df2 = DataFrame(df1, index=[1., 2., 3., 4.]) + self.assertTrue(1 in self.encode_decode(df1['A'], compress='blosc')) + self.assertTrue(1. in self.encode_decode(df2['A'], compress='blosc')) + + def test_readonly_axis_zlib(self): + # GH11880 + df1 = DataFrame({'A': list('abcd')}) + df2 = DataFrame(df1, index=[1., 2., 3., 4.]) + self.assertTrue(1 in self.encode_decode(df1['A'], compress='zlib')) + self.assertTrue(1. 
in self.encode_decode(df2['A'], compress='zlib')) + + def test_readonly_axis_blosc_to_sql(self): + # GH11880 + if not _BLOSC_INSTALLED: + raise nose.SkipTest('no blosc') + if not self._SQLALCHEMY_INSTALLED: + raise nose.SkipTest('no sqlalchemy') + expected = DataFrame({'A': list('abcd')}) + df = self.encode_decode(expected, compress='blosc') + eng = self._create_sql_engine("sqlite:///:memory:") + df.to_sql('test', eng, if_exists='append') + result = pandas.read_sql_table('test', eng, index_col='index') + result.index.names = [None] + assert_frame_equal(expected, result) + + def test_readonly_axis_zlib_to_sql(self): + # GH11880 + if not _ZLIB_INSTALLED: + raise nose.SkipTest('no zlib') + if not self._SQLALCHEMY_INSTALLED: + raise nose.SkipTest('no sqlalchemy') + expected = DataFrame({'A': list('abcd')}) + df = self.encode_decode(expected, compress='zlib') + eng = self._create_sql_engine("sqlite:///:memory:") + df.to_sql('test', eng, if_exists='append') + result = pandas.read_sql_table('test', eng, index_col='index') + result.index.names = [None] + assert_frame_equal(expected, result) + class TestEncoding(TestPackers): def setUp(self):