Merge pull request #12013 from kawochen/BUG-FIX-11880

jreback · jreback · commit 2213e1828749 · 2016-01-15T08:46:42.000-05:00
BUG: GH11880 where __contains__ fails in unpacked DataFrame with object cols
diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
@@ -463,6 +463,7 @@ Bug Fixes
 - Bug in ``pd.read_clipboard`` and ``pd.to_clipboard`` functions not supporting Unicode; upgrade included ``pyperclip`` to v1.5.15 (:issue:`9263`)
 - Bug in ``DataFrame.query`` containing an assignment (:issue:`8664`)
 
+- Bug in ``read_msgpack`` where ``__contains__()`` fails for columns of the unpacked ``DataFrame``, if the ``DataFrame`` has object columns. (:issue:`11880`)
 
 
 - Bug in timezone info lost when broadcasting scalar datetime to ``DataFrame`` (:issue:`11682`)
diff --git a/pandas/core/window.py b/pandas/core/window.py
@@ -965,6 +965,7 @@ def corr(self, other=None, pairwise=None, **kwargs):
    Use a standard estimation bias correction
 """
 
+
 class EWM(_Rolling):
     r"""
     Provides exponential weighted functions
diff --git a/pandas/hashtable.pyx b/pandas/hashtable.pyx
@@ -342,7 +342,7 @@ cdef class Int64HashTable(HashTable):
                 self.table.vals[k] = <Py_ssize_t> values[i]
 
     @cython.boundscheck(False)
-    def map_locations(self, int64_t[:] values):
+    def map_locations(self, ndarray[int64_t, ndim=1] values):
         cdef:
             Py_ssize_t i, n = len(values)
             int ret = 0
@@ -570,7 +570,7 @@ cdef class Float64HashTable(HashTable):
         return np.asarray(labels)
 
     @cython.boundscheck(False)
-    def map_locations(self, float64_t[:] values):
+    def map_locations(self, ndarray[float64_t, ndim=1] values):
         cdef:
             Py_ssize_t i, n = len(values)
             int ret = 0
diff --git a/pandas/io/tests/test_packers.py b/pandas/io/tests/test_packers.py
@@ -9,8 +9,8 @@
 from pandas import compat
 from pandas.compat import u
 from pandas import (Series, DataFrame, Panel, MultiIndex, bdate_range,
-                    date_range, period_range, Index, SparseSeries, SparseDataFrame,
-                    SparsePanel)
+                    date_range, period_range, Index)
+from pandas.io.packers import to_msgpack, read_msgpack
 import pandas.util.testing as tm
 from pandas.util.testing import (ensure_clean, assert_index_equal,
                                  assert_series_equal,
@@ -23,7 +23,19 @@
 
 nan = np.nan
 
-from pandas.io.packers import to_msgpack, read_msgpack
+try:
+    import blosc  # NOQA
+except ImportError:
+    _BLOSC_INSTALLED = False
+else:
+    _BLOSC_INSTALLED = True
+
+try:
+    import zlib  # NOQA
+except ImportError:
+    _ZLIB_INSTALLED = False
+else:
+    _ZLIB_INSTALLED = True
 
 _multiprocess_can_split_ = False
 
@@ -483,6 +495,14 @@ class TestCompression(TestPackers):
     """
 
     def setUp(self):
+        try:
+            from sqlalchemy import create_engine
+            self._create_sql_engine = create_engine
+        except ImportError:
+            self._SQLALCHEMY_INSTALLED = False
+        else:
+            self._SQLALCHEMY_INSTALLED = True
+
         super(TestCompression, self).setUp()
         data = {
             'A': np.arange(1000, dtype=np.float64),
@@ -508,14 +528,56 @@ def test_compression_zlib(self):
             assert_frame_equal(self.frame[k], i_rec[k])
 
     def test_compression_blosc(self):
-        try:
-            import blosc
-        except ImportError:
+        if not _BLOSC_INSTALLED:
             raise nose.SkipTest('no blosc')
         i_rec = self.encode_decode(self.frame, compress='blosc')
         for k in self.frame.keys():
             assert_frame_equal(self.frame[k], i_rec[k])
 
+    def test_readonly_axis_blosc(self):
+        # GH11880
+        if not _BLOSC_INSTALLED:
+            raise nose.SkipTest('no blosc')
+        df1 = DataFrame({'A': list('abcd')})
+        df2 = DataFrame(df1, index=[1., 2., 3., 4.])
+        self.assertTrue(1 in self.encode_decode(df1['A'], compress='blosc'))
+        self.assertTrue(1. in self.encode_decode(df2['A'], compress='blosc'))
+
+    def test_readonly_axis_zlib(self):
+        # GH11880
+        df1 = DataFrame({'A': list('abcd')})
+        df2 = DataFrame(df1, index=[1., 2., 3., 4.])
+        self.assertTrue(1 in self.encode_decode(df1['A'], compress='zlib'))
+        self.assertTrue(1. in self.encode_decode(df2['A'], compress='zlib'))
+
+    def test_readonly_axis_blosc_to_sql(self):
+        # GH11880
+        if not _BLOSC_INSTALLED:
+            raise nose.SkipTest('no blosc')
+        if not self._SQLALCHEMY_INSTALLED:
+            raise nose.SkipTest('no sqlalchemy')
+        expected = DataFrame({'A': list('abcd')})
+        df = self.encode_decode(expected, compress='blosc')
+        eng = self._create_sql_engine("sqlite:///:memory:")
+        df.to_sql('test', eng, if_exists='append')
+        result = pandas.read_sql_table('test', eng, index_col='index')
+        result.index.names = [None]
+        assert_frame_equal(expected, result)
+
+    def test_readonly_axis_zlib_to_sql(self):
+        # GH11880
+        if not _ZLIB_INSTALLED:
+            raise nose.SkipTest('no zlib')
+        if not self._SQLALCHEMY_INSTALLED:
+            raise nose.SkipTest('no sqlalchemy')
+        expected = DataFrame({'A': list('abcd')})
+        df = self.encode_decode(expected, compress='zlib')
+        eng = self._create_sql_engine("sqlite:///:memory:")
+        df.to_sql('test', eng, if_exists='append')
+        result = pandas.read_sql_table('test', eng, index_col='index')
+        result.index.names = [None]
+        assert_frame_equal(expected, result)
+
 
 class TestEncoding(TestPackers):
         def setUp(self):