Skip to content

BUG: GH11880 where __contains__ fails in unpacked DataFrame with object cols #12013

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 15, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.18.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,7 @@ Bug Fixes
- Bug in ``pd.read_clipboard`` and ``pd.to_clipboard`` functions not supporting Unicode; upgrade included ``pyperclip`` to v1.5.15 (:issue:`9263`)
- Bug in ``DataFrame.query`` containing an assignment (:issue:`8664`)

- Bug in ``from_msgpack`` where ``__contains__()`` fails for columns of the unpacked ``DataFrame``, if the ``DataFrame`` has object columns. (:issue:`11880`)


- Bug in timezone info lost when broadcasting scalar datetime to ``DataFrame`` (:issue:`11682`)
Expand Down
1 change: 1 addition & 0 deletions pandas/core/window.py
Original file line number Diff line number Diff line change
Expand Up @@ -965,6 +965,7 @@ def corr(self, other=None, pairwise=None, **kwargs):
Use a standard estimation bias correction
"""


class EWM(_Rolling):
r"""
Provides exponential weighted functions
Expand Down
4 changes: 2 additions & 2 deletions pandas/hashtable.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ cdef class Int64HashTable(HashTable):
self.table.vals[k] = <Py_ssize_t> values[i]

@cython.boundscheck(False)
def map_locations(self, int64_t[:] values):
def map_locations(self, ndarray[int64_t, ndim=1] values):
cdef:
Py_ssize_t i, n = len(values)
int ret = 0
Expand Down Expand Up @@ -570,7 +570,7 @@ cdef class Float64HashTable(HashTable):
return np.asarray(labels)

@cython.boundscheck(False)
def map_locations(self, float64_t[:] values):
def map_locations(self, ndarray[float64_t, ndim=1] values):
cdef:
Py_ssize_t i, n = len(values)
int ret = 0
Expand Down
74 changes: 68 additions & 6 deletions pandas/io/tests/test_packers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
from pandas import compat
from pandas.compat import u
from pandas import (Series, DataFrame, Panel, MultiIndex, bdate_range,
date_range, period_range, Index, SparseSeries, SparseDataFrame,
SparsePanel)
date_range, period_range, Index)
from pandas.io.packers import to_msgpack, read_msgpack
import pandas.util.testing as tm
from pandas.util.testing import (ensure_clean, assert_index_equal,
assert_series_equal,
Expand All @@ -23,7 +23,19 @@

nan = np.nan

from pandas.io.packers import to_msgpack, read_msgpack
try:
import blosc # NOQA
except ImportError:
_BLOSC_INSTALLED = False
else:
_BLOSC_INSTALLED = True

try:
import zlib # NOQA
except ImportError:
_ZLIB_INSTALLED = False
else:
_ZLIB_INSTALLED = True

_multiprocess_can_split_ = False

Expand Down Expand Up @@ -483,6 +495,14 @@ class TestCompression(TestPackers):
"""

def setUp(self):
try:
from sqlalchemy import create_engine
self._create_sql_engine = create_engine
except ImportError:
self._SQLALCHEMY_INSTALLED = False
else:
self._SQLALCHEMY_INSTALLED = True

super(TestCompression, self).setUp()
data = {
'A': np.arange(1000, dtype=np.float64),
Expand All @@ -508,14 +528,56 @@ def test_compression_zlib(self):
assert_frame_equal(self.frame[k], i_rec[k])

def test_compression_blosc(self):
try:
import blosc
except ImportError:
if not _BLOSC_INSTALLED:
raise nose.SkipTest('no blosc')
i_rec = self.encode_decode(self.frame, compress='blosc')
for k in self.frame.keys():
assert_frame_equal(self.frame[k], i_rec[k])

def test_readonly_axis_blosc(self):
    # GH11880: membership tests (``in``) against the axes of a frame
    # round-tripped through msgpack must still work when blosc hands
    # back read-only buffers on decompression.
    if not _BLOSC_INSTALLED:
        raise nose.SkipTest('no blosc')
    int_indexed = DataFrame({'A': list('abcd')})
    flt_indexed = DataFrame(int_indexed, index=[1., 2., 3., 4.])
    for frame, label in ((int_indexed, 1), (flt_indexed, 1.)):
        unpacked = self.encode_decode(frame['A'], compress='blosc')
        self.assertTrue(label in unpacked)

def test_readonly_axis_zlib(self):
    # GH11880: ``__contains__`` on the axis of a msgpack round-tripped
    # ``DataFrame`` must work even when decompression yields read-only
    # buffers.
    if not _ZLIB_INSTALLED:
        # Guard for consistency with test_readonly_axis_zlib_to_sql;
        # zlib is nearly always present, but skip cleanly when not.
        raise nose.SkipTest('no zlib')
    df1 = DataFrame({'A': list('abcd')})
    df2 = DataFrame(df1, index=[1., 2., 3., 4.])
    self.assertTrue(1 in self.encode_decode(df1['A'], compress='zlib'))
    self.assertTrue(1. in self.encode_decode(df2['A'], compress='zlib'))

def test_readonly_axis_blosc_to_sql(self):
    # GH11880: a frame decoded from blosc-compressed msgpack (whose
    # blocks may be backed by read-only buffers) must survive a
    # to_sql / read_sql_table round-trip intact.
    if not _BLOSC_INSTALLED:
        raise nose.SkipTest('no blosc')
    if not self._SQLALCHEMY_INSTALLED:
        raise nose.SkipTest('no sqlalchemy')
    expected = DataFrame({'A': list('abcd')})
    unpacked = self.encode_decode(expected, compress='blosc')
    engine = self._create_sql_engine("sqlite:///:memory:")
    unpacked.to_sql('test', engine, if_exists='append')
    roundtripped = pandas.read_sql_table('test', engine,
                                         index_col='index')
    # read_sql_table names the restored index column; clear the name
    # so the comparison with the original frame succeeds.
    roundtripped.index.names = [None]
    assert_frame_equal(expected, roundtripped)

def test_readonly_axis_zlib_to_sql(self):
    # GH11880: a frame decoded from zlib-compressed msgpack (whose
    # blocks may be backed by read-only buffers) must survive a
    # to_sql / read_sql_table round-trip intact.
    if not _ZLIB_INSTALLED:
        raise nose.SkipTest('no zlib')
    if not self._SQLALCHEMY_INSTALLED:
        raise nose.SkipTest('no sqlalchemy')
    expected = DataFrame({'A': list('abcd')})
    unpacked = self.encode_decode(expected, compress='zlib')
    engine = self._create_sql_engine("sqlite:///:memory:")
    unpacked.to_sql('test', engine, if_exists='append')
    roundtripped = pandas.read_sql_table('test', engine,
                                         index_col='index')
    # read_sql_table names the restored index column; clear the name
    # so the comparison with the original frame succeeds.
    roundtripped.index.names = [None]
    assert_frame_equal(expected, roundtripped)


class TestEncoding(TestPackers):
def setUp(self):
Expand Down