From 91d85cb4f8b2d10422753b6d070a937011947077 Mon Sep 17 00:00:00 2001 From: Arco Bast Date: Thu, 23 Feb 2017 21:14:02 +0100 Subject: [PATCH 1/7] msgpack supports CategoricalIndex --- pandas/io/packers.py | 2 +- pandas/tests/msgpack/test_unpack.py | 13 ++++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 3f4be6ad459d8..7afe8a06b6af1 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -54,7 +54,7 @@ from pandas import (Timestamp, Period, Series, DataFrame, # noqa Index, MultiIndex, Float64Index, Int64Index, Panel, RangeIndex, PeriodIndex, DatetimeIndex, NaT, - Categorical) + Categorical, CategoricalIndex) from pandas.tslib import NaTType from pandas.sparse.api import SparseSeries, SparseDataFrame from pandas.sparse.array import BlockIndex, IntIndex diff --git a/pandas/tests/msgpack/test_unpack.py b/pandas/tests/msgpack/test_unpack.py index ae8227ab276fb..7baeaf0fbfb11 100644 --- a/pandas/tests/msgpack/test_unpack.py +++ b/pandas/tests/msgpack/test_unpack.py @@ -3,7 +3,7 @@ from pandas.msgpack import Unpacker, packb, OutOfData, ExtType import pandas.util.testing as tm import pytest - +from pandas import DataFrame class TestUnpack(tm.TestCase): @@ -62,3 +62,14 @@ def _hook(self, code, data): assert unpacker.unpack() == {'a': 123} unpacker.feed(packb({'a': ExtType(2, b'321')}, encoding='utf-8')) assert unpacker.unpack() == {'a': ExtType(2, b'321')} + + def test_unpack_categorical_index(self): + '''dataframe with CategoricalIndex can be read and written''' + pdf = pd.DataFrame(dict(A=[1,1,1,2,2,2], B = [1,2,3,4,5,6])) + pdf['A'] = pdf['A'].astype('category') + pdf.set_index('A', inplace = True) + f = BytesIO() + pdf.to_msgpack(f) + f.seek(0) + pdf2 = pd.read_msgpack(f) + tm.assert_frame_equal(pdf, pdf2) \ No newline at end of file From f3f492a81b017531b939468d1c5085f8b9ded416 Mon Sep 17 00:00:00 2001 From: Arco Bast Date: Thu, 23 Feb 2017 22:17:38 +0100 Subject: [PATCH 2/7] fix test 
--- pandas/tests/msgpack/test_unpack.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/msgpack/test_unpack.py b/pandas/tests/msgpack/test_unpack.py index 7baeaf0fbfb11..86427edbdbf4f 100644 --- a/pandas/tests/msgpack/test_unpack.py +++ b/pandas/tests/msgpack/test_unpack.py @@ -3,7 +3,7 @@ from pandas.msgpack import Unpacker, packb, OutOfData, ExtType import pandas.util.testing as tm import pytest -from pandas import DataFrame +from pandas import DataFrame, read_msgpack class TestUnpack(tm.TestCase): @@ -65,11 +65,11 @@ def _hook(self, code, data): def test_unpack_categorical_index(self): '''dataframe with CategoricalIndex can be read and written''' - pdf = pd.DataFrame(dict(A=[1,1,1,2,2,2], B = [1,2,3,4,5,6])) + pdf = DataFrame(dict(A=[1,1,1,2,2,2], B = [1,2,3,4,5,6])) pdf['A'] = pdf['A'].astype('category') pdf.set_index('A', inplace = True) f = BytesIO() pdf.to_msgpack(f) f.seek(0) - pdf2 = pd.read_msgpack(f) + pdf2 = read_msgpack(f) tm.assert_frame_equal(pdf, pdf2) \ No newline at end of file From 7895c16fcd765630e3132e3464c48364a94bb6ae Mon Sep 17 00:00:00 2001 From: Arco Bast Date: Thu, 23 Feb 2017 22:25:58 +0100 Subject: [PATCH 3/7] flake8 --- pandas/tests/msgpack/test_unpack.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/tests/msgpack/test_unpack.py b/pandas/tests/msgpack/test_unpack.py index 86427edbdbf4f..007af0ce8a6bf 100644 --- a/pandas/tests/msgpack/test_unpack.py +++ b/pandas/tests/msgpack/test_unpack.py @@ -5,6 +5,7 @@ import pytest from pandas import DataFrame, read_msgpack + class TestUnpack(tm.TestCase): def test_unpack_array_header_from_file(self): @@ -62,14 +63,14 @@ def _hook(self, code, data): assert unpacker.unpack() == {'a': 123} unpacker.feed(packb({'a': ExtType(2, b'321')}, encoding='utf-8')) assert unpacker.unpack() == {'a': ExtType(2, b'321')} - + def test_unpack_categorical_index(self): '''dataframe with CategoricalIndex can be read and written''' - pdf = 
DataFrame(dict(A=[1,1,1,2,2,2], B = [1,2,3,4,5,6])) + pdf = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=[1, 2, 3, 4, 5, 6])) pdf['A'] = pdf['A'].astype('category') - pdf.set_index('A', inplace = True) + pdf.set_index('A', inplace=True) f = BytesIO() pdf.to_msgpack(f) f.seek(0) pdf2 = read_msgpack(f) - tm.assert_frame_equal(pdf, pdf2) \ No newline at end of file + tm.assert_frame_equal(pdf, pdf2) From cd9354fc9790515b4fe602689762d89fa568dfff Mon Sep 17 00:00:00 2001 From: Arco Bast Date: Thu, 23 Feb 2017 22:56:15 +0100 Subject: [PATCH 4/7] improve tests --- pandas/tests/io/test_packers.py | 5 +++++ pandas/tests/msgpack/test_unpack.py | 12 ------------ 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index 097c03937ca68..30e15858c61e7 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -123,6 +123,11 @@ def test_string_io(self): result = read_msgpack(s) tm.assert_frame_equal(result, df) + df2 = df.astype({0:'category'}).set_index(0) + s = to_msgpack(None, df) + result = read_msgpack(s) + tm.assert_frame_equal(result, df) + with ensure_clean(self.path) as p: s = df.to_msgpack() diff --git a/pandas/tests/msgpack/test_unpack.py b/pandas/tests/msgpack/test_unpack.py index 007af0ce8a6bf..ae8227ab276fb 100644 --- a/pandas/tests/msgpack/test_unpack.py +++ b/pandas/tests/msgpack/test_unpack.py @@ -3,7 +3,6 @@ from pandas.msgpack import Unpacker, packb, OutOfData, ExtType import pandas.util.testing as tm import pytest -from pandas import DataFrame, read_msgpack class TestUnpack(tm.TestCase): @@ -63,14 +62,3 @@ def _hook(self, code, data): assert unpacker.unpack() == {'a': 123} unpacker.feed(packb({'a': ExtType(2, b'321')}, encoding='utf-8')) assert unpacker.unpack() == {'a': ExtType(2, b'321')} - - def test_unpack_categorical_index(self): - '''dataframe with CategoricalIndex can be read and written''' - pdf = DataFrame(dict(A=[1, 1, 1, 2, 2, 2], B=[1, 2, 3, 4, 5, 6])) 
- pdf['A'] = pdf['A'].astype('category') - pdf.set_index('A', inplace=True) - f = BytesIO() - pdf.to_msgpack(f) - f.seek(0) - pdf2 = read_msgpack(f) - tm.assert_frame_equal(pdf, pdf2) From 215c2aae4810538b4d7ab223577bc76ed22a271b Mon Sep 17 00:00:00 2001 From: Arco Bast Date: Thu, 23 Feb 2017 22:58:01 +0100 Subject: [PATCH 5/7] improve tests --- pandas/tests/io/test_packers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index 30e15858c61e7..1f3518a8467e0 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -123,10 +123,10 @@ def test_string_io(self): result = read_msgpack(s) tm.assert_frame_equal(result, df) - df2 = df.astype({0:'category'}).set_index(0) - s = to_msgpack(None, df) + df2 = df.astype({0: 'category'}).set_index(0) + s = to_msgpack(None, df2) result = read_msgpack(s) - tm.assert_frame_equal(result, df) + tm.assert_frame_equal(result, df2) with ensure_clean(self.path) as p: From 3c1f2e7f2b0ce5d15124bd2328345a255d7897f6 Mon Sep 17 00:00:00 2001 From: Arco Bast Date: Thu, 23 Feb 2017 23:05:25 +0100 Subject: [PATCH 6/7] whatsnew --- doc/source/whatsnew/v0.20.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index fa24c973a7549..800aecad514c1 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -627,3 +627,4 @@ Bug Fixes - Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) - Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) +- Bug in ``DataFrame.read_msgpack`` which did not allow to load DataFrames with a CategoricalIndex (:issue:`15487`) From c1c68e4040098a29ff9afdafae0bc4a882a43f30 Mon Sep 17 
00:00:00 2001 From: Arco Bast Date: Fri, 24 Feb 2017 00:12:49 +0100 Subject: [PATCH 7/7] corrections --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/tests/io/test_packers.py | 13 ++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 800aecad514c1..efcc6ee5577d7 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -627,4 +627,4 @@ Bug Fixes - Bug in ``Series.replace`` and ``DataFrame.replace`` which failed on empty replacement dicts (:issue:`15289`) - Bug in ``pd.melt()`` where passing a tuple value for ``value_vars`` caused a ``TypeError`` (:issue:`15348`) - Bug in ``.eval()`` which caused multiline evals to fail with local variables not on the first line (:issue:`15342`) -- Bug in ``DataFrame.read_msgpack`` which did not allow to load DataFrames with a CategoricalIndex (:issue:`15487`) +- Bug in ``pd.read_msgpack()`` which did not allow loading of a dataframe with an index of type ``CategoricalIndex`` (:issue:`15487`) diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index 1f3518a8467e0..a832e537b9d3f 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -123,11 +123,6 @@ def test_string_io(self): result = read_msgpack(s) tm.assert_frame_equal(result, df) - df2 = df.astype({0: 'category'}).set_index(0) - s = to_msgpack(None, df2) - result = read_msgpack(s) - tm.assert_frame_equal(result, df2) - with ensure_clean(self.path) as p: s = df.to_msgpack() @@ -155,6 +150,14 @@ def __init__(self): tm.assertRaises(ValueError, read_msgpack, path_or_buf={}) tm.assertRaises(ValueError, read_msgpack, path_or_buf=A()) + def test_msgpack_categorical_index(self): + # GH15487 + df = DataFrame(np.random.randn(10, 2)) + df = df.astype({0: 'category'}).set_index(0) + s = to_msgpack(None, df) + result = read_msgpack(s) + tm.assert_frame_equal(result, df) + class TestNumpy(TestPackers):