From 3092bbce60a58723cb0c61719c87ff1924e9d032 Mon Sep 17 00:00:00 2001 From: ri938 Date: Mon, 3 Jul 2017 21:10:28 +0100 Subject: [PATCH 01/16] BUG: reindex would throw when a categorical index was empty #16770 --- doc/source/whatsnew/v0.20.3.txt | 1 + pandas/core/indexes/category.py | 9 +++++++-- pandas/tests/indexes/test_category.py | 8 ++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 3d6aba98d4d57..3d82b2bd452bb 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -42,6 +42,7 @@ Bug Fixes - Fixed compat with loading a ``DataFrame`` with a ``PeriodIndex``, from a ``format='fixed'`` HDFStore, in Python 3, that was written in Python 2 (:issue:`16781`) - Fixed a bug in failing to compute rolling computations of a column-MultiIndexed ``DataFrame`` (:issue:`16789`, :issue:`16825`) - Bug in a DataFrame/Series with a ``TimedeltaIndex`` when slice indexing (:issue:`16637`) +- Handle reindexing an empty categorical index rather than throwing (:issue:`16770`) Conversion diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index d13636e8b43e2..7d3d96311f05a 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -419,7 +419,11 @@ def reindex(self, target, method=None, level=None, limit=None, raise ValueError("cannot reindex with a non-unique indexer") indexer, missing = self.get_indexer_non_unique(np.array(target)) - new_target = self.take(indexer) + + if len(self.codes): + new_target = self.take(indexer) + else: + new_target = target # filling in missing if needed if len(missing): @@ -430,7 +434,8 @@ def reindex(self, target, method=None, level=None, limit=None, result = Index(np.array(self), name=self.name) new_target, indexer, _ = result._reindex_non_unique( np.array(target)) - + # see GH 16819, indexer needs to be converted to correct type + indexer = np.array(indexer, dtype=np.int64) else: 
codes = new_target.codes.copy() diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 493274fff43e0..e1ac811b90bbc 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -419,6 +419,14 @@ def test_reindex_dtype(self): tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2], dtype=np.int64)) + def test_reindex_empty_index(self): + # See GH16770 + c = CategoricalIndex([]) + res, indexer = c.reindex(['a', 'b']) + tm.assert_index_equal(res, Index(['a', 'b']), exact=True) + tm.assert_numpy_array_equal(indexer, + np.array([-1, -1], dtype=np.int64)) + def test_duplicates(self): idx = CategoricalIndex([0, 0, 0], name='foo') From 69454ecbc826e0f95ee23fc41a5976b3906fbcff Mon Sep 17 00:00:00 2001 From: ri938 Date: Tue, 4 Jul 2017 15:11:47 +0100 Subject: [PATCH 02/16] Minor corrections to previous submit (#16820) --- doc/source/whatsnew/v0.20.3.txt | 2 +- pandas/core/indexes/category.py | 4 +--- pandas/tests/indexes/test_category.py | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 3d82b2bd452bb..e555a24a853aa 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -42,7 +42,7 @@ Bug Fixes - Fixed compat with loading a ``DataFrame`` with a ``PeriodIndex``, from a ``format='fixed'`` HDFStore, in Python 3, that was written in Python 2 (:issue:`16781`) - Fixed a bug in failing to compute rolling computations of a column-MultiIndexed ``DataFrame`` (:issue:`16789`, :issue:`16825`) - Bug in a DataFrame/Series with a ``TimedeltaIndex`` when slice indexing (:issue:`16637`) -- Handle reindexing an empty categorical index rather than throwing (:issue:`16770`) +- Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`) Conversion diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 7d3d96311f05a..b7ae053461cb5 100644 --- 
a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -420,7 +420,7 @@ def reindex(self, target, method=None, level=None, limit=None, indexer, missing = self.get_indexer_non_unique(np.array(target)) - if len(self.codes): + if len(indexer): new_target = self.take(indexer) else: new_target = target @@ -434,8 +434,6 @@ def reindex(self, target, method=None, level=None, limit=None, result = Index(np.array(self), name=self.name) new_target, indexer, _ = result._reindex_non_unique( np.array(target)) - # see GH 16819, indexer needs to be converted to correct type - indexer = np.array(indexer, dtype=np.int64) else: codes = new_target.codes.copy() diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index e1ac811b90bbc..1382b6bce0248 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -425,7 +425,7 @@ def test_reindex_empty_index(self): res, indexer = c.reindex(['a', 'b']) tm.assert_index_equal(res, Index(['a', 'b']), exact=True) tm.assert_numpy_array_equal(indexer, - np.array([-1, -1], dtype=np.int64)) + np.array([-1, -1], dtype=np.intp)) def test_duplicates(self): From 0a20024a519fbc36320fd8d2d1a2bb8b900b1146 Mon Sep 17 00:00:00 2001 From: ri938 Date: Tue, 4 Jul 2017 16:19:10 +0100 Subject: [PATCH 03/16] Minor correction to previous submit --- pandas/core/indexes/category.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index b7ae053461cb5..568b7e1c453f5 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -420,7 +420,7 @@ def reindex(self, target, method=None, level=None, limit=None, indexer, missing = self.get_indexer_non_unique(np.array(target)) - if len(indexer): + if len(self.codes): new_target = self.take(indexer) else: new_target = target From 0645868ed76bf3f7c6e3ac26ca613a94c5135fce Mon Sep 17 00:00:00 2001 From: ri938 Date: Thu, 6 Jul 2017 
12:56:29 +0100 Subject: [PATCH 04/16] Add backticks in documentation --- doc/source/whatsnew/v0.20.3.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index e555a24a853aa..0542254f6a8a1 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -44,7 +44,6 @@ Bug Fixes - Bug in a DataFrame/Series with a ``TimedeltaIndex`` when slice indexing (:issue:`16637`) - Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`) - Conversion ^^^^^^^^^^ From 8f8e3d6cb5db74d3382e5518d4cd527ea3916a11 Mon Sep 17 00:00:00 2001 From: ri938 Date: Thu, 6 Jul 2017 14:46:40 +0100 Subject: [PATCH 05/16] TST: register slow marker (#16797) * TST: register slow marker * Update setup.cfg --- doc/source/whatsnew/v0.20.3.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 0542254f6a8a1..49546c9e9faca 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -43,6 +43,7 @@ Bug Fixes - Fixed a bug in failing to compute rolling computations of a column-MultiIndexed ``DataFrame`` (:issue:`16789`, :issue:`16825`) - Bug in a DataFrame/Series with a ``TimedeltaIndex`` when slice indexing (:issue:`16637`) - Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`) +- Fixed a pytest marker failing downstream packages' tests suites (:issue:`16680`) Conversion ^^^^^^^^^^ From a725fbf7a329a7e3f66ce4b7966ea655b739110b Mon Sep 17 00:00:00 2001 From: Forbidden Donut Date: Fri, 30 Jun 2017 13:05:54 -0700 Subject: [PATCH 06/16] BUG: Fix read of py3 PeriodIndex DataFrame HDF made in py2 (#16781) (#16790) In Python3, reading a DataFrame with a PeriodIndex from an HDF file created in Python2 would incorrectly return a DataFrame with an Int64Index. 
--- doc/source/whatsnew/v0.20.3.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 49546c9e9faca..4b592f56c397d 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -44,6 +44,7 @@ Bug Fixes - Bug in a DataFrame/Series with a ``TimedeltaIndex`` when slice indexing (:issue:`16637`) - Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`) - Fixed a pytest marker failing downstream packages' tests suites (:issue:`16680`) +- Fixed compat with loading a ``DataFrame`` with a ``PeriodIndex``, from a ``format='fixed'`` HDFStore, in Python 3, that was written in Python 2 (:issue:`16781`) Conversion ^^^^^^^^^^ From 800b40d09752820e986ee1dbe01948f1960ea1a1 Mon Sep 17 00:00:00 2001 From: ri938 Date: Mon, 3 Jul 2017 13:39:55 +0100 Subject: [PATCH 07/16] BUG: render dataframe as html do not produce duplicate element id's (#16780) (#16801) * BUG: when rendering dataframe as html do not produce duplicate element id's #16780 * CLN: removing spaces in code causes pylint check to fail * DOC: moved whatsnew comment to 0.20.3 release from 0.21.0 --- doc/source/whatsnew/v0.20.3.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 4b592f56c397d..5bb06d9809a22 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -42,9 +42,10 @@ Bug Fixes - Fixed compat with loading a ``DataFrame`` with a ``PeriodIndex``, from a ``format='fixed'`` HDFStore, in Python 3, that was written in Python 2 (:issue:`16781`) - Fixed a bug in failing to compute rolling computations of a column-MultiIndexed ``DataFrame`` (:issue:`16789`, :issue:`16825`) - Bug in a DataFrame/Series with a ``TimedeltaIndex`` when slice indexing (:issue:`16637`) -- Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`) - Fixed a pytest marker failing downstream packages' tests suites 
(:issue:`16680`) - Fixed compat with loading a ``DataFrame`` with a ``PeriodIndex``, from a ``format='fixed'`` HDFStore, in Python 3, that was written in Python 2 (:issue:`16781`) +- Fixed a pytest marker failing downstream packages' tests suites (:issue:`16680`) +- Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`) Conversion ^^^^^^^^^^ From 5362447d38d9885131f990d6876007c43d739f30 Mon Sep 17 00:00:00 2001 From: Tuan Date: Tue, 4 Jul 2017 03:23:45 +1000 Subject: [PATCH 08/16] fix BUG: ValueError when performing rolling covariance on multi indexed DataFrame (#16814) * fix multi index names * fix line length to pep8 * added what's new entry and reference issue number in test * Update test_multi.py * Update v0.20.3.txt --- doc/source/whatsnew/v0.20.3.txt | 1 + pandas/tests/indexes/test_multi.py | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 5bb06d9809a22..e300d252a78e8 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -46,6 +46,7 @@ Bug Fixes - Fixed compat with loading a ``DataFrame`` with a ``PeriodIndex``, from a ``format='fixed'`` HDFStore, in Python 3, that was written in Python 2 (:issue:`16781`) - Fixed a pytest marker failing downstream packages' tests suites (:issue:`16680`) - Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`) +- Fixed bug where computing the rolling covariance of a MultiIndexed ``DataFrame`` improperly raised a ``ValueError`` (:issue:`16789`) Conversion ^^^^^^^^^^ diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 719cd2f7e01a4..8e383a7c1de2f 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -61,6 +61,15 @@ def f(): tm.assert_raises_regex(ValueError, 'The truth value of a', f) + def test_multi_index_names(self): + + # GH 16789 + cols = pd.MultiIndex.from_product([['A', 'B'], ['C', 'D', 'E']], + names=['1', 
'2']) + df = pd.DataFrame(np.ones((10, 6)), columns=cols) + rolling_result = df.rolling(3).cov() + assert rolling_result.index.names == [None, '1', '2'] + def test_labels_dtypes(self): # GH 8456 From 59b17cdeaa2f5b791c6337f21fd976c2c3de3739 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 4 Jul 2017 16:34:12 -0400 Subject: [PATCH 09/16] BUG: rolling.cov with multi-index columns should preserve the MI (#16825) xref #16814 --- doc/source/whatsnew/v0.20.3.txt | 1 + pandas/tests/indexes/test_multi.py | 9 --------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index e300d252a78e8..2041455cee74f 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -47,6 +47,7 @@ Bug Fixes - Fixed a pytest marker failing downstream packages' tests suites (:issue:`16680`) - Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`) - Fixed bug where computing the rolling covariance of a MultiIndexed ``DataFrame`` improperly raised a ``ValueError`` (:issue:`16789`) +- Fixed a bug in failing to compute rolling computations of a column-MultiIndexed ``DataFrame`` (:issue:`16789`, :issue:`16825`) Conversion ^^^^^^^^^^ diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 8e383a7c1de2f..719cd2f7e01a4 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -61,15 +61,6 @@ def f(): tm.assert_raises_regex(ValueError, 'The truth value of a', f) - def test_multi_index_names(self): - - # GH 16789 - cols = pd.MultiIndex.from_product([['A', 'B'], ['C', 'D', 'E']], - names=['1', '2']) - df = pd.DataFrame(np.ones((10, 6)), columns=cols) - rolling_result = df.rolling(3).cov() - assert rolling_result.index.names == [None, '1', '2'] - def test_labels_dtypes(self): # GH 8456 From 26e1a606e30592e22be52af3c5e5224090044cbf Mon Sep 17 00:00:00 2001 From: ri938 Date: Thu, 6 Jul 2017 14:42:06 +0100 Subject: [PATCH
10/16] Move documentation of change to the next major release 0.21.0 --- doc/source/whatsnew/v0.21.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 95eab9e3b684f..c129fee043bd3 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -141,6 +141,7 @@ Bug Fixes ~~~~~~~~~ - Fixes regression in 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`) +- Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`) Conversion ^^^^^^^^^^ From 9ed80f029a30087ed6a3d29c456d5406c18e1125 Mon Sep 17 00:00:00 2001 From: ri938 Date: Fri, 7 Jul 2017 12:07:04 +0100 Subject: [PATCH 11/16] Bring documentation into line with master branch. --- doc/source/whatsnew/v0.20.3.txt | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 2041455cee74f..759a416ce8d9f 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -42,12 +42,7 @@ Bug Fixes - Fixed compat with loading a ``DataFrame`` with a ``PeriodIndex``, from a ``format='fixed'`` HDFStore, in Python 3, that was written in Python 2 (:issue:`16781`) - Fixed a bug in failing to compute rolling computations of a column-MultiIndexed ``DataFrame`` (:issue:`16789`, :issue:`16825`) - Bug in a DataFrame/Series with a ``TimedeltaIndex`` when slice indexing (:issue:`16637`) -- Fixed a pytest marker failing downstream packages' tests suites (:issue:`16680`) -- Fixed compat with loading a ``DataFrame`` with a ``PeriodIndex``, from a ``format='fixed'`` HDFStore, in Python 3, that was written in Python 2 (:issue:`16781`) -- Fixed a pytest marker failing downstream packages' tests suites (:issue:`16680`) -- Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`) -- Fixed bug where computing the rolling covariance of a MultiIndexed 
``DataFrame`` improperly raised a ``ValueError`` (:issue:`16789`) -- Fixed a bug in failing to compute rolling computations of a column-MultiIndexed ``DataFrame`` (:issue:`16789`, :issue:`16825`) + Conversion ^^^^^^^^^^ @@ -103,4 +98,4 @@ Categorical - Bug in ``DataFrame.sort_values`` not respecting the ``kind`` with categorical data (:issue:`16793`) Other -^^^^^ +^^^^^ \ No newline at end of file From 6e8f1b334e9df108189a614c0c6872a720b5f3c0 Mon Sep 17 00:00:00 2001 From: ri938 Date: Sat, 8 Jul 2017 21:28:12 +0100 Subject: [PATCH 12/16] Minor corrections to previous submit (#16820) --- pandas/core/indexes/category.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 568b7e1c453f5..b7ae053461cb5 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -420,7 +420,7 @@ def reindex(self, target, method=None, level=None, limit=None, indexer, missing = self.get_indexer_non_unique(np.array(target)) - if len(self.codes): + if len(indexer): new_target = self.take(indexer) else: new_target = target From 7acc09f226b25fd864c64a95c65cb719257e0b49 Mon Sep 17 00:00:00 2001 From: ri938 Date: Tue, 4 Jul 2017 16:19:10 +0100 Subject: [PATCH 13/16] Minor correction to previous submit --- pandas/core/indexes/category.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index b7ae053461cb5..568b7e1c453f5 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -420,7 +420,7 @@ def reindex(self, target, method=None, level=None, limit=None, indexer, missing = self.get_indexer_non_unique(np.array(target)) - if len(indexer): + if len(self.codes): new_target = self.take(indexer) else: new_target = target From 83fd74969383a825502098a3119ed066eec8348e Mon Sep 17 00:00:00 2001 From: ri938 Date: Tue, 18 Jul 2017 18:13:52 +0100 Subject: [PATCH 14/16] Update v0.20.3.txt 
--- doc/source/whatsnew/v0.20.3.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 0937a6430b07f..327a374b4e40b 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -57,5 +57,3 @@ Reshaping Categorical ^^^^^^^^^^^ - -- Bug in ``DataFrame.sort_values`` not respecting the ``kind`` with categorical data (:issue:`16793`) From 1f2865eb3338df214ea2b3498ac53880f7cb7094 Mon Sep 17 00:00:00 2001 From: ri938 Date: Tue, 18 Jul 2017 18:14:18 +0100 Subject: [PATCH 15/16] Update v0.20.3.txt --- doc/source/whatsnew/v0.20.3.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 327a374b4e40b..41378a97dce39 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -57,3 +57,4 @@ Reshaping Categorical ^^^^^^^^^^^ +-- Bug in ``DataFrame.sort_values`` not respecting the ``kind`` parameter with categorical data (:issue:`16793`) From 9802288f3cfd3e8bdd0d261da365f7b501f56afd Mon Sep 17 00:00:00 2001 From: ri938 Date: Tue, 18 Jul 2017 18:14:49 +0100 Subject: [PATCH 16/16] Update v0.20.3.txt --- doc/source/whatsnew/v0.20.3.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 41378a97dce39..582f975f81a7a 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -57,4 +57,4 @@ Reshaping Categorical ^^^^^^^^^^^ --- Bug in ``DataFrame.sort_values`` not respecting the ``kind`` parameter with categorical data (:issue:`16793`) +- Bug in ``DataFrame.sort_values`` not respecting the ``kind`` parameter with categorical data (:issue:`16793`)