From a9a5177acc2ac6a86c6d0b4cd28ae6a665232711 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Mon, 23 Oct 2017 22:14:12 -0400 Subject: [PATCH 01/46] BUG: GH17464 Add error checking for duplicate levels in MultiIndex --- pandas/core/indexes/multi.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 456999b94c523..bea63161dc2a6 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -177,8 +177,8 @@ def _verify_integrity(self, labels=None, levels=None): Raises ------ ValueError - * if length of levels and labels don't match or any label would - exceed level bounds + If length of levels and labels don't match, if any label would + exceed level bounds, or there are any duplicate levels. """ # NOTE: Currently does not check, among other things, that cached # nlevels matches nor that sortorder matches actually sortorder. @@ -198,6 +198,10 @@ def _verify_integrity(self, labels=None, levels=None): " level (%d). NOTE: this index is in an" " inconsistent state" % (i, label.max(), len(level))) + if not level.is_unique: + raise ValueError("Level values must be unique: {0}" + " on level {1}".format( + [value for value in level], i)) @property def levels(self): From 91c8388a2b3e6307c913ee420bafb684978d4d7b Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 20:38:06 -0400 Subject: [PATCH 02/46] Remove duplicate levels from `test_is_` --- pandas/tests/indexes/test_multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 5c2a0254b072b..28b3d16a87dbd 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1618,7 +1618,7 @@ def test_is_(self): # shouldn't change assert mi2.is_(mi) mi4 = mi3.view() - mi4.set_levels([[1 for _ in range(10)], lrange(10)], inplace=True) + mi4.set_levels([lrange(10), lrange(10)], inplace=True) assert not mi4.is_(mi3) mi5 = mi.view() mi5.set_levels(mi5.levels, inplace=True) From 392ce8a65ad636b19d6e03573c302e0e04628a98 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 20:52:44 -0400 Subject: [PATCH 03/46] Remove duplicate levels from `test_level_setting_resets_attributes` --- pandas/tests/indexes/test_multi.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 28b3d16a87dbd..c3160c09ec791 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -2450,13 +2450,11 @@ def test_isna_behavior(self): pd.isna(self.index) def test_level_setting_resets_attributes(self): - ind = MultiIndex.from_arrays([ + ind = pd.MultiIndex.from_arrays([ ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] ]) assert ind.is_monotonic - ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]], - inplace=True) - + ind.set_levels([['A', 'B'], [1, 3, 2]], inplace=True) # if this fails, probably didn't reset the cache correctly. assert not ind.is_monotonic From 3c0812ef08a66a8381bfb1d72e0138d838d574e6 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 22:01:32 -0400 Subject: [PATCH 04/46] Remove duplicate levels from `test_frame_describe_multikey` --- pandas/tests/groupby/test_groupby.py | 74 ++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 3436dd9169081..37a605c98b7ce 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -385,6 +385,80 @@ def test_attr_wrapper(self): # make sure raises error pytest.raises(AttributeError, getattr, grouped, 'foo') + def test_series_describe_multikey(self): + ts = tm.makeTimeSeries() + grouped = ts.groupby([lambda x: x.year, lambda x: x.month]) + result = grouped.describe() + assert_series_equal(result['mean'], grouped.mean(), check_names=False) + assert_series_equal(result['std'], grouped.std(), check_names=False) + assert_series_equal(result['min'], grouped.min(), check_names=False) + + def test_series_describe_single(self): + ts = tm.makeTimeSeries() + grouped = ts.groupby(lambda x: x.month) + result = grouped.apply(lambda x: x.describe()) + expected = grouped.describe().stack() + assert_series_equal(result, expected) + + def test_series_index_name(self): + grouped = self.df.loc[:, ['C']].groupby(self.df['A']) + result = grouped.agg(lambda x: x.mean()) + assert result.index.name == 'A' + + def test_frame_describe_multikey(self): + grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month]) + result = grouped.describe() + desc_groups = [] + for col in self.tsframe: + group = grouped[col].describe() + group_col = pd.MultiIndex( + levels=[[col], group.columns], + labels=[[0]*len(group.columns), range(len(group.columns))]) + group = pd.DataFrame(group.values, + columns=group_col, + index=group.index) + desc_groups.append(group) + expected = pd.concat(desc_groups, axis=1) + tm.assert_frame_equal(result, expected) + + groupedT = self.tsframe.groupby({'A': 0, 'B': 0, + 'C': 1, 'D': 1}, axis=1) + result = groupedT.describe() + expected = self.tsframe.describe().T + expected.index = pd.MultiIndex(levels=[[0, 1], expected.index], + labels=[[0, 0, 1, 1], range(len(expected.index))]) + tm.assert_frame_equal(result, expected) + + def test_frame_describe_tupleindex(self): + + # GH 14848 - regression from 0.19.0 to 0.19.1 + df1 = DataFrame({'x': [1, 2, 3, 4, 5] * 3, + 'y': [10, 20, 30, 40, 50] * 3, + 'z': [100, 200, 300, 400, 500] * 3}) + df1['k'] = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] * 5 + df2 = df1.rename(columns={'k': 'key'}) + pytest.raises(ValueError, lambda: df1.groupby('k').describe()) + pytest.raises(ValueError, lambda: df2.groupby('key').describe()) + + def test_frame_describe_unstacked_format(self): + # GH 4792 + prices = {pd.Timestamp('2011-01-06 10:59:05', tz=None): 24990, + pd.Timestamp('2011-01-06 12:43:33', tz=None): 25499, + pd.Timestamp('2011-01-06 12:54:09', tz=None): 25499} + volumes = {pd.Timestamp('2011-01-06 10:59:05', tz=None): 1500000000, + pd.Timestamp('2011-01-06 12:43:33', tz=None): 5000000000, + pd.Timestamp('2011-01-06 12:54:09', tz=None): 100000000} + df = pd.DataFrame({'PRICE': prices, + 'VOLUME': volumes}) + result = df.groupby('PRICE').VOLUME.describe() + data = [df[df.PRICE == 24990].VOLUME.describe().values.tolist(), + df[df.PRICE == 25499].VOLUME.describe().values.tolist()] + expected = pd.DataFrame(data, + index=pd.Index([24990, 25499], name='PRICE'), + columns=['count', 'mean', 'std', 'min', + '25%', '50%', '75%', 'max']) + tm.assert_frame_equal(result, expected) + def test_frame_groupby(self): grouped = self.tsframe.groupby(lambda x: x.weekday()) From 09834537342971719a5563056141ba2b0f645f5d Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 22:02:41 -0400 Subject: [PATCH 05/46] Add comments about change --- pandas/tests/groupby/test_groupby.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 37a605c98b7ce..51180a1c01def 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -411,6 +411,7 @@ def test_frame_describe_multikey(self): desc_groups = [] for col in self.tsframe: group = grouped[col].describe() + # GH 17464 - Remove duplicate MultiIndex levels group_col = pd.MultiIndex( levels=[[col], group.columns], labels=[[0]*len(group.columns), range(len(group.columns))]) @@ -425,6 +426,7 @@ def test_frame_describe_multikey(self): 'C': 1, 'D': 1}, axis=1) result = groupedT.describe() expected = self.tsframe.describe().T + # GH 17464 - Remove duplicate MultiIndex levels expected.index = pd.MultiIndex(levels=[[0, 1], expected.index], labels=[[0, 0, 1, 1], range(len(expected.index))]) tm.assert_frame_equal(result, expected) From b39421fa9a11dcf19e8aff4441182f771aae8e9b Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 22:04:12 -0400 Subject: [PATCH 06/46] Added comments changes w/ bug number --- pandas/tests/indexes/test_multi.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index c3160c09ec791..a96f81cd0fbe0 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1618,6 +1618,7 @@ def test_is_(self): # shouldn't change assert mi2.is_(mi) mi4 = mi3.view() + # GH 17464 - Remove duplicate MultiIndex levels mi4.set_levels([lrange(10), lrange(10)], inplace=True) assert not mi4.is_(mi3) mi5 = mi.view() @@ -2454,6 +2455,7 @@ def test_level_setting_resets_attributes(self): ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] ]) assert ind.is_monotonic + # GH 17464 - Remove duplicate MultiIndex levels ind.set_levels([['A', 'B'], [1, 3, 2]], inplace=True) # if this fails, probably didn't reset the cache correctly. assert not ind.is_monotonic From 57714263c70c02feacdd93609e849182fa5597d6 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 22:05:49 -0400 Subject: [PATCH 07/46] PEP8 compliance --- pandas/tests/groupby/test_groupby.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 51180a1c01def..6a9facdc7e903 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -414,7 +414,7 @@ def test_frame_describe_multikey(self): # GH 17464 - Remove duplicate MultiIndex levels group_col = pd.MultiIndex( levels=[[col], group.columns], - labels=[[0]*len(group.columns), range(len(group.columns))]) + labels=[[0] * len(group.columns), range(len(group.columns))]) group = pd.DataFrame(group.values, columns=group_col, index=group.index) @@ -427,8 +427,9 @@ def test_frame_describe_multikey(self): result = groupedT.describe() expected = self.tsframe.describe().T # GH 17464 - Remove duplicate MultiIndex levels - expected.index = pd.MultiIndex(levels=[[0, 1], expected.index], - labels=[[0, 0, 1, 1], range(len(expected.index))]) + expected.index = pd.MultiIndex( + levels=[[0, 1], expected.index], + labels=[[0, 0, 1, 1], range(len(expected.index))]) tm.assert_frame_equal(result, expected) def test_frame_describe_tupleindex(self): From 48f94292152007cd771c5466a32fd9b9de49a417 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 22:09:52 -0400 Subject: [PATCH 08/46] whatsnew entry --- doc/source/whatsnew/v0.21.0.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 89e2d3006696c..4911ecbb161a5 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -1172,4 +1172,3 @@ Other ^^^^^ - Bug where some inplace operators were not being wrapped and produced a copy when invoked (:issue:`12962`) - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) - From 9ffc8ad114fe21e63a231333e2f3a4287c421420 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 20:38:06 -0400 Subject: [PATCH 09/46] Remove duplicate levels from `test_is_` --- pandas/tests/indexes/test_multi.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index a96f81cd0fbe0..9af085b1b67b6 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1618,6 +1618,7 @@ def test_is_(self): # shouldn't change assert mi2.is_(mi) mi4 = mi3.view() + # GH 17464 - Remove duplicate MultiIndex levels mi4.set_levels([lrange(10), lrange(10)], inplace=True) assert not mi4.is_(mi3) From e70a2be01d96832b90026df9589ff3fd840d68cd Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 20:52:44 -0400 Subject: [PATCH 10/46] Remove duplicate levels from `test_level_setting_resets_attributes` --- pandas/tests/indexes/test_multi.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 9af085b1b67b6..ee68a91ef6c52 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -2456,6 +2456,7 @@ def test_level_setting_resets_attributes(self): ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] ]) assert ind.is_monotonic + # GH 17464 - Remove duplicate MultiIndex levels ind.set_levels([['A', 'B'], [1, 3, 2]], inplace=True) # if this fails, probably didn't reset the cache correctly. From c28d3df1780ace58f8df663761648c4a54a992b7 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 22:01:32 -0400 Subject: [PATCH 11/46] Remove duplicate levels from `test_frame_describe_multikey` --- pandas/tests/groupby/test_groupby.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 6a9facdc7e903..37a605c98b7ce 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -411,10 +411,9 @@ def test_frame_describe_multikey(self): desc_groups = [] for col in self.tsframe: group = grouped[col].describe() - # GH 17464 - Remove duplicate MultiIndex levels group_col = pd.MultiIndex( levels=[[col], group.columns], - labels=[[0] * len(group.columns), range(len(group.columns))]) + labels=[[0]*len(group.columns), range(len(group.columns))]) group = pd.DataFrame(group.values, columns=group_col, index=group.index) @@ -426,10 +425,8 @@ def test_frame_describe_multikey(self): 'C': 1, 'D': 1}, axis=1) result = groupedT.describe() expected = self.tsframe.describe().T - # GH 17464 - Remove duplicate MultiIndex levels - expected.index = pd.MultiIndex( - levels=[[0, 1], expected.index], - labels=[[0, 0, 1, 1], range(len(expected.index))]) + expected.index = pd.MultiIndex(levels=[[0, 1], expected.index], + labels=[[0, 0, 1, 1], range(len(expected.index))]) tm.assert_frame_equal(result, expected) def test_frame_describe_tupleindex(self): From 03f2da8ab0fb508cc723a16836af1eee7124cc26 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 22:02:41 -0400 Subject: [PATCH 12/46] Add comments about change --- pandas/tests/groupby/test_groupby.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 37a605c98b7ce..51180a1c01def 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -411,6 +411,7 @@ def test_frame_describe_multikey(self): desc_groups = [] for col in self.tsframe: group = grouped[col].describe() + # GH 17464 - Remove duplicate MultiIndex levels group_col = pd.MultiIndex( levels=[[col], group.columns], labels=[[0]*len(group.columns), range(len(group.columns))]) @@ -425,6 +426,7 @@ def test_frame_describe_multikey(self): 'C': 1, 'D': 1}, axis=1) result = groupedT.describe() expected = self.tsframe.describe().T + # GH 17464 - Remove duplicate MultiIndex levels expected.index = pd.MultiIndex(levels=[[0, 1], expected.index], labels=[[0, 0, 1, 1], range(len(expected.index))]) tm.assert_frame_equal(result, expected) From 1d45ab6bd9d948c5a800d737385f91fe3cab3603 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 22:04:12 -0400 Subject: [PATCH 13/46] Added comments changes w/ bug number --- pandas/tests/indexes/test_multi.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index ee68a91ef6c52..0d89691f2f945 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1618,7 +1618,10 @@ def test_is_(self): # shouldn't change assert mi2.is_(mi) mi4 = mi3.view() +<<<<<<< HEAD +======= +>>>>>>> 529a040... Added comments changes w/ bug number # GH 17464 - Remove duplicate MultiIndex levels mi4.set_levels([lrange(10), lrange(10)], inplace=True) assert not mi4.is_(mi3) From 015af48ea64d306e10c7ada1ee19247d4fbdbe89 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 22:05:49 -0400 Subject: [PATCH 14/46] PEP8 compliance --- pandas/tests/groupby/test_groupby.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 51180a1c01def..6a9facdc7e903 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -414,7 +414,7 @@ def test_frame_describe_multikey(self): # GH 17464 - Remove duplicate MultiIndex levels group_col = pd.MultiIndex( levels=[[col], group.columns], - labels=[[0]*len(group.columns), range(len(group.columns))]) + labels=[[0] * len(group.columns), range(len(group.columns))]) group = pd.DataFrame(group.values, columns=group_col, index=group.index) @@ -427,8 +427,9 @@ def test_frame_describe_multikey(self): result = groupedT.describe() expected = self.tsframe.describe().T # GH 17464 - Remove duplicate MultiIndex levels - expected.index = pd.MultiIndex(levels=[[0, 1], expected.index], - labels=[[0, 0, 1, 1], range(len(expected.index))]) + expected.index = pd.MultiIndex( + levels=[[0, 1], expected.index], + labels=[[0, 0, 1, 1], range(len(expected.index))]) tm.assert_frame_equal(result, expected) def test_frame_describe_tupleindex(self): From 9dc7eb55b37f7ddb3bfa89084e286a5bef41adfe Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 22:09:52 -0400 Subject: [PATCH 15/46] whatsnew entry --- doc/source/whatsnew/v0.21.0.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 4911ecbb161a5..5c0e970b553f6 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -1067,8 +1067,14 @@ Indexing - Bug in intersection of ``RangeIndex`` with negative step (:issue:`17296`) - Bug in ``IntervalIndex`` where performing a scalar lookup fails for included right endpoints of non-overlapping monotonic decreasing indexes (:issue:`16417`, :issue:`17271`) - Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` when no valid entry (:issue:`17400`) +<<<<<<< HEAD - Bug in :func:`Series.rename` when called with a callable, incorrectly alters the name of the ``Series``, rather than the name of the ``Index``. (:issue:`17407`) - Bug in :func:`String.str_get` raises ``IndexError`` instead of inserting NaNs when using a negative index. (:issue:`17704`) +======= +- Bug in :func:`Series.rename` when called with a `callable`, incorrectly alters the name of the `Series`, rather than the name of the `Index`. (:issue:`17407`) +- Bug in :func:`String.str_get` raises `index out of range` error instead of inserting NaNs when using a negative index. (:issue:`17704`) +- When created with duplicate labels, ``MultiIndex`` now raises a `ValueError`. (:issue:`17464`) +>>>>>>> 4eff2d5... whatsnew entry I/O ^^^ From 9aa2bcd5133b8f2f40be0f71da30de7176a1298f Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Wed, 25 Oct 2017 17:44:49 -0400 Subject: [PATCH 16/46] Whatsnew backticks --- doc/source/whatsnew/v0.21.0.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 5c0e970b553f6..44023b0f3a07a 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -1073,8 +1073,12 @@ Indexing ======= - Bug in :func:`Series.rename` when called with a `callable`, incorrectly alters the name of the `Series`, rather than the name of the `Index`. (:issue:`17407`) - Bug in :func:`String.str_get` raises `index out of range` error instead of inserting NaNs when using a negative index. (:issue:`17704`) +<<<<<<< HEAD - When created with duplicate labels, ``MultiIndex`` now raises a `ValueError`. (:issue:`17464`) >>>>>>> 4eff2d5... whatsnew entry +======= +- When created with duplicate labels, ``MultiIndex`` now raises a ``ValueError``. (:issue:`17464`) +>>>>>>> 868ec45... Whatsnew backticks I/O ^^^ From e52460eb80ffebe190a42709bb867ec1e4a7409a Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Wed, 25 Oct 2017 17:46:35 -0400 Subject: [PATCH 17/46] whatsnew merging --- doc/source/whatsnew/v0.21.0.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 44023b0f3a07a..bb86e0d5d4703 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -1068,6 +1068,7 @@ Indexing - Bug in ``IntervalIndex`` where performing a scalar lookup fails for included right endpoints of non-overlapping monotonic decreasing indexes (:issue:`16417`, :issue:`17271`) - Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` when no valid entry (:issue:`17400`) <<<<<<< HEAD +<<<<<<< HEAD - Bug in :func:`Series.rename` when called with a callable, incorrectly alters the name of the ``Series``, rather than the name of the ``Index``. (:issue:`17407`) - Bug in :func:`String.str_get` raises ``IndexError`` instead of inserting NaNs when using a negative index. (:issue:`17704`) ======= @@ -1077,6 +1078,10 @@ Indexing - When created with duplicate labels, ``MultiIndex`` now raises a `ValueError`. (:issue:`17464`) >>>>>>> 4eff2d5... whatsnew entry ======= +======= +- Bug in :func:`Series.rename` when called with a callable, incorrectly alters the name of the ``Series``, rather than the name of the ``Index``. (:issue:`17407`) +- Bug in :func:`String.str_get` raises ``IndexError`` instead of inserting NaNs when using a negative index. (:issue:`17704`) +>>>>>>> f74a4ab... whatsnew merging - When created with duplicate labels, ``MultiIndex`` now raises a ``ValueError``. (:issue:`17464`) >>>>>>> 868ec45... Whatsnew backticks From 48d509d5e5bc55cc5dfb4a4cacc2dbaf7fcc156b Mon Sep 17 00:00:00 2001 From: cmazzullo Date: Fri, 27 Oct 2017 16:29:22 -0400 Subject: [PATCH 18/46] Removed comments about this issue from other tests --- pandas/tests/indexes/test_multi.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 0d89691f2f945..a6c21c444ed9f 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1618,10 +1618,7 @@ def test_is_(self): # shouldn't change assert mi2.is_(mi) mi4 = mi3.view() -<<<<<<< HEAD -======= ->>>>>>> 529a040... Added comments changes w/ bug number # GH 17464 - Remove duplicate MultiIndex levels mi4.set_levels([lrange(10), lrange(10)], inplace=True) assert not mi4.is_(mi3) @@ -2459,8 +2456,11 @@ def test_level_setting_resets_attributes(self): ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] ]) assert ind.is_monotonic +<<<<<<< HEAD # GH 17464 - Remove duplicate MultiIndex levels +======= +>>>>>>> 6ce2637... Removed comments about this issue from other tests ind.set_levels([['A', 'B'], [1, 3, 2]], inplace=True) # if this fails, probably didn't reset the cache correctly. assert not ind.is_monotonic From d75e1de9e41ab43df50fbb7839de242ccacb2c33 Mon Sep 17 00:00:00 2001 From: cmazzullo Date: Fri, 27 Oct 2017 16:40:59 -0400 Subject: [PATCH 19/46] Added test to make sure a ValueError is thrown --- pandas/tests/indexes/test_multi.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index a6c21c444ed9f..829e47d2d93b5 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -2456,11 +2456,6 @@ def test_level_setting_resets_attributes(self): ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] ]) assert ind.is_monotonic -<<<<<<< HEAD - - # GH 17464 - Remove duplicate MultiIndex levels -======= ->>>>>>> 6ce2637... Removed comments about this issue from other tests ind.set_levels([['A', 'B'], [1, 3, 2]], inplace=True) # if this fails, probably didn't reset the cache correctly. assert not ind.is_monotonic @@ -3088,3 +3083,14 @@ def test_million_record_attribute_error(self): with tm.assert_raises_regex(AttributeError, "'Series' object has no attribute 'foo'"): df['a'].foo() + + def test_duplicate_multiindex_labels(self): + # GH 17464 + # Make sure that a MultiIndex with duplicate levels throws a ValueError + with pytest.raises(ValueError): + ind = pd.MultiIndex([['A'] * 10, range(10)], [[0] * 10, range(10)]) + # And that using set_levels with duplicate levels fails + ind = MultiIndex.from_arrays([['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3]]) + with pytest.raises(ValueError): + ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]], + inplace=True) From 0943d19f0e74269bc32b33b0af10cea69e31d97d Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Fri, 27 Oct 2017 19:59:49 -0400 Subject: [PATCH 20/46] PEP8 compliance --- pandas/tests/indexes/test_multi.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 829e47d2d93b5..82223cd7b631c 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -3090,7 +3090,8 @@ def test_duplicate_multiindex_labels(self): with pytest.raises(ValueError): ind = pd.MultiIndex([['A'] * 10, range(10)], [[0] * 10, range(10)]) # And that using set_levels with duplicate levels fails - ind = MultiIndex.from_arrays([['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3]]) + ind = MultiIndex.from_arrays([['A', 'A', 'B', 'B', 'B'], + [1, 2, 1, 2, 3]]) with pytest.raises(ValueError): ind.set_levels([['A', 'B', 'A', 'A', 'B'], [2, 1, 3, -2, 5]], inplace=True) From 6f2efc67f9fd2118acfb13ba5dde4841e81bad35 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 28 Oct 2017 16:59:53 -0400 Subject: [PATCH 21/46] move whatsnew to 0.22.0 --- doc/source/whatsnew/v0.21.0.txt | 16 +--------------- doc/source/whatsnew/v0.22.0.txt | 3 +++ 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index bb86e0d5d4703..89e2d3006696c 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -1067,23 +1067,8 @@ Indexing - Bug in intersection of ``RangeIndex`` with negative step (:issue:`17296`) - Bug in ``IntervalIndex`` where performing a scalar lookup fails for included right endpoints of non-overlapping monotonic decreasing indexes (:issue:`16417`, :issue:`17271`) - Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` when no valid entry (:issue:`17400`) -<<<<<<< HEAD -<<<<<<< HEAD - Bug in :func:`Series.rename` when called with a callable, incorrectly alters the name of the ``Series``, rather than the name of the ``Index``. (:issue:`17407`) - Bug in :func:`String.str_get` raises ``IndexError`` instead of inserting NaNs when using a negative index. (:issue:`17704`) -======= -- Bug in :func:`Series.rename` when called with a `callable`, incorrectly alters the name of the `Series`, rather than the name of the `Index`. (:issue:`17407`) -- Bug in :func:`String.str_get` raises `index out of range` error instead of inserting NaNs when using a negative index. (:issue:`17704`) -<<<<<<< HEAD -- When created with duplicate labels, ``MultiIndex`` now raises a `ValueError`. (:issue:`17464`) ->>>>>>> 4eff2d5... whatsnew entry -======= -======= -- Bug in :func:`Series.rename` when called with a callable, incorrectly alters the name of the ``Series``, rather than the name of the ``Index``. (:issue:`17407`) -- Bug in :func:`String.str_get` raises ``IndexError`` instead of inserting NaNs when using a negative index. (:issue:`17704`) ->>>>>>> f74a4ab... whatsnew merging -- When created with duplicate labels, ``MultiIndex`` now raises a ``ValueError``. (:issue:`17464`) ->>>>>>> 868ec45... Whatsnew backticks I/O ^^^ @@ -1187,3 +1172,4 @@ Other ^^^^^ - Bug where some inplace operators were not being wrapped and produced a copy when invoked (:issue:`12962`) - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) + diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index d43d5bec7175f..a9851b7cdf2c3 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -81,6 +81,9 @@ Other API Changes - Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`) - Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`). - :func:`DataFrame.from_items` provides a more informative error message when passed scalar values (:issue:`17312`) +- :class:`Timestamp` will no longer silently ignore unused or invalid `tz` or `tzinfo` arguments (:issue:`17690`) +- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the `tseries.offsets` module (:issue:`17830`) +- When created with duplicate labels, ``MultiIndex`` now raises a ``ValueError``. (:issue:`17464`) .. _whatsnew_0220.deprecations: From 840fe56f26490876f8fba0e070a80876c56cb5ac Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 20:38:06 -0400 Subject: [PATCH 22/46] Remove duplicate levels from `test_is_` --- pandas/tests/indexes/test_multi.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 82223cd7b631c..9d2b8b953a875 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1618,8 +1618,6 @@ def test_is_(self): # shouldn't change assert mi2.is_(mi) mi4 = mi3.view() - - # GH 17464 - Remove duplicate MultiIndex levels mi4.set_levels([lrange(10), lrange(10)], inplace=True) assert not mi4.is_(mi3) mi5 = mi.view() From 137bc16f282cce6b7506f7f9e47d66499c59c280 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 22:01:32 -0400 Subject: [PATCH 23/46] Remove duplicate levels from `test_frame_describe_multikey` --- pandas/tests/groupby/test_groupby.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 6a9facdc7e903..37a605c98b7ce 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -411,10 +411,9 @@ def test_frame_describe_multikey(self): desc_groups = [] for col in self.tsframe: group = grouped[col].describe() - # GH 17464 - Remove duplicate MultiIndex levels group_col = pd.MultiIndex( levels=[[col], group.columns], - labels=[[0] * len(group.columns), range(len(group.columns))]) + labels=[[0]*len(group.columns), range(len(group.columns))]) group = pd.DataFrame(group.values, columns=group_col, index=group.index) @@ -426,10 +425,8 @@ def test_frame_describe_multikey(self): 'C': 1, 'D': 1}, axis=1) result = groupedT.describe() expected = self.tsframe.describe().T - # GH 17464 - Remove duplicate MultiIndex levels - expected.index = pd.MultiIndex( - levels=[[0, 1], expected.index], - labels=[[0, 0, 1, 1], range(len(expected.index))]) + expected.index = pd.MultiIndex(levels=[[0, 1], expected.index], + labels=[[0, 0, 1, 1], range(len(expected.index))]) tm.assert_frame_equal(result, expected) def test_frame_describe_tupleindex(self): From 0129ee794101d824ae532a37dc15ce9ca744ccef Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 22:02:41 -0400 Subject: [PATCH 24/46] Add comments about change --- pandas/tests/groupby/test_groupby.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 37a605c98b7ce..51180a1c01def 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -411,6 +411,7 @@ def test_frame_describe_multikey(self): desc_groups = [] for col in self.tsframe: group = grouped[col].describe() + # GH 17464 - Remove duplicate MultiIndex levels group_col = pd.MultiIndex( levels=[[col], group.columns], labels=[[0]*len(group.columns), range(len(group.columns))]) @@ -425,6 +426,7 @@ def test_frame_describe_multikey(self): 'C': 1, 'D': 1}, axis=1) result = groupedT.describe() expected = self.tsframe.describe().T + # GH 17464 - Remove duplicate MultiIndex levels expected.index = pd.MultiIndex(levels=[[0, 1], expected.index], labels=[[0, 0, 1, 1], range(len(expected.index))]) tm.assert_frame_equal(result, expected) From 8b400dcacd829e3ff7fb3771806cae0ef58a41e5 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 22:04:12 -0400 Subject: [PATCH 25/46] Added comments changes w/ bug number --- pandas/tests/indexes/test_multi.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 9d2b8b953a875..fd99af5681359 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1618,6 +1618,7 @@ def test_is_(self): # shouldn't change assert mi2.is_(mi) mi4 = mi3.view() + # GH 17464 - Remove duplicate MultiIndex levels mi4.set_levels([lrange(10), lrange(10)], inplace=True) assert not mi4.is_(mi3) mi5 = mi.view() @@ -2454,6 +2455,7 @@ def test_level_setting_resets_attributes(self): ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] ]) assert ind.is_monotonic + # GH 17464 - Remove duplicate MultiIndex levels ind.set_levels([['A', 'B'], [1, 3, 2]], inplace=True) # if this fails, probably didn't reset the cache correctly. assert not ind.is_monotonic From 0a8e9f2d5903fb1b7d870ab81272a26d2c4d1cd4 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 22:05:49 -0400 Subject: [PATCH 26/46] PEP8 compliance --- pandas/tests/groupby/test_groupby.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 51180a1c01def..6a9facdc7e903 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -414,7 +414,7 @@ def test_frame_describe_multikey(self): # GH 17464 - Remove duplicate MultiIndex levels group_col = pd.MultiIndex( levels=[[col], group.columns], - labels=[[0]*len(group.columns), range(len(group.columns))]) + labels=[[0] * len(group.columns), range(len(group.columns))]) group = pd.DataFrame(group.values, columns=group_col, index=group.index) @@ -427,8 +427,9 @@ def test_frame_describe_multikey(self): result = groupedT.describe() expected = self.tsframe.describe().T # GH 17464 - Remove duplicate MultiIndex levels - expected.index = pd.MultiIndex(levels=[[0, 1], expected.index], - labels=[[0, 0, 1, 1], range(len(expected.index))]) + expected.index = pd.MultiIndex( + levels=[[0, 1], expected.index], + labels=[[0, 0, 1, 1], range(len(expected.index))]) tm.assert_frame_equal(result, expected) def test_frame_describe_tupleindex(self): From cc7ebc74a81ce8090e664c0e2527f58369ef6a7a Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 22:09:52 -0400 Subject: [PATCH 27/46] whatsnew entry --- doc/source/whatsnew/v0.21.0.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 89e2d3006696c..4911ecbb161a5 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -1172,4 +1172,3 @@ Other ^^^^^ - Bug where some inplace operators were not being wrapped and produced a copy when invoked (:issue:`12962`) - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) - From b02114ff2cff8dc822744b63c7271853881fa027 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 20:38:06 -0400 Subject: [PATCH 28/46] Remove duplicate levels from `test_is_` --- pandas/tests/indexes/test_multi.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index fd99af5681359..915c03c598178 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1618,6 +1618,7 @@ def test_is_(self): # shouldn't change assert mi2.is_(mi) mi4 = mi3.view() + # GH 17464 - Remove duplicate MultiIndex levels mi4.set_levels([lrange(10), lrange(10)], inplace=True) assert not mi4.is_(mi3) From b56eca0dbbed4260e9b06259ac0c8b78f9d71e1c Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 20:52:44 -0400 Subject: [PATCH 29/46] Remove duplicate levels from `test_level_setting_resets_attributes` --- pandas/tests/indexes/test_multi.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 915c03c598178..b75cd59aaf9dc 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -2456,6 +2456,7 @@ def test_level_setting_resets_attributes(self): ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] ]) assert ind.is_monotonic + # GH 17464 - Remove duplicate MultiIndex levels ind.set_levels([['A', 'B'], [1, 3, 2]], inplace=True) # if this fails, probably didn't reset the cache correctly. From 9f179e6700f5352db433afe8c6c15941b489afe2 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 22:01:32 -0400 Subject: [PATCH 30/46] Remove duplicate levels from `test_frame_describe_multikey` --- pandas/tests/groupby/test_groupby.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 6a9facdc7e903..37a605c98b7ce 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -411,10 +411,9 @@ def test_frame_describe_multikey(self): desc_groups = [] for col in self.tsframe: group = grouped[col].describe() - # GH 17464 - Remove duplicate MultiIndex levels group_col = pd.MultiIndex( levels=[[col], group.columns], - labels=[[0] * len(group.columns), range(len(group.columns))]) + labels=[[0]*len(group.columns), range(len(group.columns))]) group = pd.DataFrame(group.values, columns=group_col, index=group.index) @@ -426,10 +425,8 @@ def test_frame_describe_multikey(self): 'C': 1, 'D': 1}, axis=1) result = groupedT.describe() expected = self.tsframe.describe().T - # GH 17464 - Remove duplicate MultiIndex levels - expected.index = pd.MultiIndex( - levels=[[0, 1], expected.index], - labels=[[0, 0, 1, 1], range(len(expected.index))]) + expected.index = pd.MultiIndex(levels=[[0, 1], expected.index], + labels=[[0, 0, 1, 1], range(len(expected.index))]) tm.assert_frame_equal(result, expected) def test_frame_describe_tupleindex(self): From 2af9aba277e48942bd3edd45cb503a110ba37ab3 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 22:02:41 -0400 Subject: [PATCH 31/46] Add comments about change --- pandas/tests/groupby/test_groupby.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 37a605c98b7ce..51180a1c01def 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -411,6 +411,7 @@ def test_frame_describe_multikey(self): desc_groups = [] for col in self.tsframe: group = grouped[col].describe() + # GH 17464 - Remove duplicate MultiIndex levels group_col = pd.MultiIndex( levels=[[col], group.columns], labels=[[0]*len(group.columns), range(len(group.columns))]) @@ -425,6 +426,7 @@ def test_frame_describe_multikey(self): 'C': 1, 'D': 1}, axis=1) result = groupedT.describe() expected = self.tsframe.describe().T + # GH 17464 - Remove duplicate MultiIndex levels expected.index = pd.MultiIndex(levels=[[0, 1], expected.index], labels=[[0, 0, 1, 1], range(len(expected.index))]) tm.assert_frame_equal(result, expected) From 3e56abaae51732e796b3b0866e5a89f9cc517331 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 22:04:12 -0400 Subject: [PATCH 32/46] Added comments changes w/ bug number --- pandas/tests/indexes/test_multi.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index b75cd59aaf9dc..56c9b52564bcb 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1618,7 +1618,10 @@ def test_is_(self): # shouldn't change assert mi2.is_(mi) mi4 = mi3.view() +<<<<<<< HEAD +======= +>>>>>>> 529a040... Added comments changes w/ bug number # GH 17464 - Remove duplicate MultiIndex levels mi4.set_levels([lrange(10), lrange(10)], inplace=True) assert not mi4.is_(mi3) From 85d63791a078cf074dcd24ad9a571ba1557a9044 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 22:05:49 -0400 Subject: [PATCH 33/46] PEP8 compliance --- pandas/tests/groupby/test_groupby.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 51180a1c01def..6a9facdc7e903 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -414,7 +414,7 @@ def test_frame_describe_multikey(self): # GH 17464 - Remove duplicate MultiIndex levels group_col = pd.MultiIndex( levels=[[col], group.columns], - labels=[[0]*len(group.columns), range(len(group.columns))]) + labels=[[0] * len(group.columns), range(len(group.columns))]) group = pd.DataFrame(group.values, columns=group_col, index=group.index) @@ -427,8 +427,9 @@ def test_frame_describe_multikey(self): result = groupedT.describe() expected = self.tsframe.describe().T # GH 17464 - Remove duplicate MultiIndex levels - expected.index = pd.MultiIndex(levels=[[0, 1], expected.index], - labels=[[0, 0, 1, 1], range(len(expected.index))]) + expected.index = pd.MultiIndex( + levels=[[0, 1], expected.index], + labels=[[0, 0, 1, 1], range(len(expected.index))]) tm.assert_frame_equal(result, expected) def test_frame_describe_tupleindex(self): From 49b731d76bb85e78ea0ab7ceb858c354b1d036d6 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 24 Oct 2017 22:09:52 -0400 Subject: [PATCH 34/46] whatsnew entry --- doc/source/whatsnew/v0.21.0.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 4911ecbb161a5..5c0e970b553f6 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -1067,8 +1067,14 @@ Indexing - Bug in intersection of ``RangeIndex`` with negative step (:issue:`17296`) - Bug in ``IntervalIndex`` where performing a scalar lookup fails for included right endpoints of non-overlapping monotonic decreasing indexes (:issue:`16417`, :issue:`17271`) - Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` when no valid entry (:issue:`17400`) +<<<<<<< HEAD - Bug in :func:`Series.rename` when called with a callable, incorrectly alters the name of the ``Series``, rather than the name of the ``Index``. (:issue:`17407`) - Bug in :func:`String.str_get` raises ``IndexError`` instead of inserting NaNs when using a negative index. (:issue:`17704`) +======= +- Bug in :func:`Series.rename` when called with a `callable`, incorrectly alters the name of the `Series`, rather than the name of the `Index`. (:issue:`17407`) +- Bug in :func:`String.str_get` raises `index out of range` error instead of inserting NaNs when using a negative index. (:issue:`17704`) +- When created with duplicate labels, ``MultiIndex`` now raises a `ValueError`. (:issue:`17464`) +>>>>>>> 4eff2d5... whatsnew entry I/O ^^^ From ec4f9718387fbc1fd3eab92d6b7c85563da9d61a Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Wed, 25 Oct 2017 17:44:49 -0400 Subject: [PATCH 35/46] Whatsnew backticks --- doc/source/whatsnew/v0.21.0.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 5c0e970b553f6..44023b0f3a07a 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -1073,8 +1073,12 @@ Indexing ======= - Bug in :func:`Series.rename` when called with a `callable`, incorrectly alters the name of the `Series`, rather than the name of the `Index`. (:issue:`17407`) - Bug in :func:`String.str_get` raises `index out of range` error instead of inserting NaNs when using a negative index. (:issue:`17704`) +<<<<<<< HEAD - When created with duplicate labels, ``MultiIndex`` now raises a `ValueError`. (:issue:`17464`) >>>>>>> 4eff2d5... whatsnew entry +======= +- When created with duplicate labels, ``MultiIndex`` now raises a ``ValueError``. (:issue:`17464`) +>>>>>>> 868ec45... Whatsnew backticks I/O ^^^ From 2684855afb731ba621b462cd59af1c6842576799 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Wed, 25 Oct 2017 17:46:35 -0400 Subject: [PATCH 36/46] whatsnew merging --- doc/source/whatsnew/v0.21.0.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 44023b0f3a07a..bb86e0d5d4703 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -1068,6 +1068,7 @@ Indexing - Bug in ``IntervalIndex`` where performing a scalar lookup fails for included right endpoints of non-overlapping monotonic decreasing indexes (:issue:`16417`, :issue:`17271`) - Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` when no valid entry (:issue:`17400`) <<<<<<< HEAD +<<<<<<< HEAD - Bug in :func:`Series.rename` when called with a callable, incorrectly alters the name of the ``Series``, rather than the name of the ``Index``. (:issue:`17407`) - Bug in :func:`String.str_get` raises ``IndexError`` instead of inserting NaNs when using a negative index. (:issue:`17704`) ======= @@ -1077,6 +1078,10 @@ Indexing - When created with duplicate labels, ``MultiIndex`` now raises a `ValueError`. (:issue:`17464`) >>>>>>> 4eff2d5... whatsnew entry ======= +======= +- Bug in :func:`Series.rename` when called with a callable, incorrectly alters the name of the ``Series``, rather than the name of the ``Index``. (:issue:`17407`) +- Bug in :func:`String.str_get` raises ``IndexError`` instead of inserting NaNs when using a negative index. (:issue:`17704`) +>>>>>>> f74a4ab... whatsnew merging - When created with duplicate labels, ``MultiIndex`` now raises a ``ValueError``. (:issue:`17464`) >>>>>>> 868ec45... Whatsnew backticks From c36c236eff2cba1c48c921976994ec90ef004247 Mon Sep 17 00:00:00 2001 From: cmazzullo Date: Fri, 27 Oct 2017 16:29:22 -0400 Subject: [PATCH 37/46] Removed comments about this issue from other tests --- pandas/tests/indexes/test_multi.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 56c9b52564bcb..d80dc340ac347 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1618,10 +1618,7 @@ def test_is_(self): # shouldn't change assert mi2.is_(mi) mi4 = mi3.view() -<<<<<<< HEAD -======= ->>>>>>> 529a040... Added comments changes w/ bug number # GH 17464 - Remove duplicate MultiIndex levels mi4.set_levels([lrange(10), lrange(10)], inplace=True) assert not mi4.is_(mi3) @@ -2459,8 +2456,11 @@ def test_level_setting_resets_attributes(self): ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] ]) assert ind.is_monotonic +<<<<<<< HEAD # GH 17464 - Remove duplicate MultiIndex levels +======= +>>>>>>> 6ce2637... Removed comments about this issue from other tests ind.set_levels([['A', 'B'], [1, 3, 2]], inplace=True) # if this fails, probably didn't reset the cache correctly. assert not ind.is_monotonic From 2b3f4d423c4bedcee4e856b2c03ee8be98a0d07c Mon Sep 17 00:00:00 2001 From: cmazzullo Date: Fri, 27 Oct 2017 16:40:59 -0400 Subject: [PATCH 38/46] Added test to make sure a ValueError is thrown --- pandas/tests/indexes/test_multi.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index d80dc340ac347..82223cd7b631c 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -2456,11 +2456,6 @@ def test_level_setting_resets_attributes(self): ['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3] ]) assert ind.is_monotonic -<<<<<<< HEAD - - # GH 17464 - Remove duplicate MultiIndex levels -======= ->>>>>>> 6ce2637... Removed comments about this issue from other tests ind.set_levels([['A', 'B'], [1, 3, 2]], inplace=True) # if this fails, probably didn't reset the cache correctly. assert not ind.is_monotonic From 386daaf56673303fbbf3f58cf9d65106a5822859 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 28 Oct 2017 16:59:53 -0400 Subject: [PATCH 39/46] move whatsnew to 0.22.0 --- doc/source/whatsnew/v0.21.0.txt | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index bb86e0d5d4703..89e2d3006696c 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -1067,23 +1067,8 @@ Indexing - Bug in intersection of ``RangeIndex`` with negative step (:issue:`17296`) - Bug in ``IntervalIndex`` where performing a scalar lookup fails for included right endpoints of non-overlapping monotonic decreasing indexes (:issue:`16417`, :issue:`17271`) - Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` when no valid entry (:issue:`17400`) -<<<<<<< HEAD -<<<<<<< HEAD - Bug in :func:`Series.rename` when called with a callable, incorrectly alters the name of the ``Series``, rather than the name of the ``Index``. (:issue:`17407`) - Bug in :func:`String.str_get` raises ``IndexError`` instead of inserting NaNs when using a negative index. (:issue:`17704`) -======= -- Bug in :func:`Series.rename` when called with a `callable`, incorrectly alters the name of the `Series`, rather than the name of the `Index`. (:issue:`17407`) -- Bug in :func:`String.str_get` raises `index out of range` error instead of inserting NaNs when using a negative index. (:issue:`17704`) -<<<<<<< HEAD -- When created with duplicate labels, ``MultiIndex`` now raises a `ValueError`. (:issue:`17464`) ->>>>>>> 4eff2d5... whatsnew entry -======= -======= -- Bug in :func:`Series.rename` when called with a callable, incorrectly alters the name of the ``Series``, rather than the name of the ``Index``. (:issue:`17407`) -- Bug in :func:`String.str_get` raises ``IndexError`` instead of inserting NaNs when using a negative index. (:issue:`17704`) ->>>>>>> f74a4ab... whatsnew merging -- When created with duplicate labels, ``MultiIndex`` now raises a ``ValueError``. (:issue:`17464`) ->>>>>>> 868ec45... Whatsnew backticks I/O ^^^ @@ -1187,3 +1172,4 @@ Other ^^^^^ - Bug where some inplace operators were not being wrapped and produced a copy when invoked (:issue:`12962`) - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) + From c1696459c9fe3ad3dca4aa3e896cc425069dfd18 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Tue, 28 Nov 2017 22:54:52 -0500 Subject: [PATCH 40/46] Updated `test_frame_describe_multikey` to remove duplicate MultiIndex levels --- pandas/tests/groupby/test_functional.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/tests/groupby/test_functional.py b/pandas/tests/groupby/test_functional.py index bc13d51c4f4f6..dedc072a8d4ae 100644 --- a/pandas/tests/groupby/test_functional.py +++ b/pandas/tests/groupby/test_functional.py @@ -46,16 +46,17 @@ def test_series_index_name(self): result = grouped.agg(lambda x: x.mean()) assert result.index.name == 'A' + def test_frame_describe_multikey(self): grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month]) result = grouped.describe() desc_groups = [] for col in self.tsframe: group = grouped[col].describe() - group_col = pd.MultiIndex([[col] * len(group.columns), - group.columns], - [[0] * len(group.columns), - range(len(group.columns))]) + # GH 17464 - Remove duplicate MultiIndex levels + group_col = pd.MultiIndex( + levels=[[col], group.columns], + labels=[[0] * len(group.columns), range(len(group.columns))]) group = pd.DataFrame(group.values, columns=group_col, index=group.index) @@ -67,8 +68,10 @@ def test_frame_describe_multikey(self): 'C': 1, 'D': 1}, axis=1) result = groupedT.describe() expected = self.tsframe.describe().T - expected.index = pd.MultiIndex([[0, 0, 1, 1], expected.index], - [range(4), range(len(expected.index))]) + # GH 17464 - Remove duplicate MultiIndex levels + expected.index = pd.MultiIndex( + levels=[[0, 1], expected.index], + labels=[[0, 0, 1, 1], range(len(expected.index))]) tm.assert_frame_equal(result, expected) def test_frame_describe_tupleindex(self): From 073e62936f3a023b666acb22f3a49ab76c7d876e Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Fri, 1 Dec 2017 07:24:41 -0500 Subject: [PATCH 41/46] Fixed linting issue --- pandas/tests/groupby/test_functional.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/groupby/test_functional.py b/pandas/tests/groupby/test_functional.py index dedc072a8d4ae..2360b9167ff9a 100644 --- a/pandas/tests/groupby/test_functional.py +++ b/pandas/tests/groupby/test_functional.py @@ -46,7 +46,6 @@ def test_series_index_name(self): result = grouped.agg(lambda x: x.mean()) assert result.index.name == 'A' - def test_frame_describe_multikey(self): grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month]) result = grouped.describe() From 869157dd4c158d13e0b3e91eff5bcce611c9ad10 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Sat, 2 Dec 2017 17:04:44 -0500 Subject: [PATCH 42/46] whatsnew changes --- doc/source/whatsnew/v0.22.0.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index a9851b7cdf2c3..150e86f564ad0 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -81,8 +81,6 @@ Other API Changes - Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`) - Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`). - :func:`DataFrame.from_items` provides a more informative error message when passed scalar values (:issue:`17312`) -- :class:`Timestamp` will no longer silently ignore unused or invalid `tz` or `tzinfo` arguments (:issue:`17690`) -- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the `tseries.offsets` module (:issue:`17830`) - When created with duplicate labels, ``MultiIndex`` now raises a ``ValueError``. (:issue:`17464`) .. _whatsnew_0220.deprecations: From 44e45524159809f5ac42f48414af01ce7b4bdf01 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Sat, 2 Dec 2017 17:09:54 -0500 Subject: [PATCH 43/46] Kwargs in error message --- pandas/core/indexes/multi.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index bea63161dc2a6..761dbc6086b53 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -199,9 +199,10 @@ def _verify_integrity(self, labels=None, levels=None): " inconsistent state" % (i, label.max(), len(level))) if not level.is_unique: - raise ValueError("Level values must be unique: {0}" - " on level {1}".format( - [value for value in level], i)) + raise ValueError("Level values must be unique: {values} on " + "level {level}".format( + values=[value for value in level], + level=i)) @property def levels(self): From 297216bffd3778c2c66231995bba691985a8c94e Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Sat, 2 Dec 2017 17:10:41 -0500 Subject: [PATCH 44/46] Removed unnecessary comment --- pandas/tests/groupby/test_functional.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/groupby/test_functional.py b/pandas/tests/groupby/test_functional.py index 2360b9167ff9a..b9718663570bd 100644 --- a/pandas/tests/groupby/test_functional.py +++ b/pandas/tests/groupby/test_functional.py @@ -67,7 +67,6 @@ def test_frame_describe_multikey(self): 'C': 1, 'D': 1}, axis=1) result = groupedT.describe() expected = self.tsframe.describe().T - # GH 17464 - Remove duplicate MultiIndex levels expected.index = pd.MultiIndex( levels=[[0, 1], expected.index], labels=[[0, 0, 1, 1], range(len(expected.index))]) From fead79f26cc428900842536c7dd4342b711789f9 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Sat, 2 Dec 2017 17:12:49 -0500 Subject: [PATCH 45/46] Added blank line --- pandas/tests/indexes/test_multi.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 82223cd7b631c..a2c0a75e21f43 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -3089,6 +3089,7 @@ def test_duplicate_multiindex_labels(self): # Make sure that a MultiIndex with duplicate levels throws a ValueError with pytest.raises(ValueError): ind = pd.MultiIndex([['A'] * 10, range(10)], [[0] * 10, range(10)]) + # And that using set_levels with duplicate levels fails ind = MultiIndex.from_arrays([['A', 'A', 'B', 'B', 'B'], [1, 2, 1, 2, 3]]) From 703ff1e385809fed2c741cc2de25153f1b7c49c8 Mon Sep 17 00:00:00 2001 From: Chris Mazzullo Date: Sat, 2 Dec 2017 17:22:32 -0500 Subject: [PATCH 46/46] Got rid of duplicated tests --- pandas/tests/groupby/test_groupby.py | 77 ---------------------------- 1 file changed, 77 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 6a9facdc7e903..3436dd9169081 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -385,83 +385,6 @@ def test_attr_wrapper(self): # make sure raises error pytest.raises(AttributeError, getattr, grouped, 'foo') - def test_series_describe_multikey(self): - ts = tm.makeTimeSeries() - grouped = ts.groupby([lambda x: x.year, lambda x: x.month]) - result = grouped.describe() - assert_series_equal(result['mean'], grouped.mean(), check_names=False) - assert_series_equal(result['std'], grouped.std(), check_names=False) - assert_series_equal(result['min'], grouped.min(), check_names=False) - - def test_series_describe_single(self): - ts = tm.makeTimeSeries() - grouped = ts.groupby(lambda x: x.month) - result = grouped.apply(lambda x: x.describe()) - expected = grouped.describe().stack() - assert_series_equal(result, expected) - - def test_series_index_name(self): - grouped = self.df.loc[:, ['C']].groupby(self.df['A']) - result = grouped.agg(lambda x: x.mean()) - assert result.index.name == 'A' - - def test_frame_describe_multikey(self): - grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month]) - result = grouped.describe() - desc_groups = [] - for col in self.tsframe: - group = grouped[col].describe() - # GH 17464 - Remove duplicate MultiIndex levels - group_col = pd.MultiIndex( - levels=[[col], group.columns], - labels=[[0] * len(group.columns), range(len(group.columns))]) - group = pd.DataFrame(group.values, - columns=group_col, - index=group.index) - desc_groups.append(group) - expected = pd.concat(desc_groups, axis=1) - tm.assert_frame_equal(result, expected) - - groupedT = self.tsframe.groupby({'A': 0, 'B': 0, - 'C': 1, 'D': 1}, axis=1) - result = groupedT.describe() - expected = self.tsframe.describe().T - # GH 17464 - Remove duplicate MultiIndex levels - expected.index = pd.MultiIndex( - levels=[[0, 1], expected.index], - labels=[[0, 0, 1, 1], range(len(expected.index))]) - tm.assert_frame_equal(result, expected) - - def test_frame_describe_tupleindex(self): - - # GH 14848 - regression from 0.19.0 to 0.19.1 - df1 = DataFrame({'x': [1, 2, 3, 4, 5] * 3, - 'y': [10, 20, 30, 40, 50] * 3, - 'z': [100, 200, 300, 400, 500] * 3}) - df1['k'] = [(0, 0, 1), (0, 1, 0), (1, 0, 0)] * 5 - df2 = df1.rename(columns={'k': 'key'}) - pytest.raises(ValueError, lambda: df1.groupby('k').describe()) - pytest.raises(ValueError, lambda: df2.groupby('key').describe()) - - def test_frame_describe_unstacked_format(self): - # GH 4792 - prices = {pd.Timestamp('2011-01-06 10:59:05', tz=None): 24990, - pd.Timestamp('2011-01-06 12:43:33', tz=None): 25499, - pd.Timestamp('2011-01-06 12:54:09', tz=None): 25499} - volumes = {pd.Timestamp('2011-01-06 10:59:05', tz=None): 1500000000, - pd.Timestamp('2011-01-06 12:43:33', tz=None): 5000000000, - pd.Timestamp('2011-01-06 12:54:09', tz=None): 100000000} - df = pd.DataFrame({'PRICE': prices, - 'VOLUME': volumes}) - result = df.groupby('PRICE').VOLUME.describe() - data = [df[df.PRICE == 24990].VOLUME.describe().values.tolist(), - df[df.PRICE == 25499].VOLUME.describe().values.tolist()] - expected = pd.DataFrame(data, - index=pd.Index([24990, 25499], name='PRICE'), - columns=['count', 'mean', 'std', 'min', - '25%', '50%', '75%', 'max']) - tm.assert_frame_equal(result, expected) - def test_frame_groupby(self): grouped = self.tsframe.groupby(lambda x: x.weekday())