From 69cf6724e18a0fc6130e539c7c2738a0282f968c Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Wed, 15 Nov 2017 16:14:37 +0100 Subject: [PATCH 1/3] BUG: cast to correct dtype in Index.drop() closes #18304 --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/common.py | 4 ++-- pandas/core/indexes/base.py | 3 ++- pandas/tests/indexes/test_base.py | 21 +++++++++++++++++++++ 4 files changed, 26 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 24f3e4433411e..0fc862210fc6e 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -301,6 +301,7 @@ Indexing - Bug in :func:`MultiIndex.remove_unused_levels` which would fill nan values (:issue:`18417`) - Bug in :func:`MultiIndex.from_tuples`` which would fail to take zipped tuples in python3 (:issue:`18434`) - Bug in :class:`Index` construction from list of mixed type tuples (:issue:`18505`) +- Bug in :func:`Index.drop` when passing a list of both tuples and non-tuples (:issue:`18304`) - Bug in :class:`IntervalIndex` where empty and purely NA data was constructed inconsistently depending on the construction method (:issue:`18421`) - Bug in :func:`IntervalIndex.symmetric_difference` where the symmetric difference with a non-``IntervalIndex`` did not raise (:issue:`18475`) - Bug in indexing a datetimelike ``Index`` that raised ``ValueError`` instead of ``IndexError`` (:issue:`18386`). diff --git a/pandas/core/common.py b/pandas/core/common.py index 35696be5b2a03..1295aa35a4c31 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -398,7 +398,7 @@ def _asarray_tuplesafe(values, dtype=None): return result -def _index_labels_to_array(labels): +def _index_labels_to_array(labels, dtype=None): if isinstance(labels, (compat.string_types, tuple)): labels = [labels] @@ -408,7 +408,7 @@ def _index_labels_to_array(labels): except TypeError: # non-iterable labels = [labels] - labels = _asarray_tuplesafe(labels) + labels = _asarray_tuplesafe(labels, dtype=dtype) return labels diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 78c7cb7b1d30c..04b8ade7e5253 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3761,7 +3761,8 @@ def drop(self, labels, errors='raise'): ------- dropped : Index """ - labels = _index_labels_to_array(labels) + arr_dtype = 'object' if self.dtype == 'object' else None + labels = _index_labels_to_array(labels, dtype=arr_dtype) indexer = self.get_indexer(labels) mask = indexer == -1 if mask.any(): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index e09f4ad360843..e33fd1e0f4c1e 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1429,6 +1429,27 @@ def test_drop(self): expected = Index([1, 2]) tm.assert_index_equal(dropped, expected) + @pytest.mark.parametrize("values", [['a', 'b', ('c', 'd')], + ['a', ('c', 'd'), 'b'], + [('c', 'd'), 'a', 'b']]) + @pytest.mark.parametrize("to_drop", [[('c', 'd'), 'a'], ['a', ('c', 'd')]]) + def test_drop_tuple(self, values, to_drop): + # GH 18304 + index = pd.Index(values) + expected = pd.Index(['b']) + + result = index.drop(to_drop) + tm.assert_index_equal(result, expected) + + removed = index.drop(to_drop[0]) + for drop_me in to_drop[1], [to_drop[1]]: + result = removed.drop(drop_me) + tm.assert_index_equal(result, expected) + + removed = index.drop(to_drop[1]) + for drop_me in to_drop[1], [to_drop[1]]: + pytest.raises(ValueError, removed.drop, drop_me) + def test_tuple_union_bug(self): import pandas import numpy as np From 30df6cef0481931dd584d0569b831a830f663d62 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Thu, 16 Nov 2017 09:43:29 +0100 Subject: [PATCH 2/3] TST: Test for original issue (pd.crosstab) --- pandas/tests/reshape/test_pivot.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 6b44a339fad73..5b64f62527da4 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1631,3 +1631,15 @@ def test_crosstab_dup_index_names(self): index=expected_index, columns=expected_index) tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("names", [['a', ('b', 'c')], + [('a', 'b'), 'c']]) + def test_crosstab_tuple_name(self, names): + s1 = pd.Series(range(3), name=names[0]) + s2 = pd.Series(range(1, 4), name=names[1]) + + mi = pd.MultiIndex.from_arrays([range(3), range(1, 4)], names=names) + expected = pd.Series(1, index=mi).unstack(1, fill_value=0) + + result = pd.crosstab(s1, s2) + tm.assert_frame_equal(result, expected) From 7fea97a4678e2c788b2e6f03c6dabd7846a5891f Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Thu, 16 Nov 2017 09:55:43 +0100 Subject: [PATCH 3/3] DOC: docstring for _index_labels_to_array --- pandas/core/common.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/core/common.py b/pandas/core/common.py index 1295aa35a4c31..775ecc32b0f3c 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -399,6 +399,18 @@ def _asarray_tuplesafe(values, dtype=None): def _index_labels_to_array(labels, dtype=None): + """ + Transform label or iterable of labels to array, for use in Index. + + Parameters + ---------- + dtype : dtype + If specified, use as dtype of the resulting array, otherwise infer. + + Returns + ------- + array + """ if isinstance(labels, (compat.string_types, tuple)): labels = [labels]