diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 40cbf67f1ea5e..00c1a61521a44 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -614,6 +614,7 @@ Deprecations - The ``pandas.util.testing`` module has been deprecated. Use the public API in ``pandas.testing`` documented at :ref:`api.general.testing` (:issue:`16232`). - ``pandas.SparseArray`` has been deprecated. Use ``pandas.arrays.SparseArray`` (:class:`arrays.SparseArray`) instead. (:issue:`30642`) - The parameter ``is_copy`` of :meth:`DataFrame.take` has been deprecated and will be removed in a future version. (:issue:`27357`) +- Support for multi-dimensional indexing (e.g. ``index[:, None]``) on a :class:`Index` is deprecated and will be removed in a future version, convert to a numpy array before indexing instead (:issue:`30588`) **Selecting Columns from a Grouped DataFrame** @@ -1075,6 +1076,7 @@ Other - Bug in :meth:`DataFrame.to_csv` when supplied a series with a ``dtype="string"`` and a ``na_rep``, the ``na_rep`` was being truncated to 2 characters. (:issue:`29975`) - Bug where :meth:`DataFrame.itertuples` would incorrectly determine whether or not namedtuples could be used for dataframes of 255 columns (:issue:`28282`) - Handle nested NumPy ``object`` arrays in :func:`testing.assert_series_equal` for ExtensionArray implementations (:issue:`30841`) +- Bug in :class:`Index` constructor incorrectly allowing 2-dimensional input arrays (:issue:`13601`, :issue:`27125`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index a67d31344ff55..1d87753be8cc3 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2007,9 +2007,10 @@ def __getitem__(self, key): if com.is_bool_indexer(key): key = check_bool_array_indexer(self, key) - return self._constructor( - values=self._codes[key], dtype=self.dtype, fastpath=True - ) + result = self._codes[key] + if result.ndim > 1: + return result + return self._constructor(result, dtype=self.dtype, fastpath=True) def __setitem__(self, key, value): """ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 06c1338dbf5ab..b381f43d33c53 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -543,8 +543,6 @@ def __getitem__(self, key): if result.ndim > 1: # To support MPL which performs slicing with 2 dim # even though it only has 1 dim by definition - if is_period: - return self._simple_new(result, dtype=self.dtype, freq=freq) return result return self._simple_new(result, dtype=self.dtype, freq=freq) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index e6cfca74048e7..e4efb187ffae5 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -500,8 +500,11 @@ def __getitem__(self, value): # scalar if not isinstance(left, ABCIndexClass): - if isna(left): + if is_scalar(left) and isna(left): return self._fill_value + if np.ndim(left) > 1: + # GH#30588 multi-dimensional indexer disallowed + raise ValueError("multi-dimensional indexing not allowed") return Interval(left, right, self.closed) return self._shallow_copy(left, right) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b02c0a08c93fa..143bb8dbda8b4 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -393,6 +393,9 @@ def __new__( if kwargs: raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}") + if subarr.ndim > 1: + # GH#13601, GH#20285, GH#27125 + raise ValueError("Index data must be 1-dimensional") return cls._simple_new(subarr, name, **kwargs) elif hasattr(data, "__array__"): @@ -608,7 +611,7 @@ def __array_wrap__(self, result, context=None): Gets called after a ufunc. """ result = lib.item_from_zerodim(result) - if is_bool_dtype(result) or lib.is_scalar(result): + if is_bool_dtype(result) or lib.is_scalar(result) or np.ndim(result) > 1: return result attrs = self._get_attributes_dict() @@ -687,11 +690,10 @@ def astype(self, dtype, copy=True): return Index(np.asarray(self), dtype=dtype, copy=copy) try: - return Index( - self.values.astype(dtype, copy=copy), name=self.name, dtype=dtype - ) + casted = self.values.astype(dtype, copy=copy) except (TypeError, ValueError): raise TypeError(f"Cannot cast {type(self).__name__} to dtype {dtype}") + return Index(casted, name=self.name, dtype=dtype) _index_shared_docs[ "take" @@ -3902,6 +3904,9 @@ def __getitem__(self, key): key = com.values_from_object(key) result = getitem(key) if not is_scalar(result): + if np.ndim(result) > 1: + deprecate_ndim_indexing(result) + return result return promote(result) else: return result @@ -5533,3 +5538,17 @@ def _try_convert_to_int_array( pass raise ValueError + + +def deprecate_ndim_indexing(result): + if np.ndim(result) > 1: + # GH#27125 indexer like idx[:, None] expands dim, but we + # cannot do that and keep an index, so return ndarray + # Deprecation GH#30588 + warnings.warn( + "Support for multi-dimensional indexing (e.g. `index[:, None]`) " + "on an Index is deprecated and will be removed in a future " + "version. Convert to a numpy array before indexing instead.", + DeprecationWarning, + stacklevel=3, + ) diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py index bd089f574a313..58fcce7e59be7 100644 --- a/pandas/core/indexes/extension.py +++ b/pandas/core/indexes/extension.py @@ -12,7 +12,7 @@ from pandas.core.dtypes.generic import ABCSeries from pandas.core.arrays import ExtensionArray -from pandas.core.indexes.base import Index +from pandas.core.indexes.base import Index, deprecate_ndim_indexing from pandas.core.ops import get_op_result_name @@ -178,6 +178,7 @@ def __getitem__(self, key): return type(self)(result, name=self.name) # Includes cases where we get a 2D ndarray back for MPL compat + deprecate_ndim_indexing(result) return result def __iter__(self): diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 39cbe5f151262..b9b44284edaa9 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -73,6 +73,10 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None): else: subarr = data + if subarr.ndim > 1: + # GH#13601, GH#20285, GH#27125 + raise ValueError("Index data must be 1-dimensional") + name = maybe_extract_name(name, data, cls) return cls._simple_new(subarr, name=name) diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index d09dc586fe056..e027641288bb9 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -975,3 +975,9 @@ def test_engine_type(self, dtype, engine_type): ci.values._codes = ci.values._codes.astype("int64") assert np.issubdtype(ci.codes.dtype, dtype) assert isinstance(ci._engine, engine_type) + + def test_getitem_2d_deprecated(self): + # GH#30588 multi-dim indexing is deprecated, but raising is also acceptable + idx = self.create_index() + with pytest.raises(ValueError, match="cannot mask with array containing NA"): + idx[:, None] diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index ceb3ac8b61c0b..a16017b0e12c0 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -875,3 +875,11 @@ def test_engine_reference_cycle(self): nrefs_pre = len(gc.get_referrers(index)) index._engine assert len(gc.get_referrers(index)) == nrefs_pre + + def test_getitem_2d_deprecated(self): + # GH#30588 + idx = self.create_index() + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + res = idx[:, None] + + assert isinstance(res, np.ndarray), type(res) diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 91007a1ba529e..4c600e510790a 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -86,7 +86,9 @@ def test_dti_business_getitem(self): def test_dti_business_getitem_matplotlib_hackaround(self): rng = pd.bdate_range(START, END) - values = rng[:, None] + with tm.assert_produces_warning(DeprecationWarning): + # GH#30588 multi-dimensional indexing deprecated + values = rng[:, None] expected = rng.values[:, None] tm.assert_numpy_array_equal(values, expected) @@ -110,7 +112,9 @@ def test_dti_custom_getitem(self): def test_dti_custom_getitem_matplotlib_hackaround(self): rng = pd.bdate_range(START, END, freq="C") - values = rng[:, None] + with tm.assert_produces_warning(DeprecationWarning): + # GH#30588 multi-dimensional indexing deprecated + values = rng[:, None] expected = rng.values[:, None] tm.assert_numpy_array_equal(values, expected) diff --git a/pandas/tests/indexes/interval/test_base.py b/pandas/tests/indexes/interval/test_base.py index 91f8dddea71d7..d8c2ba8413cfb 100644 --- a/pandas/tests/indexes/interval/test_base.py +++ b/pandas/tests/indexes/interval/test_base.py @@ -79,3 +79,10 @@ def test_where(self, closed, klass): expected = IntervalIndex([np.nan] + idx[1:].tolist()) result = idx.where(klass(cond)) tm.assert_index_equal(result, expected) + + def test_getitem_2d_deprecated(self): + # GH#30588 multi-dim indexing is deprecated, but raising is also acceptable + idx = self.create_index() + with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"): + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + idx[:, None] diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 2c7fc8b320325..8dd430b55117e 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -71,7 +71,9 @@ def test_can_hold_identifiers(self): @pytest.mark.parametrize("index", ["datetime"], indirect=True) def test_new_axis(self, index): - new_index = index[None, :] + with tm.assert_produces_warning(DeprecationWarning): + # GH#30588 multi-dimensional indexing deprecated + new_index = index[None, :] assert new_index.ndim == 2 assert isinstance(new_index, np.ndarray) @@ -2784,9 +2786,35 @@ def test_shape_of_invalid_index(): # about this). However, as long as this is not solved in general,this test ensures # that the returned shape is consistent with this underlying array for # compat with matplotlib (see https://github.com/pandas-dev/pandas/issues/27775) - a = np.arange(8).reshape(2, 2, 2) - idx = pd.Index(a) - assert idx.shape == a.shape - idx = pd.Index([0, 1, 2, 3]) - assert idx[:, None].shape == (4, 1) + with tm.assert_produces_warning(DeprecationWarning): + # GH#30588 multi-dimensional indexing deprecated + assert idx[:, None].shape == (4, 1) + + +def test_validate_1d_input(): + # GH#27125 check that we do not have >1-dimensional input + msg = "Index data must be 1-dimensional" + + arr = np.arange(8).reshape(2, 2, 2) + with pytest.raises(ValueError, match=msg): + pd.Index(arr) + + with pytest.raises(ValueError, match=msg): + pd.Float64Index(arr.astype(np.float64)) + + with pytest.raises(ValueError, match=msg): + pd.Int64Index(arr.astype(np.int64)) + + with pytest.raises(ValueError, match=msg): + pd.UInt64Index(arr.astype(np.uint64)) + + df = pd.DataFrame(arr.reshape(4, 2)) + with pytest.raises(ValueError, match=msg): + pd.Index(df) + + # GH#13601 trying to assign a multi-dimensional array to an index is not + # allowed + ser = pd.Series(0, range(4)) + with pytest.raises(ValueError, match=msg): + ser.index = np.array([[2, 3]] * 4) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index ea4d8edd2f413..448a06070c45c 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -83,12 +83,9 @@ def test_getitem_ndarray_3d(self, index, obj, idxr, idxr_id): msg = ( r"Buffer has wrong number of dimensions \(expected 1," r" got 3\)|" - "The truth value of an array with more than one element is " - "ambiguous|" "Cannot index with multidimensional key|" r"Wrong number of dimensions. values.ndim != ndim \[3 != 1\]|" - "No matching signature found|" # TypeError - "unhashable type: 'numpy.ndarray'" # TypeError + "Index data must be 1-dimensional" ) if ( @@ -104,21 +101,12 @@ def test_getitem_ndarray_3d(self, index, obj, idxr, idxr_id): "categorical", ] ): - idxr[nd3] - else: - if ( - isinstance(obj, DataFrame) - and idxr_id == "getitem" - and index.inferred_type == "boolean" - ): - error = TypeError - elif idxr_id == "getitem" and index.inferred_type == "interval": - error = TypeError - else: - error = ValueError - - with pytest.raises(error, match=msg): + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): idxr[nd3] + else: + with pytest.raises(ValueError, match=msg): + with tm.assert_produces_warning(DeprecationWarning): + idxr[nd3] @pytest.mark.parametrize( "index", tm.all_index_generator(5), ids=lambda x: type(x).__name__ @@ -146,16 +134,14 @@ def test_setitem_ndarray_3d(self, index, obj, idxr, idxr_id): nd3 = np.random.randint(5, size=(2, 2, 2)) msg = ( - r"Buffer has wrong number of dimensions \(expected 1, " - r"got 3\)|" - "The truth value of an array with more than one element is " - "ambiguous|" - "Only 1-dimensional input arrays are supported|" + r"Buffer has wrong number of dimensions \(expected 1," + r" got 3\)|" "'pandas._libs.interval.IntervalTree' object has no attribute " "'set_value'|" # AttributeError "unhashable type: 'numpy.ndarray'|" # TypeError "No matching signature found|" # TypeError - r"^\[\[\[" # pandas.core.indexing.IndexingError + r"^\[\[\[|" # pandas.core.indexing.IndexingError + "Index data must be 1-dimensional" ) if (idxr_id == "iloc") or ( @@ -176,10 +162,8 @@ def test_setitem_ndarray_3d(self, index, obj, idxr, idxr_id): ): idxr[nd3] = 0 else: - with pytest.raises( - (ValueError, AttributeError, TypeError, pd.core.indexing.IndexingError), - match=msg, - ): + err = (ValueError, AttributeError) + with pytest.raises(err, match=msg): idxr[nd3] = 0 def test_inf_upcast(self): diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index 39d6bc69e7c00..0038df78dd866 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -136,7 +136,7 @@ def test_write_with_index(self): # column multi-index df.index = [0, 1, 2] - df.columns = (pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)]),) + df.columns = pd.MultiIndex.from_tuples([("a", 1)]) self.check_error_on_write(df, ValueError) def test_path_pathlib(self): diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py index 8fdf86ddabafe..9cd3ccbf9214e 100644 --- a/pandas/tests/plotting/test_converter.py +++ b/pandas/tests/plotting/test_converter.py @@ -66,11 +66,10 @@ def test_registering_no_warning(self): # Set to the "warn" state, in case this isn't the first test run register_matplotlib_converters() - with tm.assert_produces_warning(None) as w: + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + # GH#30588 DeprecationWarning from 2D indexing ax.plot(s.index, s.values) - assert len(w) == 0 - def test_pandas_plots_register(self): pytest.importorskip("matplotlib.pyplot") s = Series(range(12), index=date_range("2017", periods=12)) @@ -101,19 +100,16 @@ def test_option_no_warning(self): # Test without registering first, no warning with ctx: - with tm.assert_produces_warning(None) as w: + # GH#30588 DeprecationWarning from 2D indexing on Index + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): ax.plot(s.index, s.values) - assert len(w) == 0 - # Now test with registering register_matplotlib_converters() with ctx: - with tm.assert_produces_warning(None) as w: + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): ax.plot(s.index, s.values) - assert len(w) == 0 - def test_registry_resets(self): units = pytest.importorskip("matplotlib.units") dates = pytest.importorskip("matplotlib.dates") diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index a135c0cf7cd7e..a2d14f27d7b7a 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -137,7 +137,9 @@ def test_first_last_valid(self, datetime_series): assert ts.last_valid_index().freq == ts.index.freq def test_mpl_compat_hack(self, datetime_series): - result = datetime_series[:, np.newaxis] + with tm.assert_produces_warning(DeprecationWarning, check_stacklevel=False): + # GH#30588 multi-dimensional indexing deprecated + result = datetime_series[:, np.newaxis] expected = datetime_series.values[:, np.newaxis] tm.assert_almost_equal(result, expected)