Skip to content

BUG: validate Index data is 1D + deprecate multi-dim indexing #30588

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Jan 9, 2020
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,7 @@ Deprecations
- The ``pandas.util.testing`` module has been deprecated. Use the public API in ``pandas.testing`` documented at :ref:`api.general.testing` (:issue:`16232`).
- ``pandas.SparseArray`` has been deprecated. Use ``pandas.arrays.SparseArray`` (:class:`arrays.SparseArray`) instead. (:issue:`30642`)
- The parameter ``is_copy`` of :meth:`DataFrame.take` has been deprecated and will be removed in a future version. (:issue:`27357`)
- Support for multi-dimensional indexing of an :class:`Index` with ``index[:, None]`` is deprecated and will be removed in a future version (:issue:`30588`)

**Selecting Columns from a Grouped DataFrame**

Expand Down Expand Up @@ -1034,6 +1035,7 @@ Other
- Bug in :class:`DataFrame` constructor when passing a 2D ``ndarray`` and an extension dtype (:issue:`12513`)
- Bug in :meth:`DataFrame.to_csv` when supplied a series with a ``dtype="string"`` and a ``na_rep``, the ``na_rep`` was being truncated to 2 characters. (:issue:`29975`)
- Bug where :meth:`DataFrame.itertuples` would incorrectly determine whether or not namedtuples could be used for dataframes of 255 columns (:issue:`28282`)
- Bug in :class:`Index` constructor incorrectly allowing 2-dimensional input arrays (:issue:`13601`, :issue:`27125`)

.. _whatsnew_1000.contributors:

Expand Down
21 changes: 17 additions & 4 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,9 @@ def __new__(

if kwargs:
raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}")
if subarr.ndim > 1:
# GH#13601, GH#20285, GH#27125
raise ValueError("Index data must be 1-dimensional")
return cls._simple_new(subarr, name, **kwargs)

elif hasattr(data, "__array__"):
Expand Down Expand Up @@ -608,7 +611,7 @@ def __array_wrap__(self, result, context=None):
Gets called after a ufunc.
"""
result = lib.item_from_zerodim(result)
if is_bool_dtype(result) or lib.is_scalar(result):
if is_bool_dtype(result) or lib.is_scalar(result) or np.ndim(result) > 1:
return result

attrs = self._get_attributes_dict()
Expand Down Expand Up @@ -687,11 +690,10 @@ def astype(self, dtype, copy=True):
return Index(np.asarray(self), dtype=dtype, copy=copy)

try:
return Index(
self.values.astype(dtype, copy=copy), name=self.name, dtype=dtype
)
casted = self.values.astype(dtype, copy=copy)
except (TypeError, ValueError):
raise TypeError(f"Cannot cast {type(self).__name__} to dtype {dtype}")
return Index(casted, name=self.name, dtype=dtype)

_index_shared_docs[
"take"
Expand Down Expand Up @@ -3931,6 +3933,17 @@ def __getitem__(self, key):
key = com.values_from_object(key)
result = getitem(key)
if not is_scalar(result):
if np.ndim(result) > 1:
# GH#27125 indexer like idx[:, None] expands dim, but we
# cannot do that and keep an index, so return ndarray
# Deprecation GH#30588
warnings.warn(
"Support for Index[:, None] is deprecated and will be "
"removed in a future version.",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment here about Index[:, None] and recommended alternative as I posted above for the whatsnew

DeprecationWarning,
stacklevel=2,
)
return result
return promote(result)
else:
return result
Expand Down
4 changes: 4 additions & 0 deletions pandas/core/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None):
else:
subarr = data

if subarr.ndim > 1:
# GH#13601, GH#20285, GH#27125
raise ValueError("Index data must be 1-dimensional")

name = maybe_extract_name(name, data, cls)
return cls._simple_new(subarr, name=name)

Expand Down
43 changes: 38 additions & 5 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1905,6 +1905,14 @@ def test_set_value_deprecated(self):
idx.set_value(arr, idx[1], 80)
assert arr[1] == 80

def test_getitem_2d_deprecated(self):
    """Check that ``idx[:, None]`` warns and hands back a plain ndarray."""
    # GH#30588
    index = self.create_index()

    # The dimension-expanding indexer must emit a DeprecationWarning.
    with tm.assert_produces_warning(DeprecationWarning):
        expanded = index[:, None]

    # The expanded result is a raw NumPy array, not an Index.
    assert isinstance(expanded, np.ndarray)

@pytest.mark.parametrize(
"index", ["string", "int", "datetime", "timedelta"], indirect=True
)
Expand Down Expand Up @@ -2784,9 +2792,34 @@ def test_shape_of_invalid_index():
# about this). However, as long as this is not solved in general, this test ensures
# that the returned shape is consistent with this underlying array for
# compat with matplotlib (see https://github.com/pandas-dev/pandas/issues/27775)
a = np.arange(8).reshape(2, 2, 2)
idx = pd.Index(a)
assert idx.shape == a.shape

idx = pd.Index([0, 1, 2, 3])
assert idx[:, None].shape == (4, 1)
with tm.assert_produces_warning(DeprecationWarning):
assert idx[:, None].shape == (4, 1)


def test_validate_1d_input():
    """Check that Index constructors reject data with more than one dimension.

    Covers the generic ``Index`` constructor, the numeric subclasses, a
    DataFrame passed as data, and assigning a 2D array to ``Series.index``.
    Every case must raise ``ValueError`` with the same message.
    """
    # GH#27125 check that we do not have >1-dimensional input
    msg = "Index data must be 1-dimensional"

    cube = np.arange(8).reshape(2, 2, 2)

    # Each constructor is paired with the 3D array cast to its natural dtype.
    ndarray_cases = [
        (pd.Index, cube),
        (pd.Float64Index, cube.astype(np.float64)),
        (pd.Int64Index, cube.astype(np.int64)),
        (pd.UInt64Index, cube.astype(np.uint64)),
    ]
    for constructor, data in ndarray_cases:
        with pytest.raises(ValueError, match=msg):
            constructor(data)

    # A 2D DataFrame passed as Index data is rejected as well.
    frame = pd.DataFrame(cube.reshape(4, 2))
    with pytest.raises(ValueError, match=msg):
        pd.Index(frame)

    # GH#13601 trying to assign a multi-dimensional array to an index is not
    # allowed
    series = pd.Series(0, range(4))
    two_dim_labels = np.array([[2, 3]] * 4)
    with pytest.raises(ValueError, match=msg):
        series.index = two_dim_labels
30 changes: 6 additions & 24 deletions pandas/tests/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,9 @@ def test_getitem_ndarray_3d(self, index, obj, idxr, idxr_id):
msg = (
r"Buffer has wrong number of dimensions \(expected 1,"
r" got 3\)|"
"The truth value of an array with more than one element is"
" ambiguous|"
"Cannot index with multidimensional key|"
r"Wrong number of dimensions. values.ndim != ndim \[3 != 1\]|"
"No matching signature found|" # TypeError
"unhashable type: 'numpy.ndarray'" # TypeError
"Index data must be 1-dimensional"
)

if (
Expand All @@ -106,18 +103,7 @@ def test_getitem_ndarray_3d(self, index, obj, idxr, idxr_id):
):
idxr[nd3]
else:
if (
isinstance(obj, DataFrame)
and idxr_id == "getitem"
and index.inferred_type == "boolean"
):
error = TypeError
elif idxr_id == "getitem" and index.inferred_type == "interval":
error = TypeError
else:
error = ValueError

with pytest.raises(error, match=msg):
with pytest.raises(ValueError, match=msg):
idxr[nd3]

@pytest.mark.parametrize(
Expand Down Expand Up @@ -148,14 +134,12 @@ def test_setitem_ndarray_3d(self, index, obj, idxr, idxr_id):
msg = (
r"Buffer has wrong number of dimensions \(expected 1,"
r" got 3\)|"
"The truth value of an array with more than one element is"
" ambiguous|"
"Only 1-dimensional input arrays are supported|"
"'pandas._libs.interval.IntervalTree' object has no attribute"
" 'set_value'|" # AttributeError
"unhashable type: 'numpy.ndarray'|" # TypeError
"No matching signature found|" # TypeError
r"^\[\[\[" # pandas.core.indexing.IndexingError
r"^\[\[\[|" # pandas.core.indexing.IndexingError
"Index data must be 1-dimensional"
)

if (idxr_id == "iloc") or (
Expand All @@ -176,10 +160,8 @@ def test_setitem_ndarray_3d(self, index, obj, idxr, idxr_id):
):
idxr[nd3] = 0
else:
with pytest.raises(
(ValueError, AttributeError, TypeError, pd.core.indexing.IndexingError),
match=msg,
):
err = (ValueError, AttributeError)
with pytest.raises(err, match=msg):
idxr[nd3] = 0

def test_inf_upcast(self):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/test_feather.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def test_write_with_index(self):

# column multi-index
df.index = [0, 1, 2]
df.columns = (pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)]),)
df.columns = pd.MultiIndex.from_tuples([("a", 1)])
self.check_error_on_write(df, ValueError)

def test_path_pathlib(self):
Expand Down