From fba224d0ac78f3e004fde2936db5a1800d426f29 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 10 Sep 2020 19:58:03 -0700 Subject: [PATCH 1/4] dummy to see if pre-commit fails --- pandas/core/indexers.py | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index d9aa02db3e42a..16d1086c965ae 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -14,9 +14,12 @@ is_integer, is_integer_dtype, is_list_like, + is_scalar, ) from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries +import pandas.core.common as com + # ----------------------------------------------------------- # Indexer Identification @@ -141,7 +144,7 @@ def check_setitem_lengths(indexer, value, values) -> None: When the indexer is an ndarray or list and the lengths don't match. """ # boolean with truth values == len of the value is ok too - if isinstance(indexer, (np.ndarray, list)): + if isinstance(indexer, (np.ndarray, list)): # TODO: why not other listlike? if is_list_like(value) and len(indexer) != len(value): if not ( isinstance(indexer, np.ndarray) @@ -482,3 +485,34 @@ def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any: raise IndexError("arrays used as indices must be of integer or boolean type") return indexer + + +def check_empty_setitem(key, value, array) -> bool: + """ + If the value being set is listlike, check that its length matches array[key]. + + If the value is empty, notify the caller that this is a no-op. + NB: we skip some dtype validation on these no-ops. + """ + no_op = False + if is_list_like(value): # TODO: what about object dtype holding listlike? + if is_scalar(key): + raise ValueError("Setting an array element with a sequence.") + + key_len = length_of_indexer(key, array) + if key_len != len(value): + if com.is_bool_indexer(key): + # We will fall through to let numpy raise with an accurate message + pass + else: + msg = ( + f"shape mismatch: value array of length '{len(key)}' " + "does not match indexing result of length " + f"'{len(value)}'." + ) + raise ValueError(msg) + else: + if len(value) == 0: + no_op = True + + return no_op From cb86e9e1d67b7c56bc9a36ce7300099113478c05 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 13 Sep 2020 09:09:13 -0700 Subject: [PATCH 2/4] REF: use check_setitem_lengths in DatetimeArray.__setitem__ --- pandas/core/arrays/datetimelike.py | 24 ++------ pandas/core/indexers.py | 88 ++++++++++++------------------ 2 files changed, 39 insertions(+), 73 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 6302b48cb1978..c620944a63f6a 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1,6 +1,6 @@ from datetime import datetime, timedelta import operator -from typing import Any, Callable, Optional, Sequence, Tuple, Type, TypeVar, Union, cast +from typing import Any, Callable, Optional, Sequence, Tuple, Type, TypeVar, Union import warnings import numpy as np @@ -58,7 +58,7 @@ from pandas.core.arrays.base import ExtensionOpsMixin import pandas.core.common as com from pandas.core.construction import array, extract_array -from pandas.core.indexers import check_array_indexer +from pandas.core.indexers import check_array_indexer, check_setitem_lengths from pandas.core.ops.common import unpack_zerodim_and_defer from pandas.core.ops.invalid import invalid_comparison, make_invalid_op @@ -605,23 +605,9 @@ def __setitem__( # to a period in from_sequence). For DatetimeArray, it's Timestamp... # I don't know if mypy can do that, possibly with Generics. # https://mypy.readthedocs.io/en/latest/generics.html - if is_list_like(value): - is_slice = isinstance(key, slice) - - if lib.is_scalar(key): - raise ValueError("setting an array element with a sequence.") - - if not is_slice: - key = cast(Sequence, key) - if len(key) != len(value) and not com.is_bool_indexer(key): - msg = ( - f"shape mismatch: value array of length '{len(key)}' " - "does not match indexing result of length " - f"'{len(value)}'." - ) - raise ValueError(msg) - elif not len(key): - return + no_op = check_setitem_lengths(key, value, self) + if no_op: + return value = self._validate_setitem_value(value) key = check_array_indexer(self, key) diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index 16d1086c965ae..b036eddf0182a 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -14,12 +14,9 @@ is_integer, is_integer_dtype, is_list_like, - is_scalar, ) from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries -import pandas.core.common as com - # ----------------------------------------------------------- # Indexer Identification @@ -117,7 +114,7 @@ def is_empty_indexer(indexer, arr_value: np.ndarray) -> bool: # Indexer Validation -def check_setitem_lengths(indexer, value, values) -> None: +def check_setitem_lengths(indexer, value, values) -> bool: """ Validate that value and indexer are the same length. @@ -136,34 +133,48 @@ def check_setitem_lengths(indexer, value, values) -> None: Returns ------- - None + bool + Whether this is an empty listlike setting which is a no-op. Raises ------ ValueError When the indexer is an ndarray or list and the lengths don't match. """ - # boolean with truth values == len of the value is ok too - if isinstance(indexer, (np.ndarray, list)): # TODO: why not other listlike? - if is_list_like(value) and len(indexer) != len(value): - if not ( - isinstance(indexer, np.ndarray) - and indexer.dtype == np.bool_ - and len(indexer[indexer]) == len(value) - ): - raise ValueError( - "cannot set using a list-like indexer " - "with a different length than the value" - ) + no_op = False + + if isinstance(indexer, (np.ndarray, list)): + # We can ignore other listlikes becasue they are either + # a) not necessarily 1-D indexers, e.g. tuple + # b) boolean indexers e.g. BoolArray + if is_list_like(value): + if len(indexer) != len(value): + if not ( + isinstance(indexer, np.ndarray) + and indexer.dtype == np.bool_ + and len(indexer[indexer]) == len(value) + ): + # boolean with truth values == len of the value is ok too + raise ValueError( + "cannot set using a list-like indexer " + "with a different length than the value" + ) + if len(indexer) == 0: + no_op = True elif isinstance(indexer, slice): # slice - if is_list_like(value) and len(values): - if len(value) != length_of_indexer(indexer, values): - raise ValueError( - "cannot set using a slice indexer with a " - "different length than the value" - ) + if is_list_like(value): + if len(values): + if len(value) != length_of_indexer(indexer, values): + raise ValueError( + "cannot set using a slice indexer with a " + "different length than the value" + ) + else: + # TODO: dont we still need lengths to match? + no_op = True + return no_op def validate_indices(indices: np.ndarray, n: int) -> None: @@ -485,34 +496,3 @@ def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any: raise IndexError("arrays used as indices must be of integer or boolean type") return indexer - - -def check_empty_setitem(key, value, array) -> bool: - """ - If the value being set is listlike, check that its length matches array[key]. - - If the value is empty, notify the caller that this is a no-op. - NB: we skip some dtype validation on these no-ops. - """ - no_op = False - if is_list_like(value): # TODO: what about object dtype holding listlike? - if is_scalar(key): - raise ValueError("Setting an array element with a sequence.") - - key_len = length_of_indexer(key, array) - if key_len != len(value): - if com.is_bool_indexer(key): - # We will fall through to let numpy raise with an accurate message - pass - else: - msg = ( - f"shape mismatch: value array of length '{len(key)}' " - "does not match indexing result of length " - f"'{len(value)}'." - ) - raise ValueError(msg) - else: - if len(value) == 0: - no_op = True - - return no_op From 8bcd9026152ebd7aac32b60a23420ede36949a1e Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 13 Sep 2020 12:59:03 -0700 Subject: [PATCH 3/4] suggested edits --- pandas/core/indexers.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index b036eddf0182a..48a299aa9b152 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -149,12 +149,12 @@ def check_setitem_lengths(indexer, value, values) -> bool: # b) boolean indexers e.g. BoolArray if is_list_like(value): if len(indexer) != len(value): + # boolean with truth values == len of the value is ok too if not ( isinstance(indexer, np.ndarray) and indexer.dtype == np.bool_ and len(indexer[indexer]) == len(value) ): - # boolean with truth values == len of the value is ok too raise ValueError( "cannot set using a list-like indexer " "with a different length than the value" @@ -163,17 +163,15 @@ def check_setitem_lengths(indexer, value, values) -> bool: no_op = True elif isinstance(indexer, slice): - # slice if is_list_like(value): - if len(values): - if len(value) != length_of_indexer(indexer, values): - raise ValueError( - "cannot set using a slice indexer with a " - "different length than the value" - ) - else: - # TODO: dont we still need lengths to match? + if len(value) != length_of_indexer(indexer, values): + raise ValueError( + "cannot set using a slice indexer with a " + "different length than the value" + ) + if len(value) == 0: no_op = True + return no_op From a8c04941232e1ed3c147fac3a3464a21e7712f53 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 13 Sep 2020 17:45:18 -0700 Subject: [PATCH 4/4] sty, tests --- pandas/core/indexers.py | 4 ++-- pandas/tests/arrays/test_datetimelike.py | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index 48a299aa9b152..6c88ae1e03cda 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -159,7 +159,7 @@ def check_setitem_lengths(indexer, value, values) -> bool: "cannot set using a list-like indexer " "with a different length than the value" ) - if len(indexer) == 0: + if not len(indexer): no_op = True elif isinstance(indexer, slice): @@ -169,7 +169,7 @@ def check_setitem_lengths(indexer, value, values) -> bool: "cannot set using a slice indexer with a " "different length than the value" ) - if len(value) == 0: + if not len(value): no_op = True return no_op diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 0ae6b5bde5297..83b98525d3e8a 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -338,6 +338,16 @@ def test_setitem_raises(self): with pytest.raises(TypeError, match="'value' should be a.* 'object'"): arr[0] = object() + msg = "cannot set using a list-like indexer with a different length" + with pytest.raises(ValueError, match=msg): + # GH#36339 + arr[[]] = [arr[1]] + + msg = "cannot set using a slice indexer with a different length than" + with pytest.raises(ValueError, match=msg): + # GH#36339 + arr[1:1] = arr[:3] + @pytest.mark.parametrize("box", [list, np.array, pd.Index, pd.Series]) def test_setitem_numeric_raises(self, arr1d, box): # We dont case e.g. int64 to our own dtype for setitem