Skip to content

REF: use check_setitem_lengths in DTA.__setitem__ #36339

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 5 additions & 19 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from datetime import datetime, timedelta
import operator
from typing import Any, Callable, Optional, Sequence, Tuple, Type, TypeVar, Union, cast
from typing import Any, Callable, Optional, Sequence, Tuple, Type, TypeVar, Union
import warnings

import numpy as np
Expand Down Expand Up @@ -58,7 +58,7 @@
from pandas.core.arrays.base import ExtensionOpsMixin
import pandas.core.common as com
from pandas.core.construction import array, extract_array
from pandas.core.indexers import check_array_indexer
from pandas.core.indexers import check_array_indexer, check_setitem_lengths
from pandas.core.ops.common import unpack_zerodim_and_defer
from pandas.core.ops.invalid import invalid_comparison, make_invalid_op

Expand Down Expand Up @@ -605,23 +605,9 @@ def __setitem__(
# to a period in from_sequence). For DatetimeArray, it's Timestamp...
# I don't know if mypy can do that, possibly with Generics.
# https://mypy.readthedocs.io/en/latest/generics.html
if is_list_like(value):
is_slice = isinstance(key, slice)

if lib.is_scalar(key):
raise ValueError("setting an array element with a sequence.")

if not is_slice:
key = cast(Sequence, key)
if len(key) != len(value) and not com.is_bool_indexer(key):
msg = (
f"shape mismatch: value array of length '{len(key)}' "
"does not match indexing result of length "
f"'{len(value)}'."
)
raise ValueError(msg)
elif not len(key):
return
no_op = check_setitem_lengths(key, value, self)
if no_op:
return

value = self._validate_setitem_value(value)
key = check_array_indexer(self, key)
Expand Down
52 changes: 33 additions & 19 deletions pandas/core/indexers.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def is_empty_indexer(indexer, arr_value: np.ndarray) -> bool:
# Indexer Validation


def check_setitem_lengths(indexer, value, values) -> None:
def check_setitem_lengths(indexer, value, values) -> bool:
"""
Validate that value and indexer are the same length.

Expand All @@ -133,34 +133,48 @@ def check_setitem_lengths(indexer, value, values) -> None:

Returns
-------
None
bool
Whether this is an empty listlike setting which is a no-op.

Raises
------
ValueError
When the indexer is an ndarray or list and the lengths don't match.
"""
# boolean with truth values == len of the value is ok too
no_op = False

if isinstance(indexer, (np.ndarray, list)):
if is_list_like(value) and len(indexer) != len(value):
if not (
isinstance(indexer, np.ndarray)
and indexer.dtype == np.bool_
and len(indexer[indexer]) == len(value)
):
raise ValueError(
"cannot set using a list-like indexer "
"with a different length than the value"
)
# We can ignore other listlikes because they are either
# a) not necessarily 1-D indexers, e.g. tuple
# b) boolean indexers e.g. BoolArray
if is_list_like(value):
if len(indexer) != len(value):
if not (
isinstance(indexer, np.ndarray)
and indexer.dtype == np.bool_
and len(indexer[indexer]) == len(value)
):
# boolean with truth values == len of the value is ok too
raise ValueError(
"cannot set using a list-like indexer "
"with a different length than the value"
)
if len(indexer) == 0:
no_op = True

elif isinstance(indexer, slice):
# slice
if is_list_like(value) and len(values):
if len(value) != length_of_indexer(indexer, values):
raise ValueError(
"cannot set using a slice indexer with a "
"different length than the value"
)
if is_list_like(value):
if len(values):
if len(value) != length_of_indexer(indexer, values):
raise ValueError(
"cannot set using a slice indexer with a "
"different length than the value"
)
else:
# TODO: don't we still need lengths to match?
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think yes?
But can you do that here, since the original code (I think?) did it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The existing code does not check for a length match in the len==0 case. I'll take a look at adding that check and getting rid of this comment.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It actually is checked right now, because the len(key) != len(value) comparison runs before the elif not len(key): branch is reached (but it's clearly not tested).

As example:

In [1]: arr = pd.date_range("2012", periods=3)._data                                                                                                                                                               

In [2]: arr                                                                                                                                                                                                        
Out[2]: 
<DatetimeArray>
['2012-01-01 00:00:00', '2012-01-02 00:00:00', '2012-01-03 00:00:00']
Length: 3, dtype: datetime64[ns]

In [3]: arr[[]] = [pd.Timestamp("2012")]                                                                                                                                                                           
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-3-d4b75e105bf5> in <module>
----> 1 arr[[]] = [pd.Timestamp("2012")]

~/scipy/pandas/pandas/core/arrays/datetimelike.py in __setitem__(self, key, value)
    620                         f"'{len(value)}'."
    621                     )
--> 622                     raise ValueError(msg)
    623                 elif not len(key):
    624                     return

ValueError: shape mismatch: value array of length '0' does not match indexing result of length '1'.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch; I was comparing against the existing check_setitem_lengths.

no_op = True
return no_op


def validate_indices(indices: np.ndarray, n: int) -> None:
Expand Down