Skip to content

Commit 9a98aca

Browse files
authored
REF: implement LossySetitemError (#45672)
1 parent 2dd75ca commit 9a98aca

File tree

6 files changed

+47
-31
lines changed

6 files changed

+47
-31
lines changed

pandas/core/arrays/interval.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,7 @@
3939
from pandas.errors import IntCastingNaNError
4040
from pandas.util._decorators import Appender
4141

42+
from pandas.core.dtypes.cast import LossySetitemError
4243
from pandas.core.dtypes.common import (
4344
is_categorical_dtype,
4445
is_dtype_equal,
@@ -1081,7 +1082,7 @@ def _validate_listlike(self, value):
10811082

10821083
try:
10831084
self.left._validate_fill_value(value_left)
1084-
except (ValueError, TypeError) as err:
1085+
except (LossySetitemError, TypeError) as err:
10851086
msg = (
10861087
"'value' should be a compatible interval type, "
10871088
f"got {type(value)} instead."

pandas/core/dtypes/cast.py

+31-21
Original file line number | Diff line number | Diff line change
@@ -1924,6 +1924,8 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool:
19241924
arr._validate_setitem_value(element)
19251925
return True
19261926
except (ValueError, TypeError):
1927+
# TODO(2.0): stop catching ValueError for tzaware, see
1928+
# _catch_deprecated_value_error
19271929
return False
19281930

19291931
# This is technically incorrect, but maintains the behavior of
@@ -1933,7 +1935,7 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool:
19331935
try:
19341936
np_can_hold_element(dtype, element)
19351937
return True
1936-
except (TypeError, ValueError):
1938+
except (TypeError, LossySetitemError):
19371939
return False
19381940

19391941

@@ -1963,7 +1965,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
19631965
if isinstance(element, range):
19641966
if _dtype_can_hold_range(element, dtype):
19651967
return element
1966-
raise ValueError
1968+
raise LossySetitemError
19671969

19681970
elif is_integer(element) or (is_float(element) and element.is_integer()):
19691971
# e.g. test_setitem_series_int8 if we have a python int 1
@@ -1972,7 +1974,7 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
19721974
info = np.iinfo(dtype)
19731975
if info.min <= element <= info.max:
19741976
return dtype.type(element)
1975-
raise ValueError
1977+
raise LossySetitemError
19761978

19771979
if tipo is not None:
19781980
if tipo.kind not in ["i", "u"]:
@@ -1986,10 +1988,10 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
19861988
# np.putmask, whereas the raw values cannot.
19871989
# see TestSetitemFloatNDarrayIntoIntegerSeries
19881990
return casted
1989-
raise ValueError
1991+
raise LossySetitemError
19901992

19911993
# Anything other than integer we cannot hold
1992-
raise ValueError
1994+
raise LossySetitemError
19931995
elif (
19941996
dtype.kind == "u"
19951997
and isinstance(element, np.ndarray)
@@ -2001,31 +2003,31 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
20012003
# TODO: faster to check (element >=0).all()? potential
20022004
# itemsize issues there?
20032005
return casted
2004-
raise ValueError
2006+
raise LossySetitemError
20052007
elif dtype.itemsize < tipo.itemsize:
2006-
raise ValueError
2008+
raise LossySetitemError
20072009
elif not isinstance(tipo, np.dtype):
20082010
# i.e. nullable IntegerDtype; we can put this into an ndarray
20092011
# losslessly iff it has no NAs
20102012
if element._hasna:
2011-
raise ValueError
2013+
raise LossySetitemError
20122014
return element
20132015

20142016
return element
20152017

2016-
raise ValueError
2018+
raise LossySetitemError
20172019

20182020
elif dtype.kind == "f":
20192021
if tipo is not None:
20202022
# TODO: itemsize check?
20212023
if tipo.kind not in ["f", "i", "u"]:
20222024
# Anything other than float/integer we cannot hold
2023-
raise ValueError
2025+
raise LossySetitemError
20242026
elif not isinstance(tipo, np.dtype):
20252027
# i.e. nullable IntegerDtype or FloatingDtype;
20262028
# we can put this into an ndarray losslessly iff it has no NAs
20272029
if element._hasna:
2028-
raise ValueError
2030+
raise LossySetitemError
20292031
return element
20302032
elif tipo.itemsize > dtype.itemsize:
20312033
if isinstance(element, np.ndarray):
@@ -2034,13 +2036,13 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
20342036
# TODO(np>=1.20): we can just use np.array_equal with equal_nan
20352037
if array_equivalent(casted, element):
20362038
return casted
2037-
raise ValueError
2039+
raise LossySetitemError
20382040

20392041
return element
20402042

20412043
if lib.is_integer(element) or lib.is_float(element):
20422044
return element
2043-
raise ValueError
2045+
raise LossySetitemError
20442046

20452047
elif dtype.kind == "c":
20462048
if lib.is_integer(element) or lib.is_complex(element) or lib.is_float(element):
@@ -2052,13 +2054,13 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
20522054
if casted == element:
20532055
return casted
20542056
# otherwise e.g. overflow see test_32878_complex_itemsize
2055-
raise ValueError
2057+
raise LossySetitemError
20562058

20572059
if tipo is not None:
20582060
if tipo.kind in ["c", "f", "i", "u"]:
20592061
return element
2060-
raise ValueError
2061-
raise ValueError
2062+
raise LossySetitemError
2063+
raise LossySetitemError
20622064

20632065
elif dtype.kind == "b":
20642066
if tipo is not None:
@@ -2067,23 +2069,23 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any:
20672069
# i.e. we have a BooleanArray
20682070
if element._hasna:
20692071
# i.e. there are pd.NA elements
2070-
raise ValueError
2072+
raise LossySetitemError
20712073
return element
2072-
raise ValueError
2074+
raise LossySetitemError
20732075
if lib.is_bool(element):
20742076
return element
2075-
raise ValueError
2077+
raise LossySetitemError
20762078

20772079
elif dtype.kind == "S":
20782080
# TODO: test tests.frame.methods.test_replace tests get here,
20792081
# need more targeted tests. xref phofl has a PR about this
20802082
if tipo is not None:
20812083
if tipo.kind == "S" and tipo.itemsize <= dtype.itemsize:
20822084
return element
2083-
raise ValueError
2085+
raise LossySetitemError
20842086
if isinstance(element, bytes) and len(element) <= dtype.itemsize:
20852087
return element
2086-
raise ValueError
2088+
raise LossySetitemError
20872089

20882090
raise NotImplementedError(dtype)
20892091

@@ -2097,3 +2099,11 @@ def _dtype_can_hold_range(rng: range, dtype: np.dtype) -> bool:
20972099
if not len(rng):
20982100
return True
20992101
return np.can_cast(rng[0], dtype) and np.can_cast(rng[-1], dtype)
2102+
2103+
2104+
class LossySetitemError(Exception):
2105+
"""
2106+
Raised when trying to do a __setitem__ on an np.ndarray that is not lossless.
2107+
"""
2108+
2109+
pass

pandas/core/frame.py

+4-3
Original file line number | Diff line number | Diff line change
@@ -93,6 +93,7 @@
9393
)
9494

9595
from pandas.core.dtypes.cast import (
96+
LossySetitemError,
9697
can_hold_element,
9798
construct_1d_arraylike_from_scalar,
9899
construct_2d_arraylike_from_scalar,
@@ -3882,11 +3883,11 @@ def _set_value(
38823883
series = self._get_item_cache(col)
38833884
loc = self.index.get_loc(index)
38843885

3885-
# setitem_inplace will do validation that may raise TypeError
3886-
# or ValueError
3886+
# setitem_inplace will do validation that may raise TypeError,
3887+
# ValueError, or LossySetitemError
38873888
series._mgr.setitem_inplace(loc, value)
38883889

3889-
except (KeyError, TypeError, ValueError):
3890+
except (KeyError, TypeError, ValueError, LossySetitemError):
38903891
# set using a non-recursive method & reset the cache
38913892
if takeable:
38923893
self.iloc[index, col] = value

pandas/core/indexes/base.py

+6-4
Original file line number | Diff line number | Diff line change
@@ -68,6 +68,7 @@
6868

6969
from pandas.core.dtypes.astype import astype_nansafe
7070
from pandas.core.dtypes.cast import (
71+
LossySetitemError,
7172
can_hold_element,
7273
common_dtype_categorical_compat,
7374
ensure_dtype_can_hold_na,
@@ -5071,12 +5072,13 @@ def _validate_fill_value(self, value):
50715072
"""
50725073
dtype = self.dtype
50735074
if isinstance(dtype, np.dtype) and dtype.kind not in ["m", "M"]:
5075+
# return np_can_hold_element(dtype, value)
50745076
try:
50755077
return np_can_hold_element(dtype, value)
5076-
except ValueError as err:
5078+
except LossySetitemError as err:
50775079
# re-raise as TypeError for consistency
50785080
raise TypeError from err
5079-
if not can_hold_element(self._values, value):
5081+
elif not can_hold_element(self._values, value):
50805082
raise TypeError
50815083
return value
50825084

@@ -5294,7 +5296,7 @@ def putmask(self, mask, value) -> Index:
52945296
value = self._na_value
52955297
try:
52965298
converted = self._validate_fill_value(value)
5297-
except (ValueError, TypeError) as err:
5299+
except (LossySetitemError, ValueError, TypeError) as err:
52985300
if is_object_dtype(self): # pragma: no cover
52995301
raise err
53005302

@@ -6719,7 +6721,7 @@ def insert(self, loc: int, item) -> Index:
67196721
return type(self)._simple_new(res_values, name=self.name)
67206722
else:
67216723
item = self._validate_fill_value(item)
6722-
except (TypeError, ValueError):
6724+
except (TypeError, ValueError, LossySetitemError):
67236725
# e.g. trying to insert an integer into a DatetimeIndex
67246726
# We cannot keep the same dtype, so cast to the (often object)
67256727
# minimal shared dtype before doing the insert.

pandas/core/internals/blocks.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@
3636

3737
from pandas.core.dtypes.astype import astype_array_safe
3838
from pandas.core.dtypes.cast import (
39+
LossySetitemError,
3940
can_hold_element,
4041
find_result_type,
4142
maybe_downcast_to_dtype,
@@ -1191,7 +1192,7 @@ def where(self, other, cond) -> list[Block]:
11911192
# but this gets us back 'casted' which we will re-use below;
11921193
# without using 'casted', expressions.where may do unwanted upcasts.
11931194
casted = np_can_hold_element(values.dtype, other)
1194-
except (ValueError, TypeError):
1195+
except (ValueError, TypeError, LossySetitemError):
11951196
# we cannot coerce, return a compat dtype
11961197
block = self.coerce_to_target_dtype(other)
11971198
blocks = block.where(orig_other, cond)

pandas/core/series.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,7 @@
6262
)
6363

6464
from pandas.core.dtypes.cast import (
65+
LossySetitemError,
6566
convert_dtypes,
6667
maybe_box_native,
6768
maybe_cast_pointwise_result,
@@ -1102,7 +1103,7 @@ def __setitem__(self, key, value) -> None:
11021103
# GH#12862 adding a new key to the Series
11031104
self.loc[key] = value
11041105

1105-
except (TypeError, ValueError):
1106+
except (TypeError, ValueError, LossySetitemError):
11061107
# The key was OK, but we cannot set the value losslessly
11071108
indexer = self.index.get_loc(key)
11081109
self._set_values(indexer, value)

0 commit comments

Comments
 (0)