From 06bcced8cfbaaae4dec9a80f4de9f2a21198f754 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 8 Jan 2022 10:42:40 -0800 Subject: [PATCH 1/3] BUG: can_hold_element size checks on ints/floats --- pandas/core/dtypes/cast.py | 25 ++++++++----------- .../dtypes/cast/test_can_hold_element.py | 9 +++++++ pandas/tests/frame/indexing/test_where.py | 10 +++++++- pandas/tests/indexing/test_loc.py | 11 ++++++++ pandas/tests/series/indexing/test_setitem.py | 19 ++++++++------ 5 files changed, 50 insertions(+), 24 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 80271f04d4449..c9ca2ad77707a 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1852,16 +1852,23 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: tipo = maybe_infer_dtype_type(element) if dtype.kind in ["i", "u"]: + info = np.iinfo(dtype) + if isinstance(element, range): if _dtype_can_hold_range(element, dtype): return element raise ValueError + elif is_integer(element) or (is_float(element) and element.is_integer()): + # e.g. test_setitem_series_int8 if we have a python int 1 + # tipo may be np.int32, despite the fact that it will fit + # in smaller int dtypes. + if info.min <= element <= info.max: + return element + raise ValueError + if tipo is not None: if tipo.kind not in ["i", "u"]: - if is_float(element) and element.is_integer(): - return element - if isinstance(element, np.ndarray) and element.dtype.kind == "f": # If all can be losslessly cast to integers, then we can hold them # We do something similar in putmask_smart @@ -1889,14 +1896,6 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: return casted raise ValueError elif dtype.itemsize < tipo.itemsize: - if is_integer(element): - # e.g. test_setitem_series_int8 if we have a python int 1 - # tipo may be np.int32, despite the fact that it will fit - # in smaller int dtypes. 
- info = np.iinfo(dtype) - if info.min <= element <= info.max: - return element - raise ValueError raise ValueError elif not isinstance(tipo, np.dtype): # i.e. nullable IntegerDtype; we can put this into an ndarray @@ -1909,10 +1908,6 @@ def np_can_hold_element(dtype: np.dtype, element: Any) -> Any: return element - # We have not inferred an integer from the dtype - # check if we have a builtin int or a float equal to an int - if is_integer(element) or (is_float(element) and element.is_integer()): - return element raise ValueError elif dtype.kind == "f": diff --git a/pandas/tests/dtypes/cast/test_can_hold_element.py b/pandas/tests/dtypes/cast/test_can_hold_element.py index 906123b1aee74..e2e9cc24ce9bf 100644 --- a/pandas/tests/dtypes/cast/test_can_hold_element.py +++ b/pandas/tests/dtypes/cast/test_can_hold_element.py @@ -68,3 +68,12 @@ def test_can_hold_element_int8_int(): assert can_hold_element(arr, np.uint32(element)) assert can_hold_element(arr, np.int64(element)) assert can_hold_element(arr, np.uint64(element)) + + element = 2 ** 9 + assert not can_hold_element(arr, element) + assert not can_hold_element(arr, np.int16(element)) + assert not can_hold_element(arr, np.uint16(element)) + assert not can_hold_element(arr, np.int32(element)) + assert not can_hold_element(arr, np.uint32(element)) + assert not can_hold_element(arr, np.int64(element)) + assert not can_hold_element(arr, np.uint64(element)) diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 197c3ac9bd225..62a0eadd48558 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -753,8 +753,16 @@ def test_where_try_cast_deprecated(frame_or_series): obj.where(mask, -1, try_cast=False) -def test_where_int_downcasting_deprecated(using_array_manager): +def test_where_int_downcasting_deprecated(using_array_manager, request): # GH#44597 + if not using_array_manager: + mark = pytest.mark.xfail( + reason="After 
fixing a bug in can_hold_element, we don't go through " + "the deprecated path, and also up-cast to int64 instead of int32 " + "(for now)." + ) + request.node.add_marker(mark) + arr = np.arange(6).astype(np.int16).reshape(3, 2) df = DataFrame(arr) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 63c5091865160..699018f08adee 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2734,6 +2734,17 @@ def test_loc_getitem_nullable_index_with_duplicates(): tm.assert_series_equal(res, expected) +def test_loc_setitem_uint8_upcast(): + # GH#26049 + + df = DataFrame([1, 2, 3, 4], columns=["col1"], dtype="uint8") + df.loc[2, "col1"] = 300 # value that can't be held in uint8 + + # TODO: would be better to get uint16? + expected = DataFrame([1, 2, 300, 4], columns=["col1"], dtype="int64") + tm.assert_frame_equal(df, expected) + + class TestLocSeries: @pytest.mark.parametrize("val,expected", [(2 ** 63 - 1, 3), (2 ** 63, 4)]) def test_loc_uint64(self, val, expected): diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 94a5ca38afce4..8a41a5de468cc 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -1079,16 +1079,25 @@ def expected(self): def test_int_key(self, obj, key, expected, val, indexer_sli, is_inplace, request): if not isinstance(val, np.int16): + # with python int we end up with int64 mark = pytest.mark.xfail request.node.add_marker(mark) super().test_int_key(obj, key, expected, val, indexer_sli, is_inplace) def test_mask_key(self, obj, key, expected, val, indexer_sli, request): if not isinstance(val, np.int16): + # with python int we end up with int64 mark = pytest.mark.xfail request.node.add_marker(mark) super().test_mask_key(obj, key, expected, val, indexer_sli) + def test_series_where(self, obj, key, expected, val, is_inplace, request): + if not isinstance(val, np.int16): + # 
with python int we end up with int64 + mark = pytest.mark.xfail + request.node.add_marker(mark) + super().test_series_where(obj, key, expected, val, is_inplace) + @pytest.mark.parametrize("val", [2 ** 33 + 1.0, 2 ** 33 + 1.1, 2 ** 62]) class TestSmallIntegerSetitemUpcast(SetitemCastingEquivalents): @@ -1109,20 +1118,14 @@ def expected(self, val): dtype = "i8" return Series([val, 2, 3], dtype=dtype) - def test_series_where(self, obj, key, expected, val, is_inplace, request): - if isinstance(val, float) and val % 1 == 0: - mark = pytest.mark.xfail - request.node.add_marker(mark) - super().test_series_where(obj, key, expected, val, is_inplace) - def test_int_key(self, obj, key, expected, val, indexer_sli, is_inplace, request): - if val % 1 == 0: + if val % 1 == 0 and isinstance(val, float): mark = pytest.mark.xfail request.node.add_marker(mark) super().test_int_key(obj, key, expected, val, indexer_sli, is_inplace) def test_mask_key(self, obj, key, expected, val, indexer_sli, request): - if val % 1 == 0: + if val % 1 == 0 and isinstance(val, float): mark = pytest.mark.xfail request.node.add_marker(mark) super().test_mask_key(obj, key, expected, val, indexer_sli) From d6af3551535c3453da963f7d89bec8546bc7e855 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 8 Jan 2022 12:14:06 -0800 Subject: [PATCH 2/3] whatsnew --- doc/source/whatsnew/v1.5.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 2cbc7b06b89df..730e09c12e2a8 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -154,6 +154,7 @@ Interval Indexing ^^^^^^^^ - Bug in :meth:`Series.__setitem__` with a non-integer :class:`Index` when using an integer key to set a value that cannot be set inplace where a ``ValueError`` was raised insead of casting to a common dtype (:issue:`45070`) +- Bug when setting an integer too large for a :class:`Series` dtype failing to coerce to a common type (:issue:`26049`) - Missing From 
a39c9a92c4c9611698811a792d8f9089fcb8bce8 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 8 Jan 2022 13:06:56 -0800 Subject: [PATCH 3/3] xfail on ArrayManager --- pandas/tests/frame/indexing/test_where.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 62a0eadd48558..2ee777cf53d29 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -753,16 +753,13 @@ def test_where_try_cast_deprecated(frame_or_series): obj.where(mask, -1, try_cast=False) +@pytest.mark.xfail( + reason="After fixing a bug in can_hold_element, we don't go through " + "the deprecated path, and also up-cast to int64 instead of int32 " + "(for now)." +) def test_where_int_downcasting_deprecated(using_array_manager, request): # GH#44597 - if not using_array_manager: - mark = pytest.mark.xfail( - reason="After fixing a bug in can_hold_element, we don't go through " - "the deprecated path, and also up-cast to int64 instead of int32 " - "(for now)." - ) - request.node.add_marker(mark) - arr = np.arange(6).astype(np.int16).reshape(3, 2) df = DataFrame(arr)