From c76953c48b129a976b71daf92c75e4310beda538 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Tue, 7 Sep 2021 16:01:51 -0400 Subject: [PATCH 1/5] WIP --- doc/source/whatsnew/v1.3.3.rst | 1 + pandas/core/array_algos/putmask.py | 2 +- pandas/core/dtypes/cast.py | 3 ++- pandas/tests/series/methods/test_fillna.py | 8 ++++++++ 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.3.3.rst b/doc/source/whatsnew/v1.3.3.rst index 6ba623b9fdbe5..f7cdeac81d670 100644 --- a/doc/source/whatsnew/v1.3.3.rst +++ b/doc/source/whatsnew/v1.3.3.rst @@ -25,6 +25,7 @@ Fixed regressions - Fixed regression in :func:`is_list_like` where objects with ``__iter__`` set to ``None`` would be identified as iterable (:issue:`43373`) - Fixed regression in :meth:`.Resampler.aggregate` when used after column selection would raise if ``func`` is a list of aggregation functions (:issue:`42905`) - Fixed regression in :meth:`DataFrame.corr` where Kendall correlation would produce incorrect results for columns with repeated values (:issue:`43401`) +- Fixed regression in :meth:`Series.fillna` raising ``TypeError`` when specifying ``value`` argument with dtypes of larger size then .. (:issue:`43424`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/core/array_algos/putmask.py b/pandas/core/array_algos/putmask.py index 3a67f7d871f86..a30b69c9f10f7 100644 --- a/pandas/core/array_algos/putmask.py +++ b/pandas/core/array_algos/putmask.py @@ -114,7 +114,7 @@ def putmask_smart(values: np.ndarray, mask: np.ndarray, new) -> np.ndarray: new = np.asarray(new) - if values.dtype.kind == new.dtype.kind: + if values.dtype.kind == new.dtype.kind or (values.dtype.kind == "f" and new.dtype.kind in ["i", "u"]): # preserves dtype if possible return _putmask_preserve(values, new, mask) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 6f10a490c7147..0dff7b9468ceb 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -2208,10 +2208,11 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool: elif dtype.kind == "f": if tipo is not None: - # TODO: itemsize check? if tipo.kind not in ["f", "i", "u"]: # Anything other than float/integer we cannot hold return False + # elif dtype.itemsize < tipo.itemsize: + # return False elif not isinstance(tipo, np.dtype): # i.e. 
nullable IntegerDtype or FloatingDtype; # we can put this into an ndarray losslessly iff it has no NAs diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index 03e126587ce1a..e6b65f560dc5c 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -869,6 +869,14 @@ def test_fillna_int(self): assert return_value is None tm.assert_series_equal(ser.fillna(method="ffill", inplace=False), ser) + @pytest.mark.parametrize("filler", [0.5, 1, np.uint8(2), np.int16(-2), np.uint64(2), np.int64(32)]) + def test_fillna_value_f32(self, filler): + # GH-43424 + ser = Series([np.nan, 1.2], dtype="float32") + result = ser.fillna(value={0: filler}) + expected = Series([filler, 1.2], dtype="float32") + tm.assert_series_equal(result, expected) + def test_datetime64tz_fillna_round_issue(self): # GH#14872 From d95c157feddf8d63662bed59489898212423cd9c Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Tue, 7 Sep 2021 22:12:50 -0400 Subject: [PATCH 2/5] REGR: fillna on f32 column raising for f64 --- doc/source/whatsnew/v1.3.3.rst | 4 +++- pandas/core/array_algos/putmask.py | 4 +++- pandas/core/dtypes/cast.py | 4 ++-- pandas/tests/indexing/test_indexing.py | 18 ++++++++++++++++ pandas/tests/series/methods/test_fillna.py | 24 ++++++++++++++-------- 5 files changed, 42 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v1.3.3.rst b/doc/source/whatsnew/v1.3.3.rst index f7cdeac81d670..5e1f8989b9d7c 100644 --- a/doc/source/whatsnew/v1.3.3.rst +++ b/doc/source/whatsnew/v1.3.3.rst @@ -25,7 +25,8 @@ Fixed regressions - Fixed regression in :func:`is_list_like` where objects with ``__iter__`` set to ``None`` would be identified as iterable (:issue:`43373`) - Fixed regression in :meth:`.Resampler.aggregate` when used after column selection would raise if ``func`` is a list of aggregation functions (:issue:`42905`) - Fixed regression in :meth:`DataFrame.corr` where Kendall correlation would 
produce incorrect results for columns with repeated values (:issue:`43401`) -- Fixed regression in :meth:`Series.fillna` raising ``TypeError`` when specifying ``value`` argument with dtypes of larger size then .. (:issue:`43424`) +- Fixed regression in :meth:`Series.fillna` raising ``TypeError`` when filling ``float32`` ``Series`` with list-like fill value having a larger dtype (like ``float64``) (:issue:`43424`) +- .. --------------------------------------------------------------------------- @@ -44,6 +45,7 @@ Performance improvements Bug fixes ~~~~~~~~~ - Bug in :meth:`.DataFrameGroupBy.agg` and :meth:`.DataFrameGroupBy.transform` with ``engine="numba"`` where ``index`` data was not being correctly passed into ``func`` (:issue:`43133`) +- Bug in setting a ``float32`` :class:`Series` with a larger dtype (like ``float64``) not upcasting (:issue:`43424`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/array_algos/putmask.py b/pandas/core/array_algos/putmask.py index a30b69c9f10f7..86f9caead238b 100644 --- a/pandas/core/array_algos/putmask.py +++ b/pandas/core/array_algos/putmask.py @@ -114,7 +114,9 @@ def putmask_smart(values: np.ndarray, mask: np.ndarray, new) -> np.ndarray: new = np.asarray(new) - if values.dtype.kind == new.dtype.kind or (values.dtype.kind == "f" and new.dtype.kind in ["i", "u"]): + values_kind = values.dtype.kind + new_kind = new.dtype.kind + if values_kind == new_kind or (values_kind == "f" and new_kind in ["i", "u"]): # preserves dtype if possible return _putmask_preserve(values, new, mask) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 0dff7b9468ceb..4e4404c427735 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -2211,8 +2211,8 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool: if tipo.kind not in ["f", "i", "u"]: # Anything other than float/integer we cannot hold return False - # elif dtype.itemsize < tipo.itemsize: - 
# return False + elif dtype.itemsize < tipo.itemsize: + return False elif not isinstance(tipo, np.dtype): # i.e. nullable IntegerDtype or FloatingDtype; # we can put this into an ndarray losslessly iff it has no NAs diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 01407f1f9bae7..fd9f91b1ec1b8 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -200,6 +200,24 @@ def test_setitem_dtype_upcast3(self): assert is_float_dtype(left["foo"]) assert is_float_dtype(left["baz"]) + @pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64]) + def test_setitem_dtype_float_listlike_upcasts(self, dtype, indexer_sli): + # GH-43424 + result = Series([1.1, 1.1], dtype=np.float32) + indexer_sli(result)[[0]] = np.array([1], dtype=dtype) + expected = Series([1.0, 1.1], dtype=np.float64) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "dtype,new_dtype", [(np.int8, np.int64), (np.uint16, np.uint32)] + ) + def test_setitem_dtype_int_listlike_upcasts(self, dtype, new_dtype, indexer_sli): + # GH-43424 + result = Series([1, 1], dtype=dtype) + indexer_sli(result)[[0]] = np.array([2], dtype=new_dtype) + expected = Series([2, 1], dtype=new_dtype) + tm.assert_series_equal(result, expected) + def test_dups_fancy_indexing(self): # GH 3455 diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index e6b65f560dc5c..3863e9a3d7c42 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -702,6 +702,22 @@ def test_fillna_categorical_raises(self): with pytest.raises(TypeError, match=msg): ser.fillna(DataFrame({1: ["a"], 3: ["b"]})) + @pytest.mark.parametrize("fill_type", [np.float64, np.uint64, np.int64]) + def test_fillna_f32_upcast(self, fill_type): + # GH-43424 + ser = Series([np.nan, 1.2], dtype=np.float32) + fill_values = Series([2, 2], dtype=fill_type) + result = 
ser.fillna(fill_values) + expected = Series([2.0, 1.2], dtype="float64") + tm.assert_series_equal(result, expected) + + def test_fillna_f32_upcast_with_dict(self): + # GH-43424 + ser = Series([np.nan, 1.2], dtype=np.float32) + result = ser.fillna({0: 1}) + expected = Series([1.0, 1.2]) + tm.assert_series_equal(result, expected) + # --------------------------------------------------------------- # Invalid Usages @@ -869,14 +885,6 @@ def test_fillna_int(self): assert return_value is None tm.assert_series_equal(ser.fillna(method="ffill", inplace=False), ser) - @pytest.mark.parametrize("filler", [0.5, 1, np.uint8(2), np.int16(-2), np.uint64(2), np.int64(32)]) - def test_fillna_value_f32(self, filler): - # GH-43424 - ser = Series([np.nan, 1.2], dtype="float32") - result = ser.fillna(value={0: filler}) - expected = Series([filler, 1.2], dtype="float32") - tm.assert_series_equal(result, expected) - def test_datetime64tz_fillna_round_issue(self): # GH#14872 From 319992ae985e3662d888ecbbee249ea79f9062bf Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Wed, 8 Sep 2021 18:04:09 -0400 Subject: [PATCH 3/5] Use less invasive fix --- doc/source/whatsnew/v1.3.3.rst | 3 +-- pandas/core/array_algos/putmask.py | 14 +++++++++----- pandas/core/dtypes/cast.py | 13 ++++++------- pandas/tests/indexing/test_indexing.py | 18 ------------------ pandas/tests/series/methods/test_fillna.py | 11 ++++++----- 5 files changed, 22 insertions(+), 37 deletions(-) diff --git a/doc/source/whatsnew/v1.3.3.rst b/doc/source/whatsnew/v1.3.3.rst index 5e1f8989b9d7c..908fee3501e93 100644 --- a/doc/source/whatsnew/v1.3.3.rst +++ b/doc/source/whatsnew/v1.3.3.rst @@ -25,7 +25,7 @@ Fixed regressions - Fixed regression in :func:`is_list_like` where objects with ``__iter__`` set to ``None`` would be identified as iterable (:issue:`43373`) - Fixed regression in :meth:`.Resampler.aggregate` when used after column selection would raise if ``func`` is a list of aggregation functions (:issue:`42905`) - Fixed 
regression in :meth:`DataFrame.corr` where Kendall correlation would produce incorrect results for columns with repeated values (:issue:`43401`) -- Fixed regression in :meth:`Series.fillna` raising ``TypeError`` when filling ``float32`` ``Series`` with list-like fill value having a larger dtype (like ``float64``) (:issue:`43424`) +- Fixed regression in :meth:`Series.fillna` raising ``TypeError`` when filling ``float32`` ``Series`` with list-like fill value having a dtype which couldn't cast lostlessly (like ``float64``) (:issue:`43424`) - .. --------------------------------------------------------------------------- @@ -45,7 +45,6 @@ Performance improvements Bug fixes ~~~~~~~~~ - Bug in :meth:`.DataFrameGroupBy.agg` and :meth:`.DataFrameGroupBy.transform` with ``engine="numba"`` where ``index`` data was not being correctly passed into ``func`` (:issue:`43133`) -- Bug in setting a ``float32`` :class:`Series` with a larger dtype (like ``float64``) not upcasting (:issue:`43424`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/array_algos/putmask.py b/pandas/core/array_algos/putmask.py index 86f9caead238b..a8f69497d4019 100644 --- a/pandas/core/array_algos/putmask.py +++ b/pandas/core/array_algos/putmask.py @@ -41,8 +41,14 @@ def putmask_inplace(values: ArrayLike, mask: np.ndarray, value: Any) -> None: if lib.is_scalar(value) and isinstance(values, np.ndarray): value = convert_scalar_for_putitemlike(value, values.dtype) - if not isinstance(values, np.ndarray) or ( - values.dtype == object and not lib.is_scalar(value) + if ( + not isinstance(values, np.ndarray) + or (values.dtype == object and not lib.is_scalar(value)) + # GH#43424: np.putmask raises TypeError if we cannot cast between types with + # rule = "safe", a stricter guarantee we may not have here + or ( + isinstance(value, np.ndarray) and not np.can_cast(value.dtype, values.dtype) + ) ): # GH#19266 using np.putmask gives unexpected results with listlike 
value if is_list_like(value) and len(value) == len(values): @@ -114,9 +120,7 @@ def putmask_smart(values: np.ndarray, mask: np.ndarray, new) -> np.ndarray: new = np.asarray(new) - values_kind = values.dtype.kind - new_kind = new.dtype.kind - if values_kind == new_kind or (values_kind == "f" and new_kind in ["i", "u"]): + if values.dtype.kind == new.dtype.kind: # preserves dtype if possible return _putmask_preserve(values, new, mask) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 4e4404c427735..d6a8790afd998 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -2185,6 +2185,11 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool: # ExtensionBlock._can_hold_element return True + # error: Non-overlapping equality check (left operand type: "dtype[Any]", right + # operand type: "Type[object]") + if dtype == object: # type: ignore[comparison-overlap] + return True + tipo = maybe_infer_dtype_type(element) if dtype.kind in ["i", "u"]: @@ -2208,11 +2213,10 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool: elif dtype.kind == "f": if tipo is not None: + # TODO: itemsize check? if tipo.kind not in ["f", "i", "u"]: # Anything other than float/integer we cannot hold return False - elif dtype.itemsize < tipo.itemsize: - return False elif not isinstance(tipo, np.dtype): # i.e. nullable IntegerDtype or FloatingDtype; # we can put this into an ndarray losslessly iff it has no NAs @@ -2233,11 +2237,6 @@ def can_hold_element(arr: ArrayLike, element: Any) -> bool: return tipo.kind == "b" return lib.is_bool(element) - # error: Non-overlapping equality check (left operand type: "dtype[Any]", right - # operand type: "Type[object]") - elif dtype == object: # type: ignore[comparison-overlap] - return True - elif dtype.kind == "S": # TODO: test tests.frame.methods.test_replace tests get here, # need more targeted tests. 
xref phofl has a PR about this diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index fd9f91b1ec1b8..01407f1f9bae7 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -200,24 +200,6 @@ def test_setitem_dtype_upcast3(self): assert is_float_dtype(left["foo"]) assert is_float_dtype(left["baz"]) - @pytest.mark.parametrize("dtype", [np.int64, np.uint64, np.float64]) - def test_setitem_dtype_float_listlike_upcasts(self, dtype, indexer_sli): - # GH-43424 - result = Series([1.1, 1.1], dtype=np.float32) - indexer_sli(result)[[0]] = np.array([1], dtype=dtype) - expected = Series([1.0, 1.1], dtype=np.float64) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize( - "dtype,new_dtype", [(np.int8, np.int64), (np.uint16, np.uint32)] - ) - def test_setitem_dtype_int_listlike_upcasts(self, dtype, new_dtype, indexer_sli): - # GH-43424 - result = Series([1, 1], dtype=dtype) - indexer_sli(result)[[0]] = np.array([2], dtype=new_dtype) - expected = Series([2, 1], dtype=new_dtype) - tm.assert_series_equal(result, expected) - def test_dups_fancy_indexing(self): # GH 3455 diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index 3863e9a3d7c42..947681da9b23d 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -702,20 +702,21 @@ def test_fillna_categorical_raises(self): with pytest.raises(TypeError, match=msg): ser.fillna(DataFrame({1: ["a"], 3: ["b"]})) - @pytest.mark.parametrize("fill_type", [np.float64, np.uint64, np.int64]) - def test_fillna_f32_upcast(self, fill_type): + @pytest.mark.parametrize("dtype", [float, "float32", "float64"]) + @pytest.mark.parametrize("fill_type", tm.ALL_REAL_NUMPY_DTYPES) + def test_fillna_f32_upcast(self, dtype, fill_type): # GH-43424 - ser = Series([np.nan, 1.2], dtype=np.float32) + ser = Series([np.nan, 1.2], dtype=dtype) fill_values = 
Series([2, 2], dtype=fill_type) result = ser.fillna(fill_values) - expected = Series([2.0, 1.2], dtype="float64") + expected = Series([2.0, 1.2], dtype=dtype) tm.assert_series_equal(result, expected) def test_fillna_f32_upcast_with_dict(self): # GH-43424 ser = Series([np.nan, 1.2], dtype=np.float32) result = ser.fillna({0: 1}) - expected = Series([1.0, 1.2]) + expected = Series([1.0, 1.2], dtype=np.float32) tm.assert_series_equal(result, expected) # --------------------------------------------------------------- From 10ed55aa8fb864973fbc2ab299a702e545c14028 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Wed, 8 Sep 2021 18:04:45 -0400 Subject: [PATCH 4/5] Change test name --- pandas/tests/series/methods/test_fillna.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index 947681da9b23d..8361ec6c6b5fa 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -704,7 +704,7 @@ def test_fillna_categorical_raises(self): @pytest.mark.parametrize("dtype", [float, "float32", "float64"]) @pytest.mark.parametrize("fill_type", tm.ALL_REAL_NUMPY_DTYPES) - def test_fillna_f32_upcast(self, dtype, fill_type): + def test_fillna_float_casting(self, dtype, fill_type): # GH-43424 ser = Series([np.nan, 1.2], dtype=dtype) fill_values = Series([2, 2], dtype=fill_type) From d439764811672bc2e0b6a98e3f3a47c324144eac Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Wed, 8 Sep 2021 18:11:52 -0400 Subject: [PATCH 5/5] Clarify whatsnew applies to any float --- doc/source/whatsnew/v1.3.3.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.3.rst b/doc/source/whatsnew/v1.3.3.rst index e6c0a08666f3b..df09873742edd 100644 --- a/doc/source/whatsnew/v1.3.3.rst +++ b/doc/source/whatsnew/v1.3.3.rst @@ -26,7 +26,7 @@ Fixed regressions - Fixed regression in :func:`is_list_like` where objects with 
``__iter__`` set to ``None`` would be identified as iterable (:issue:`43373`) - Fixed regression in :meth:`.Resampler.aggregate` when used after column selection would raise if ``func`` is a list of aggregation functions (:issue:`42905`) - Fixed regression in :meth:`DataFrame.corr` where Kendall correlation would produce incorrect results for columns with repeated values (:issue:`43401`) -- Fixed regression in :meth:`Series.fillna` raising ``TypeError`` when filling ``float32`` ``Series`` with list-like fill value having a dtype which couldn't cast lostlessly (like ``float64``) (:issue:`43424`) +- Fixed regression in :meth:`Series.fillna` raising ``TypeError`` when filling ``float`` ``Series`` with list-like fill value having a dtype which couldn't be cast losslessly (like ``float32`` filled with ``float64``) (:issue:`43424`) - .. ---------------------------------------------------------------------------