diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index e3f772ac026ab..f36447b142b6e 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -302,7 +302,9 @@ Conversion - Bug in :meth:`Float64Index.astype` to unsigned integer dtype incorrectly casting to ``np.int64`` dtype (:issue:`45309`) - Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` from floating dtype to unsigned integer dtype failing to raise in the presence of negative values (:issue:`45151`) - Bug in :func:`array` with ``FloatingDtype`` and values containing float-castable strings incorrectly raising (:issue:`45424`) +- Bug in :meth:`DataFrame.diff` upcasting ``np.int8`` and ``np.int16`` to ``np.float32`` which is inconsistent with other methods like :meth:`DataFrame.shift` (:issue:`45562`) - Bug when comparing string and datetime64ns objects causing ``OverflowError`` exception. (:issue:`45506`) +- Strings ^^^^^^^ diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 8c10b62d83f9e..61383eb1ba3dc 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1617,9 +1617,9 @@ def diff(arr, n: int, axis: int = 0): # int8, int16 are incompatible with float64, # see https://github.com/cython/cython/issues/2646 if arr.dtype.name in ["int8", "int16"]: - dtype = np.float32 - else: - dtype = np.float64 + arr = arr.astype("int32") + + dtype = np.float64 orig_ndim = arr.ndim if orig_ndim == 1: diff --git a/pandas/tests/frame/methods/test_diff.py b/pandas/tests/frame/methods/test_diff.py index f61529659e9d5..1a8571067c9bf 100644 --- a/pandas/tests/frame/methods/test_diff.py +++ b/pandas/tests/frame/methods/test_diff.py @@ -296,8 +296,6 @@ def test_diff_all_int_dtype(self, any_int_numpy_dtype): df = DataFrame(range(5)) df = df.astype(any_int_numpy_dtype) result = df.diff() - expected_dtype = ( - "float32" if any_int_numpy_dtype in ("int8", "int16") else "float64" - ) + expected_dtype = "float64" expected = DataFrame([np.nan, 1.0, 1.0, 1.0, 1.0], dtype=expected_dtype) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby_shift_diff.py b/pandas/tests/groupby/test_groupby_shift_diff.py index c989c0e0c94cd..3c049aee78a12 100644 --- a/pandas/tests/groupby/test_groupby_shift_diff.py +++ b/pandas/tests/groupby/test_groupby_shift_diff.py @@ -76,7 +76,7 @@ def test_group_diff_real(any_real_numpy_dtype): ) result = df.groupby("a")["b"].diff() exp_dtype = "float" - if any_real_numpy_dtype in ["int8", "int16", "float32"]: + if any_real_numpy_dtype == "float32": exp_dtype = "float32" expected = Series([np.nan, np.nan, np.nan, 1.0, 3.0], dtype=exp_dtype, name="b") tm.assert_series_equal(result, expected) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 0916f0b45719e..d851489602df5 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -2447,7 +2447,7 @@ def test_diff_ea_axis(self): def test_diff_low_precision_int(self, dtype): arr = np.array([0, 1, 1, 0, 0], dtype=dtype) result = algos.diff(arr, 1) - expected = np.array([np.nan, 1, 0, -1, 0], dtype="float32") + expected = np.array([np.nan, 1, 0, -1, 0], dtype="float64") tm.assert_numpy_array_equal(result, expected)