From 280701632b7493f443a16f8dd8ca1aa12001c7a6 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 10 Jan 2022 21:58:49 -0800 Subject: [PATCH 1/3] BUG: Float64Index.astype('u8') returns Int64Index --- doc/source/whatsnew/v1.5.0.rst | 2 ++ pandas/_testing/__init__.py | 13 +++++++++---- pandas/conftest.py | 2 +- pandas/core/arrays/interval.py | 8 +++++++- pandas/core/dtypes/astype.py | 4 ++++ pandas/core/indexes/numeric.py | 5 ++++- pandas/tests/base/test_misc.py | 2 +- pandas/tests/indexes/numeric/test_astype.py | 12 ++++++++++++ pandas/tests/series/methods/test_astype.py | 11 +++++++++++ 9 files changed, 51 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 0173807cb9bd0..905cf1b420523 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -138,6 +138,8 @@ Numeric Conversion ^^^^^^^^^^ - Bug in constructing a :class:`Series` from a float-containing list or a floating-dtype ndarray-like (e.g. ``dask.Array``) and an integer dtype raising instead of casting like we would with an ``np.ndarray`` (:issue:`40110`) +- Bug in :meth:`Float64Index.astype` to unsigned integer dtype incorrect casting to ``np.int64`` dtype (:issue:`??`) +- Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` from floating dtype to unsigned integer dtype failing to raise in the presence of negative values (:issue:`45151`) - Strings diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index 0dfe3345b38e6..294c7daa74dc8 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -494,14 +494,19 @@ def all_timeseries_index_generator(k: int = 10) -> Iterable[Index]: # make series -def makeFloatSeries(name=None): +def make_rand_series(name=None, dtype=np.float64): index = makeStringIndex(_N) - return Series(np.random.randn(_N), index=index, name=name) + data = np.random.randn(_N) + data = data.astype(dtype, copy=False) + return Series(data, index=index, name=name) + + +def makeFloatSeries(name=None): + return make_rand_series(name=name) def makeStringSeries(name=None): - index = makeStringIndex(_N) - return Series(np.random.randn(_N), index=index, name=name) + return make_rand_series(name=name) def makeObjectSeries(name=None): diff --git a/pandas/conftest.py b/pandas/conftest.py index 9009484f8d386..6ea04a0d4541f 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -728,7 +728,7 @@ def series_with_multilevel_index(): _narrow_series = { - f"{dtype.__name__}-series": tm.makeFloatSeries(name="a").astype(dtype) + f"{dtype.__name__}-series": tm.make_rand_series(name="a", dtype=dtype) for dtype in tm.NARROW_NP_DTYPES } diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 9a1435c3f033d..747d044e36559 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -39,6 +39,7 @@ npt, ) from pandas.compat.numpy import function as nv +from pandas.errors import IntCastingNaNError from pandas.util._decorators import Appender from pandas.core.dtypes.common import ( @@ -906,7 +907,12 @@ def astype(self, dtype, copy: bool = True): # np.nan entries to int subtypes new_left = Index(self._left, copy=False).astype(dtype.subtype) new_right = Index(self._right, copy=False).astype(dtype.subtype) - except TypeError as err: + except IntCastingNaNError: + # e.g test_subtype_integer + raise + except (TypeError, ValueError) as err: + # e.g. test_subtype_integer_errors f8->u8 can be lossy + # and raises ValueError msg = ( f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible" ) diff --git a/pandas/core/dtypes/astype.py b/pandas/core/dtypes/astype.py index a4d94dfe82960..78bef2d61aad8 100644 --- a/pandas/core/dtypes/astype.py +++ b/pandas/core/dtypes/astype.py @@ -196,6 +196,10 @@ def _astype_float_to_int_nansafe( raise IntCastingNaNError( "Cannot convert non-finite values (NA or inf) to integer" ) + if dtype.kind == "u": + # GH#45151 + if not (values >= 0).all(): + raise ValueError(f"Cannot losslessly cast from {values.dtype} to {dtype}") return values.astype(dtype, copy=copy) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index fa32953c38cb0..6317223cb51f6 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -246,7 +246,10 @@ def astype(self, dtype, copy: bool = True): # GH 13149 arr = astype_nansafe(self._values, dtype=dtype) if isinstance(self, Float64Index): - return Int64Index(arr, name=self.name) + if dtype.kind == "i": + return Int64Index(arr, name=self.name) + else: + return UInt64Index(arr, name=self.name) else: return NumericIndex(arr, name=self.name, dtype=dtype) elif self._is_backward_compat_public_numeric_index: diff --git a/pandas/tests/base/test_misc.py b/pandas/tests/base/test_misc.py index f3be4749fb3aa..dca36f6ba89fb 100644 --- a/pandas/tests/base/test_misc.py +++ b/pandas/tests/base/test_misc.py @@ -130,7 +130,7 @@ def test_memory_usage_components_series(series_with_simple_index): @pytest.mark.parametrize("dtype", tm.NARROW_NP_DTYPES) def test_memory_usage_components_narrow_series(dtype): - series = tm.makeFloatSeries(name="a").astype(dtype) + series = tm.make_rand_series(name="a", dtype=dtype) total_usage = series.memory_usage(index=True) non_index_usage = series.memory_usage(index=False) index_usage = series.index.memory_usage() diff --git a/pandas/tests/indexes/numeric/test_astype.py b/pandas/tests/indexes/numeric/test_astype.py index 89f26e953400d..7b37754be3089 100644 --- a/pandas/tests/indexes/numeric/test_astype.py +++ b/pandas/tests/indexes/numeric/test_astype.py @@ -10,10 +10,22 @@ from pandas.core.indexes.api import ( Float64Index, Int64Index, + UInt64Index, ) class TestAstype: + def test_astype_float64_to_uint64(self): + # used to incorrectly return Int64Index + idx = Float64Index([0.0, 5.0, 10.0, 15.0, 20.0]) + result = idx.astype("u8") + expected = UInt64Index([0, 5, 10, 15, 20]) + tm.assert_index_equal(result, expected) + + idx_with_negatives = idx - 10 + with pytest.raises(ValueError, match="losslessly"): + idx_with_negatives.astype(np.uint64) + def test_astype_float64_to_object(self): float_index = Float64Index([0.0, 2.5, 5.0, 7.5, 10.0]) result = float_index.astype(object) diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index 8197722687e78..b51cf92e8fd8b 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -322,6 +322,17 @@ def test_astype_cast_object_int_fail(self, dtype): with pytest.raises(ValueError, match=msg): arr.astype(dtype) + def test_astype_float_to_uint_negatives_raise( + self, float_numpy_dtype, any_unsigned_int_numpy_dtype + ): + # GH#45151 + # TODO: same for EA float/uint dtypes + arr = np.arange(5).astype(float_numpy_dtype) - 3 # includes negatives + ser = Series(arr) + + with pytest.raises(ValueError, match="losslessly"): + ser.astype(any_unsigned_int_numpy_dtype) + def test_astype_cast_object_int(self): arr = Series(["1", "2", "3", "4"], dtype=object) result = arr.astype(int) From de96af86ceca9d958fe383c44154d304eebca86a Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 10 Jan 2022 22:06:38 -0800 Subject: [PATCH 2/3] GH ref --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/tests/indexes/numeric/test_astype.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 905cf1b420523..e975bdda4047b 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -138,7 +138,7 @@ Numeric Conversion ^^^^^^^^^^ - Bug in constructing a :class:`Series` from a float-containing list or a floating-dtype ndarray-like (e.g. ``dask.Array``) and an integer dtype raising instead of casting like we would with an ``np.ndarray`` (:issue:`40110`) -- Bug in :meth:`Float64Index.astype` to unsigned integer dtype incorrect casting to ``np.int64`` dtype (:issue:`??`) +- Bug in :meth:`Float64Index.astype` to unsigned integer dtype incorrectly casting to ``np.int64`` dtype (:issue:`45309`) - Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` from floating dtype to unsigned integer dtype failing to raise in the presence of negative values (:issue:`45151`) - diff --git a/pandas/tests/indexes/numeric/test_astype.py b/pandas/tests/indexes/numeric/test_astype.py index 7b37754be3089..ee75f56eac7ce 100644 --- a/pandas/tests/indexes/numeric/test_astype.py +++ b/pandas/tests/indexes/numeric/test_astype.py @@ -16,7 +16,7 @@ class TestAstype: def test_astype_float64_to_uint64(self): - # used to incorrectly return Int64Index + # GH#45309 used to incorrectly return Int64Index idx = Float64Index([0.0, 5.0, 10.0, 15.0, 20.0]) result = idx.astype("u8") expected = UInt64Index([0, 5, 10, 15, 20]) From e86bbd06f260825003cdae2f0a68f75fe1302da2 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 11 Jan 2022 12:33:53 -0800 Subject: [PATCH 3/3] remove xfail --- pandas/tests/indexes/interval/test_astype.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tests/indexes/interval/test_astype.py b/pandas/tests/indexes/interval/test_astype.py index bdb9c3f97e798..cac145aa30fd0 100644 --- a/pandas/tests/indexes/interval/test_astype.py +++ b/pandas/tests/indexes/interval/test_astype.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas.compat import is_platform_arm - from pandas.core.dtypes.dtypes import ( CategoricalDtype, IntervalDtype, @@ -170,7 +168,6 @@ def test_subtype_integer_with_non_integer_borders(self, subtype): ) tm.assert_index_equal(result, expected) - @pytest.mark.xfail(is_platform_arm(), reason="GH 41740") def test_subtype_integer_errors(self): # float64 -> uint64 fails with negative values index = interval_range(-10.0, 10.0)