From ba6db3e76e70df6500116501f01b4ce90cf21ce5 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 19 May 2021 20:21:07 -0700 Subject: [PATCH] BUG: Series(range_obj_outside_i8_bounds) --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/core/construction.py | 21 ++++++++++++++++- pandas/core/internals/construction.py | 3 ++- pandas/tests/series/test_constructors.py | 30 ++++++++++++++++++++++++ 4 files changed, 53 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 1eb22436204a8..7e913e8626b41 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -975,6 +975,7 @@ Other - Bug in :meth:`DataFrame.convert_dtypes` incorrectly raised ValueError when called on an empty DataFrame (:issue:`40393`) - Bug in :meth:`DataFrame.clip` not interpreting missing values as no threshold (:issue:`40420`) - Bug in :class:`Series` backed by :class:`DatetimeArray` or :class:`TimedeltaArray` sometimes failing to set the array's ``freq`` to ``None`` (:issue:`41425`) +- Bug in creating a :class:`Series` from a ``range`` object that does not fit in the bounds of ``int64`` dtype (:issue:`30173`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 0fef02b1489ac..01e87f8434959 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -543,7 +543,7 @@ def sanitize_array( elif isinstance(data, range): # GH#16804 - arr = np.arange(data.start, data.stop, data.step, dtype="int64") + arr = range_to_ndarray(data) subarr = _try_cast(arr, dtype, copy, raise_cast_failure) elif not is_list_like(data): @@ -574,6 +574,25 @@ def sanitize_array( return subarr +def range_to_ndarray(rng: range) -> np.ndarray: + """ + Cast a range object to ndarray. + """ + # GH#30171 perf avoid realizing range as a list in np.array + try: + arr = np.arange(rng.start, rng.stop, rng.step, dtype="int64") + except OverflowError: + # GH#30173 handling for ranges that overflow int64 + if (rng.start >= 0 and rng.step > 0) or (rng.stop >= 0 and rng.step < 0): + try: + arr = np.arange(rng.start, rng.stop, rng.step, dtype="uint64") + except OverflowError: + arr = construct_1d_object_array_from_listlike(list(rng)) + else: + arr = construct_1d_object_array_from_listlike(list(rng)) + return arr + + def _sanitize_ndim( result: ArrayLike, data, dtype: DtypeObj | None, index: Index | None ) -> ArrayLike: diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 06d30d6ed72e8..77f015260bff0 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -64,6 +64,7 @@ from pandas.core.construction import ( ensure_wrapped_if_datetimelike, extract_array, + range_to_ndarray, sanitize_array, ) from pandas.core.indexes import base as ibase @@ -527,7 +528,7 @@ def _prep_ndarray(values, copy: bool = True) -> np.ndarray: if len(values) == 0: return np.empty((0, 0), dtype=object) elif isinstance(values, range): - arr = np.arange(values.start, values.stop, values.step, dtype="int64") + arr = range_to_ndarray(values) return arr[..., np.newaxis] def convert(v): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index e74d900d1b04d..0e1c98e381ff7 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1525,6 +1525,36 @@ def test_constructor_range_dtype(self, dtype): result = Series(range(5), dtype=dtype) tm.assert_series_equal(result, expected) + def test_constructor_range_overflows(self): + # GH#30173 range objects that overflow int64 + rng = range(2 ** 63, 2 ** 63 + 4) + ser = Series(rng) + expected = Series(list(rng)) + tm.assert_series_equal(ser, expected) + assert list(ser) == list(rng) + assert ser.dtype == np.uint64 + + rng2 = range(2 ** 63 + 4, 2 ** 63, -1) + ser2 = Series(rng2) + expected2 = Series(list(rng2)) + tm.assert_series_equal(ser2, expected2) + assert list(ser2) == list(rng2) + assert ser2.dtype == np.uint64 + + rng3 = range(-(2 ** 63), -(2 ** 63) - 4, -1) + ser3 = Series(rng3) + expected3 = Series(list(rng3)) + tm.assert_series_equal(ser3, expected3) + assert list(ser3) == list(rng3) + assert ser3.dtype == object + + rng4 = range(2 ** 73, 2 ** 73 + 4) + ser4 = Series(rng4) + expected4 = Series(list(rng4)) + tm.assert_series_equal(ser4, expected4) + assert list(ser4) == list(rng4) + assert ser4.dtype == object + def test_constructor_tz_mixed_data(self): # GH 13051 dt_list = [