diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index c26f8288f59ab..cfc5ac329a847 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -1008,6 +1008,7 @@ Other
 - Bug in :meth:`DataFrame.agg()` not sorting the aggregated axis in the order of the provided aggragation functions when one or more aggregation function fails to produce results (:issue:`33634`)
 - Bug in :meth:`DataFrame.clip` not interpreting missing values as no threshold (:issue:`40420`)
 - Bug in :class:`Series` backed by :class:`DatetimeArray` or :class:`TimedeltaArray` sometimes failing to set the array's ``freq`` to ``None`` (:issue:`41425`)
+- Bug in creating a :class:`Series` from a ``range`` object that does not fit in the bounds of ``int64`` dtype (:issue:`30173`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/core/construction.py b/pandas/core/construction.py
index e83aa02f25ada..b8828f838b94c 100644
--- a/pandas/core/construction.py
+++ b/pandas/core/construction.py
@@ -502,7 +502,7 @@ def sanitize_array(
         data = lib.item_from_zerodim(data)
     elif isinstance(data, range):
         # GH#16804
-        data = np.arange(data.start, data.stop, data.step, dtype="int64")
+        data = range_to_ndarray(data)
         copy = False
 
     if not is_list_like(data):
@@ -569,6 +569,25 @@ def sanitize_array(
     return subarr
 
 
+def range_to_ndarray(rng: range) -> np.ndarray:
+    """
+    Cast a range object to ndarray.
+    """
+    # GH#30171 perf avoid realizing range as a list in np.array
+    try:
+        arr = np.arange(rng.start, rng.stop, rng.step, dtype="int64")
+    except OverflowError:
+        # GH#30173 handling for ranges that overflow int64
+        if (rng.start >= 0 and rng.step > 0) or (rng.stop >= 0 and rng.step < 0):
+            try:
+                arr = np.arange(rng.start, rng.stop, rng.step, dtype="uint64")
+            except OverflowError:
+                arr = construct_1d_object_array_from_listlike(list(rng))
+        else:
+            arr = construct_1d_object_array_from_listlike(list(rng))
+    return arr
+
+
 def _sanitize_ndim(
     result: ArrayLike, data, dtype: DtypeObj | None, index: Index | None
 ) -> ArrayLike:
diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py
index 5e58f6148e6ad..5c2bed109e3bf 100644
--- a/pandas/core/internals/construction.py
+++ b/pandas/core/internals/construction.py
@@ -66,6 +66,7 @@
 from pandas.core.construction import (
     ensure_wrapped_if_datetimelike,
     extract_array,
+    range_to_ndarray,
     sanitize_array,
 )
 from pandas.core.indexes import base as ibase
@@ -530,7 +531,7 @@ def _prep_ndarray(values, copy: bool = True) -> np.ndarray:
         if len(values) == 0:
             return np.empty((0, 0), dtype=object)
         elif isinstance(values, range):
-            arr = np.arange(values.start, values.stop, values.step, dtype="int64")
+            arr = range_to_ndarray(values)
             return arr[..., np.newaxis]
 
         def convert(v):
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index e74d900d1b04d..0e1c98e381ff7 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -1525,6 +1525,36 @@ def test_constructor_range_dtype(self, dtype):
         result = Series(range(5), dtype=dtype)
         tm.assert_series_equal(result, expected)
 
+    def test_constructor_range_overflows(self):
+        # GH#30173 range objects that overflow int64
+        rng = range(2 ** 63, 2 ** 63 + 4)
+        ser = Series(rng)
+        expected = Series(list(rng))
+        tm.assert_series_equal(ser, expected)
+        assert list(ser) == list(rng)
+        assert ser.dtype == np.uint64
+
+        rng2 = range(2 ** 63 + 4, 2 ** 63, -1)
+        ser2 = Series(rng2)
+        expected2 = Series(list(rng2))
+        tm.assert_series_equal(ser2, expected2)
+        assert list(ser2) == list(rng2)
+        assert ser2.dtype == np.uint64
+
+        rng3 = range(-(2 ** 63), -(2 ** 63) - 4, -1)
+        ser3 = Series(rng3)
+        expected3 = Series(list(rng3))
+        tm.assert_series_equal(ser3, expected3)
+        assert list(ser3) == list(rng3)
+        assert ser3.dtype == object
+
+        rng4 = range(2 ** 73, 2 ** 73 + 4)
+        ser4 = Series(rng4)
+        expected4 = Series(list(rng4))
+        tm.assert_series_equal(ser4, expected4)
+        assert list(ser4) == list(rng4)
+        assert ser4.dtype == object
+
     def test_constructor_tz_mixed_data(self):
         # GH 13051
         dt_list = [