diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index a889292cacc99..6b768fddb7037 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -955,6 +955,7 @@ Conversion - Bug in ``Timedelta`` construction and arithmetic that would not propagate the ``Overflow`` exception (:issue:`17367`) - Bug in :meth:`~DataFrame.astype` converting to object dtype when passed extension type classes (`DatetimeTZDtype``, ``CategoricalDtype``) rather than instances. Now a ``TypeError`` is raised when a class is passed (:issue:`17780`). - Bug in :meth:`to_numeric` in which elements were not always being coerced to numeric when ``errors='coerce'`` (:issue:`17007`, :issue:`17125`) +- Bug in ``DataFrame`` and ``Series`` constructors where ``range`` objects are converted to ``int32`` dtype on Windows instead of ``int64`` (:issue:`16804`) Indexing ^^^^^^^^ diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index b367fda002b74..3853ac017044c 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -100,6 +100,10 @@ def signature(f): 'varargs', 'keywords']) return argspec(args, defaults, varargs, keywords) + def get_range_parameters(data): + """Gets the start, stop, and step parameters from a range object""" + return data.start, data.stop, data.step + # have to explicitly put builtins into the namespace range = range map = map @@ -146,6 +150,24 @@ def bytes_to_str(b, encoding='ascii'): def signature(f): return inspect.getargspec(f) + def get_range_parameters(data): + """Gets the start, stop, and step parameters from a range object""" + # seems we only have indexing ops to infer + # rather than direct accessors + if len(data) > 1: + step = data[1] - data[0] + stop = data[-1] + step + start = data[0] + elif len(data): + start = data[0] + stop = data[0] + 1 + step = 1 + else: + start = stop = 0 + step = 1 + + return start, stop, step + # import iterator versions of these functions range = xrange intern = intern diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index b2e55d4826670..9cb01896424f7 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -10,7 +10,7 @@ is_int64_dtype) from pandas import compat -from pandas.compat import lrange, range +from pandas.compat import lrange, range, get_range_parameters from pandas.compat.numpy import function as nv from pandas.core.common import _all_none from pandas.core.indexes.base import Index, _index_shared_docs @@ -113,24 +113,7 @@ def from_range(cls, data, name=None, dtype=None, **kwargs): '{0}(...) must be called with object coercible to a ' 'range, {1} was passed'.format(cls.__name__, repr(data))) - if compat.PY3: - step = data.step - stop = data.stop - start = data.start - else: - # seems we only have indexing ops to infer - # rather than direct accessors - if len(data) > 1: - step = data[1] - data[0] - stop = data[-1] + step - start = data[0] - elif len(data): - start = data[0] - stop = data[0] + 1 - step = 1 - else: - start = stop = 0 - step = 1 + start, stop, step = get_range_parameters(data) return RangeIndex(start, stop, step, dtype=dtype, name=name, **kwargs) @classmethod diff --git a/pandas/core/series.py b/pandas/core/series.py index dbd91309ed185..1c92c4b8850ee 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -60,7 +60,8 @@ from pandas.core.indexes.period import PeriodIndex from pandas import compat from pandas.io.formats.terminal import get_terminal_size -from pandas.compat import zip, u, OrderedDict, StringIO +from pandas.compat import ( + zip, u, OrderedDict, StringIO, range, get_range_parameters) from pandas.compat.numpy import function as nv from pandas.core import accessor @@ -3177,6 +3178,11 @@ def _try_cast(arr, take_fast_path): subarr = maybe_cast_to_datetime(subarr, dtype) + elif isinstance(data, range): + # GH 16804 + start, stop, step = get_range_parameters(data) + arr = np.arange(start, stop, step, dtype='int64') + subarr = _try_cast(arr, False) else: subarr = _try_cast(data, False) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 7f1cc12ec4277..c55c79ef18602 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -829,7 +829,7 @@ def test_constructor_list_of_lists(self): # GH 4851 # list of 0-dim ndarrays - expected = DataFrame({0: range(10)}) + expected = DataFrame({0: np.arange(10)}) data = [np.array(x) for x in range(10)] result = DataFrame(data) tm.assert_frame_equal(result, expected) @@ -1927,6 +1927,13 @@ def test_to_frame_with_falsey_names(self): result = DataFrame(Series(name=0)).dtypes tm.assert_series_equal(result, expected) + @pytest.mark.parametrize('dtype', [None, 'uint8', 'category']) + def test_constructor_range_dtype(self, dtype): + # GH 16804 + expected = DataFrame({'A': [0, 1, 2, 3, 4]}, dtype=dtype or 'int64') + result = DataFrame({'A': range(5)}, dtype=dtype) + tm.assert_frame_equal(result, expected) + class TestDataFrameConstructorWithDatetimeTZ(TestData): diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 41ddfe934a131..0e4957da5478c 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -532,7 +532,7 @@ def f(): def f(): df = DataFrame() - df['foo'] = Series(range(len(df))) + df['foo'] = Series(np.arange(len(df)), dtype='float64') return df tm.assert_frame_equal(f(), expected) diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py index f33e19c7f6223..f3be7bb9905f4 100644 --- a/pandas/tests/series/test_alter_axes.py +++ b/pandas/tests/series/test_alter_axes.py @@ -219,7 +219,7 @@ def test_reorder_levels(self): labels=[[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]], names=['L0', 'L0', 'L0']) - expected = Series(range(6), index=e_idx) + expected = Series(np.arange(6), index=e_idx) assert_series_equal(result, expected) result = s.reorder_levels(['L0', 'L0', 'L0']) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index df7d7a946e881..d296086021349 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -846,3 +846,10 @@ def test_constructor_generic_timestamp_deprecated(self): msg = "cannot convert datetimelike" with tm.assert_raises_regex(TypeError, msg): Series([], dtype='M8[ps]') + + @pytest.mark.parametrize('dtype', [None, 'uint8', 'category']) + def test_constructor_range_dtype(self, dtype): + # GH 16804 + expected = Series([0, 1, 2, 3, 4], dtype=dtype or 'int64') + result = Series(range(5), dtype=dtype) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/test_compat.py b/pandas/tests/test_compat.py index ff9d09c033164..ead9ba1e26e2d 100644 --- a/pandas/tests/test_compat.py +++ b/pandas/tests/test_compat.py @@ -3,9 +3,10 @@ Testing that functions from compat work as expected """ +import pytest from pandas.compat import (range, zip, map, filter, lrange, lzip, lmap, lfilter, builtins, iterkeys, itervalues, iteritems, - next) + next, get_range_parameters, PY2) class TestBuiltinIterators(object): @@ -69,3 +70,22 @@ def test_dict_iterators(self): assert next(itervalues({1: 2})) == 2 assert next(iterkeys({1: 2})) == 1 assert next(iteritems({1: 2})) == (1, 2) + + +class TestCompatFunctions(object): + + @pytest.mark.parametrize( + 'start,stop,step', [(0, 10, 2), (11, -2, -1), (0, -5, 1), (2, 4, 8)]) + def test_get_range_parameters(self, start, stop, step): + rng = range(start, stop, step) + if PY2 and len(rng) == 0: + start_expected, stop_expected, step_expected = 0, 0, 1 + elif PY2 and len(rng) == 1: + start_expected, stop_expected, step_expected = start, start + 1, 1 + else: + start_expected, stop_expected, step_expected = start, stop, step + + start_result, stop_result, step_result = get_range_parameters(rng) + assert start_result == start_expected + assert stop_result == stop_expected + assert step_result == step_expected diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 4e26689badb3c..ac8297a53de37 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -3033,7 +3033,7 @@ def test_nearest(self): result = pd.Series(range(3), index=index).resample('20s').nearest() expected = pd.Series( - np.array([0, 0, 1, 1, 1, 2, 2]), + [0, 0, 1, 1, 1, 2, 2], index=pd.DatetimeIndex( ['2000-01-01 00:00:00', '2000-01-01 00:00:20', '2000-01-01 00:00:40', '2000-01-01 00:01:00', diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 432350b4849d8..c567613acebd1 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -698,8 +698,8 @@ def get_expects(self): return expects def _create_dtype_data(self, dtype): - sr1 = Series(range(5), dtype=dtype) - sr2 = Series(range(10, 0, -2), dtype=dtype) + sr1 = Series(np.arange(5), dtype=dtype) + sr2 = Series(np.arange(10, 0, -2), dtype=dtype) df = DataFrame(np.arange(10).reshape((5, 2)), dtype=dtype) data = {