Skip to content

Commit a2e5400

Browse files
jschendel authored and jreback committed
BUG: Fix range dtype in Series/DataFrame constructor on Windows (#17840)
1 parent e457325 commit a2e5400

File tree

11 files changed

+73
-27
lines changed

11 files changed

+73
-27
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -955,6 +955,7 @@ Conversion
955955
- Bug in ``Timedelta`` construction and arithmetic that would not propagate the ``Overflow`` exception (:issue:`17367`)
956956
- Bug in :meth:`~DataFrame.astype` converting to object dtype when passed extension type classes (``DatetimeTZDtype``, ``CategoricalDtype``) rather than instances. Now a ``TypeError`` is raised when a class is passed (:issue:`17780`).
957957
- Bug in :meth:`to_numeric` in which elements were not always being coerced to numeric when ``errors='coerce'`` (:issue:`17007`, :issue:`17125`)
958+
- Bug in ``DataFrame`` and ``Series`` constructors where ``range`` objects are converted to ``int32`` dtype on Windows instead of ``int64`` (:issue:`16804`)
958959

959960
Indexing
960961
^^^^^^^^

pandas/compat/__init__.py

+22
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,10 @@ def signature(f):
100100
'varargs', 'keywords'])
101101
return argspec(args, defaults, varargs, keywords)
102102

103+
def get_range_parameters(data):
104+
"""Gets the start, stop, and step parameters from a range object"""
105+
return data.start, data.stop, data.step
106+
103107
# have to explicitly put builtins into the namespace
104108
range = range
105109
map = map
@@ -146,6 +150,24 @@ def bytes_to_str(b, encoding='ascii'):
146150
def signature(f):
147151
return inspect.getargspec(f)
148152

153+
def get_range_parameters(data):
154+
"""Gets the start, stop, and step parameters from a range object"""
155+
# seems we only have indexing ops to infer
156+
# rather than direct accessors
157+
if len(data) > 1:
158+
step = data[1] - data[0]
159+
stop = data[-1] + step
160+
start = data[0]
161+
elif len(data):
162+
start = data[0]
163+
stop = data[0] + 1
164+
step = 1
165+
else:
166+
start = stop = 0
167+
step = 1
168+
169+
return start, stop, step
170+
149171
# import iterator versions of these functions
150172
range = xrange
151173
intern = intern

pandas/core/indexes/range.py

+2-19
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
is_int64_dtype)
1111

1212
from pandas import compat
13-
from pandas.compat import lrange, range
13+
from pandas.compat import lrange, range, get_range_parameters
1414
from pandas.compat.numpy import function as nv
1515
from pandas.core.common import _all_none
1616
from pandas.core.indexes.base import Index, _index_shared_docs
@@ -113,24 +113,7 @@ def from_range(cls, data, name=None, dtype=None, **kwargs):
113113
'{0}(...) must be called with object coercible to a '
114114
'range, {1} was passed'.format(cls.__name__, repr(data)))
115115

116-
if compat.PY3:
117-
step = data.step
118-
stop = data.stop
119-
start = data.start
120-
else:
121-
# seems we only have indexing ops to infer
122-
# rather than direct accessors
123-
if len(data) > 1:
124-
step = data[1] - data[0]
125-
stop = data[-1] + step
126-
start = data[0]
127-
elif len(data):
128-
start = data[0]
129-
stop = data[0] + 1
130-
step = 1
131-
else:
132-
start = stop = 0
133-
step = 1
116+
start, stop, step = get_range_parameters(data)
134117
return RangeIndex(start, stop, step, dtype=dtype, name=name, **kwargs)
135118

136119
@classmethod

pandas/core/series.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,8 @@
6060
from pandas.core.indexes.period import PeriodIndex
6161
from pandas import compat
6262
from pandas.io.formats.terminal import get_terminal_size
63-
from pandas.compat import zip, u, OrderedDict, StringIO
63+
from pandas.compat import (
64+
zip, u, OrderedDict, StringIO, range, get_range_parameters)
6465
from pandas.compat.numpy import function as nv
6566

6667
from pandas.core import accessor
@@ -3177,6 +3178,11 @@ def _try_cast(arr, take_fast_path):
31773178

31783179
subarr = maybe_cast_to_datetime(subarr, dtype)
31793180

3181+
elif isinstance(data, range):
3182+
# GH 16804
3183+
start, stop, step = get_range_parameters(data)
3184+
arr = np.arange(start, stop, step, dtype='int64')
3185+
subarr = _try_cast(arr, False)
31803186
else:
31813187
subarr = _try_cast(data, False)
31823188

pandas/tests/frame/test_constructors.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -829,7 +829,7 @@ def test_constructor_list_of_lists(self):
829829

830830
# GH 4851
831831
# list of 0-dim ndarrays
832-
expected = DataFrame({0: range(10)})
832+
expected = DataFrame({0: np.arange(10)})
833833
data = [np.array(x) for x in range(10)]
834834
result = DataFrame(data)
835835
tm.assert_frame_equal(result, expected)
@@ -1927,6 +1927,13 @@ def test_to_frame_with_falsey_names(self):
19271927
result = DataFrame(Series(name=0)).dtypes
19281928
tm.assert_series_equal(result, expected)
19291929

1930+
@pytest.mark.parametrize('dtype', [None, 'uint8', 'category'])
1931+
def test_constructor_range_dtype(self, dtype):
1932+
# GH 16804
1933+
expected = DataFrame({'A': [0, 1, 2, 3, 4]}, dtype=dtype or 'int64')
1934+
result = DataFrame({'A': range(5)}, dtype=dtype)
1935+
tm.assert_frame_equal(result, expected)
1936+
19301937

19311938
class TestDataFrameConstructorWithDatetimeTZ(TestData):
19321939

pandas/tests/indexing/test_partial.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -532,7 +532,7 @@ def f():
532532

533533
def f():
534534
df = DataFrame()
535-
df['foo'] = Series(range(len(df)))
535+
df['foo'] = Series(np.arange(len(df)), dtype='float64')
536536
return df
537537

538538
tm.assert_frame_equal(f(), expected)

pandas/tests/series/test_alter_axes.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ def test_reorder_levels(self):
219219
labels=[[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0],
220220
[0, 0, 0, 0, 0, 0]],
221221
names=['L0', 'L0', 'L0'])
222-
expected = Series(range(6), index=e_idx)
222+
expected = Series(np.arange(6), index=e_idx)
223223
assert_series_equal(result, expected)
224224

225225
result = s.reorder_levels(['L0', 'L0', 'L0'])

pandas/tests/series/test_constructors.py

+7
Original file line numberDiff line numberDiff line change
@@ -846,3 +846,10 @@ def test_constructor_generic_timestamp_deprecated(self):
846846
msg = "cannot convert datetimelike"
847847
with tm.assert_raises_regex(TypeError, msg):
848848
Series([], dtype='M8[ps]')
849+
850+
@pytest.mark.parametrize('dtype', [None, 'uint8', 'category'])
851+
def test_constructor_range_dtype(self, dtype):
852+
# GH 16804
853+
expected = Series([0, 1, 2, 3, 4], dtype=dtype or 'int64')
854+
result = Series(range(5), dtype=dtype)
855+
tm.assert_series_equal(result, expected)

pandas/tests/test_compat.py

+21-1
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,10 @@
33
Testing that functions from compat work as expected
44
"""
55

6+
import pytest
67
from pandas.compat import (range, zip, map, filter, lrange, lzip, lmap,
78
lfilter, builtins, iterkeys, itervalues, iteritems,
8-
next)
9+
next, get_range_parameters, PY2)
910

1011

1112
class TestBuiltinIterators(object):
@@ -69,3 +70,22 @@ def test_dict_iterators(self):
6970
assert next(itervalues({1: 2})) == 2
7071
assert next(iterkeys({1: 2})) == 1
7172
assert next(iteritems({1: 2})) == (1, 2)
73+
74+
75+
class TestCompatFunctions(object):
76+
77+
@pytest.mark.parametrize(
78+
'start,stop,step', [(0, 10, 2), (11, -2, -1), (0, -5, 1), (2, 4, 8)])
79+
def test_get_range_parameters(self, start, stop, step):
80+
rng = range(start, stop, step)
81+
if PY2 and len(rng) == 0:
82+
start_expected, stop_expected, step_expected = 0, 0, 1
83+
elif PY2 and len(rng) == 1:
84+
start_expected, stop_expected, step_expected = start, start + 1, 1
85+
else:
86+
start_expected, stop_expected, step_expected = start, stop, step
87+
88+
start_result, stop_result, step_result = get_range_parameters(rng)
89+
assert start_result == start_expected
90+
assert stop_result == stop_expected
91+
assert step_result == step_expected

pandas/tests/test_resample.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3033,7 +3033,7 @@ def test_nearest(self):
30333033
result = pd.Series(range(3), index=index).resample('20s').nearest()
30343034

30353035
expected = pd.Series(
3036-
np.array([0, 0, 1, 1, 1, 2, 2]),
3036+
[0, 0, 1, 1, 1, 2, 2],
30373037
index=pd.DatetimeIndex(
30383038
['2000-01-01 00:00:00', '2000-01-01 00:00:20',
30393039
'2000-01-01 00:00:40', '2000-01-01 00:01:00',

pandas/tests/test_window.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -698,8 +698,8 @@ def get_expects(self):
698698
return expects
699699

700700
def _create_dtype_data(self, dtype):
701-
sr1 = Series(range(5), dtype=dtype)
702-
sr2 = Series(range(10, 0, -2), dtype=dtype)
701+
sr1 = Series(np.arange(5), dtype=dtype)
702+
sr2 = Series(np.arange(10, 0, -2), dtype=dtype)
703703
df = DataFrame(np.arange(10).reshape((5, 2)), dtype=dtype)
704704

705705
data = {

0 commit comments

Comments
 (0)