Skip to content

Commit ead3ca8

Browse files
sinhrks authored and jreback committed
BUG: Index dtype may not be applied properly
1 parent ba5106e commit ead3ca8

File tree

3 files changed

+83
-32
lines changed

3 files changed

+83
-32
lines changed

doc/source/whatsnew/v0.17.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1100,3 +1100,4 @@ Bug Fixes
11001100
- Bug in ``date_range`` results in empty if freq is negative annually, quarterly and monthly (:issue:`11018`)
11011101
- Bug in ``DatetimeIndex`` cannot infer negative freq (:issue:`11018`)
11021102
- Remove use of some deprecated numpy comparison operations, mainly in tests. (:issue:`10569`)
1103+
- Bug in ``Index`` dtype may not be applied properly (:issue:`11017`)

pandas/core/index.py

+28-32
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,9 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False,
117117
if fastpath:
118118
return cls._simple_new(data, name)
119119

120-
from pandas.tseries.period import PeriodIndex
120+
if is_categorical_dtype(data) or is_categorical_dtype(dtype):
121+
return CategoricalIndex(data, copy=copy, name=name, **kwargs)
122+
121123
if isinstance(data, (np.ndarray, Index, ABCSeries)):
122124
if issubclass(data.dtype.type, np.datetime64) or is_datetimetz(data):
123125
from pandas.tseries.index import DatetimeIndex
@@ -137,10 +139,11 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False,
137139
if dtype is not None:
138140
try:
139141
data = np.array(data, dtype=dtype, copy=copy)
140-
except TypeError:
142+
except (TypeError, ValueError):
141143
pass
142144

143145
# maybe coerce to a sub-class
146+
from pandas.tseries.period import PeriodIndex
144147
if isinstance(data, PeriodIndex):
145148
return PeriodIndex(data, copy=copy, name=name, **kwargs)
146149
if issubclass(data.dtype.type, np.integer):
@@ -149,8 +152,6 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False,
149152
return Float64Index(data, copy=copy, dtype=dtype, name=name)
150153
elif issubclass(data.dtype.type, np.bool) or is_bool_dtype(data):
151154
subarr = data.astype('object')
152-
elif is_categorical_dtype(data) or is_categorical_dtype(dtype):
153-
return CategoricalIndex(data, copy=copy, name=name, **kwargs)
154155
else:
155156
subarr = com._asarray_tuplesafe(data, dtype=object)
156157

@@ -159,8 +160,28 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False,
159160
if copy:
160161
subarr = subarr.copy()
161162

162-
elif is_categorical_dtype(data) or is_categorical_dtype(dtype):
163-
return CategoricalIndex(data, copy=copy, name=name, **kwargs)
163+
if dtype is None:
164+
inferred = lib.infer_dtype(subarr)
165+
if inferred == 'integer':
166+
return Int64Index(subarr.astype('i8'), copy=copy, name=name)
167+
elif inferred in ['floating', 'mixed-integer-float']:
168+
return Float64Index(subarr, copy=copy, name=name)
169+
elif inferred == 'boolean':
170+
# don't support boolean explicitly ATM
171+
pass
172+
elif inferred != 'string':
173+
if (inferred.startswith('datetime') or
174+
tslib.is_timestamp_array(subarr)):
175+
from pandas.tseries.index import DatetimeIndex
176+
return DatetimeIndex(subarr, copy=copy, name=name, **kwargs)
177+
elif (inferred.startswith('timedelta') or
178+
lib.is_timedelta_array(subarr)):
179+
from pandas.tseries.tdi import TimedeltaIndex
180+
return TimedeltaIndex(subarr, copy=copy, name=name, **kwargs)
181+
elif inferred == 'period':
182+
return PeriodIndex(subarr, name=name, **kwargs)
183+
return cls._simple_new(subarr, name)
184+
164185
elif hasattr(data, '__array__'):
165186
return Index(np.asarray(data), dtype=dtype, copy=copy, name=name,
166187
**kwargs)
@@ -172,9 +193,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False,
172193
# we must be all tuples, otherwise don't construct
173194
# 10697
174195
if all( isinstance(e, tuple) for e in data ):
175-
176196
try:
177-
178197
# must be orderable in py3
179198
if compat.PY3:
180199
sorted(data)
@@ -183,32 +202,9 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False,
183202
except (TypeError, KeyError):
184203
# python2 - MultiIndex fails on mixed types
185204
pass
186-
187205
# other iterable of some kind
188206
subarr = com._asarray_tuplesafe(data, dtype=object)
189-
190-
if dtype is None:
191-
inferred = lib.infer_dtype(subarr)
192-
if inferred == 'integer':
193-
return Int64Index(subarr.astype('i8'), copy=copy, name=name)
194-
elif inferred in ['floating', 'mixed-integer-float']:
195-
return Float64Index(subarr, copy=copy, name=name)
196-
elif inferred == 'boolean':
197-
# don't support boolean explicitly ATM
198-
pass
199-
elif inferred != 'string':
200-
if (inferred.startswith('datetime') or
201-
tslib.is_timestamp_array(subarr)):
202-
from pandas.tseries.index import DatetimeIndex
203-
return DatetimeIndex(subarr, copy=copy, name=name, **kwargs)
204-
elif (inferred.startswith('timedelta') or
205-
lib.is_timedelta_array(subarr)):
206-
from pandas.tseries.tdi import TimedeltaIndex
207-
return TimedeltaIndex(subarr, copy=copy, name=name, **kwargs)
208-
elif inferred == 'period':
209-
return PeriodIndex(subarr, name=name, **kwargs)
210-
211-
return cls._simple_new(subarr, name)
207+
return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs)
212208

213209
@classmethod
214210
def _simple_new(cls, values, name=None, dtype=None, **kwargs):

pandas/tests/test_index.py

+54
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,11 @@ def test_constructor_from_series(self):
651651
df = pd.DataFrame(np.random.rand(5,3))
652652
df['date'] = ['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990', '5-1-1990']
653653
result = DatetimeIndex(df['date'], freq='MS')
654+
self.assertTrue(result.equals(expected))
655+
self.assertEqual(df['date'].dtype, object)
656+
657+
exp = pd.Series(['1-1-1990', '2-1-1990', '3-1-1990', '4-1-1990', '5-1-1990'], name='date')
658+
self.assert_series_equal(df['date'], exp)
654659

655660
# GH 6274
656661
# infer freq of same
@@ -693,6 +698,55 @@ def test_constructor_simple_new(self):
693698
result = idx._simple_new(idx, 'obj')
694699
self.assertTrue(result.equals(idx))
695700

701+
def test_constructor_dtypes(self):
702+
703+
for idx in [Index(np.array([1, 2, 3], dtype=int)),
704+
Index(np.array([1, 2, 3], dtype=int), dtype=int),
705+
Index(np.array([1., 2., 3.], dtype=float), dtype=int),
706+
Index([1, 2, 3], dtype=int),
707+
Index([1., 2., 3.], dtype=int)]:
708+
self.assertIsInstance(idx, Int64Index)
709+
710+
for idx in [Index(np.array([1., 2., 3.], dtype=float)),
711+
Index(np.array([1, 2, 3], dtype=int), dtype=float),
712+
Index(np.array([1., 2., 3.], dtype=float), dtype=float),
713+
Index([1, 2, 3], dtype=float),
714+
Index([1., 2., 3.], dtype=float)]:
715+
self.assertIsInstance(idx, Float64Index)
716+
717+
for idx in [Index(np.array([True, False, True], dtype=bool)),
718+
Index([True, False, True]),
719+
Index(np.array([True, False, True], dtype=bool), dtype=bool),
720+
Index([True, False, True], dtype=bool)]:
721+
self.assertIsInstance(idx, Index)
722+
self.assertEqual(idx.dtype, object)
723+
724+
for idx in [Index(np.array([1, 2, 3], dtype=int), dtype='category'),
725+
Index([1, 2, 3], dtype='category'),
726+
Index(np.array([np.datetime64('2011-01-01'), np.datetime64('2011-01-02')]), dtype='category'),
727+
Index([datetime(2011, 1, 1), datetime(2011, 1, 2)], dtype='category')]:
728+
self.assertIsInstance(idx, CategoricalIndex)
729+
730+
for idx in [Index(np.array([np.datetime64('2011-01-01'), np.datetime64('2011-01-02')])),
731+
Index([datetime(2011, 1, 1), datetime(2011, 1, 2)])]:
732+
self.assertIsInstance(idx, DatetimeIndex)
733+
734+
for idx in [Index(np.array([np.datetime64('2011-01-01'), np.datetime64('2011-01-02')]), dtype=object),
735+
Index([datetime(2011, 1, 1), datetime(2011, 1, 2)], dtype=object)]:
736+
self.assertNotIsInstance(idx, DatetimeIndex)
737+
self.assertIsInstance(idx, Index)
738+
self.assertEqual(idx.dtype, object)
739+
740+
for idx in [Index(np.array([np.timedelta64(1, 'D'), np.timedelta64(1, 'D')])),
741+
Index([timedelta(1), timedelta(1)])]:
742+
self.assertIsInstance(idx, TimedeltaIndex)
743+
744+
for idx in [Index(np.array([np.timedelta64(1, 'D'), np.timedelta64(1, 'D')]), dtype=object),
745+
Index([timedelta(1), timedelta(1)], dtype=object)]:
746+
self.assertNotIsInstance(idx, TimedeltaIndex)
747+
self.assertIsInstance(idx, Index)
748+
self.assertEqual(idx.dtype, object)
749+
696750
def test_view_with_args(self):
697751

698752
restricted = ['unicodeIndex','strIndex','catIndex','boolIndex','empty']

0 commit comments

Comments
 (0)