Skip to content

Commit 3760f16

Browse files
sinhrks authored and jreback committed
BUG: Index may ignore specified datetime/timedelta dtypes
- [x] tests added / passed
- [x] passes ``git diff upstream/master | flake8 --diff``
- [x] whatsnew entry

``Index`` ignores specified ``dtype`` if it is ``datetime64`` (normal and tz) or ``timedelta64``. This PR makes it consistent with ``DatetimeIndex`` and ``TimedeltaIndex``.

```
pd.Index([1, 2, 3], dtype='datetime64[ns, US/Eastern]')
# Index([1, 2, 3], dtype='object')
pd.Index([1, 2, 3], dtype='datetime64[ns]')
# Index([1, 2, 3], dtype='object')
pd.Index([1, 2, 3], dtype='timedelta64[ns]')
# Int64Index([1, 2, 3], dtype='int64')
```

Also, fixed ``MultiIndex.get_level_values`` not to pass unnecessary ``tz`` and ``freq``.

Author: sinhrks <[email protected]>

Closes #13981 from sinhrks/index_init_datetimelike and squashes the following commits:

a922aef [sinhrks] BUG: Index may ignore specified datetime/timedelta dtypes
1 parent 0975509 commit 3760f16

File tree

6 files changed

+76
-14
lines changed

6 files changed

+76
-14
lines changed

doc/source/whatsnew/v0.19.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -957,6 +957,7 @@ Bug Fixes
957957
- Bug in ``DatetimeIndex`` with nanosecond frequency does not include timestamp specified with ``end`` (:issue:`13672`)
958958

959959
- Bug in ``Index`` raises ``OutOfBoundsDatetime`` if ``datetime`` exceeds ``datetime64[ns]`` bounds, rather than coercing to ``object`` dtype (:issue:`13663`)
960+
- Bug in ``Index`` where a specified ``datetime64`` or ``timedelta64`` ``dtype`` may be ignored (:issue:`13981`)
960961
- Bug in ``RangeIndex`` where it can be created with no arguments rather than raising ``TypeError`` (:issue:`13793`)
961962
- Bug in ``.value_counts`` raises ``OutOfBoundsDatetime`` if data exceeds ``datetime64[ns]`` bounds (:issue:`13663`)
962963
- Bug in ``DatetimeIndex`` may raise ``OutOfBoundsDatetime`` if input ``np.datetime64`` has other unit than ``ns`` (:issue:`9114`)

pandas/indexes/base.py

+10-6
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,15 @@
1919
from pandas.types.missing import isnull, array_equivalent
2020
from pandas.types.common import (_ensure_int64, _ensure_object,
2121
_ensure_platform_int,
22-
is_datetimetz,
2322
is_integer,
2423
is_float,
2524
is_dtype_equal,
2625
is_object_dtype,
2726
is_categorical_dtype,
2827
is_bool_dtype,
2928
is_integer_dtype, is_float_dtype,
29+
is_datetime64_any_dtype,
30+
is_timedelta64_dtype,
3031
needs_i8_conversion,
3132
is_iterator, is_list_like,
3233
is_scalar)
@@ -162,16 +163,19 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
162163
# index-like
163164
elif isinstance(data, (np.ndarray, Index, ABCSeries)):
164165

165-
if (issubclass(data.dtype.type, np.datetime64) or
166-
is_datetimetz(data)):
166+
if (is_datetime64_any_dtype(data) or
167+
(dtype is not None and is_datetime64_any_dtype(dtype)) or
168+
'tz' in kwargs):
167169
from pandas.tseries.index import DatetimeIndex
168-
result = DatetimeIndex(data, copy=copy, name=name, **kwargs)
169-
if dtype is not None and _o_dtype == dtype:
170+
result = DatetimeIndex(data, copy=copy, name=name,
171+
dtype=dtype, **kwargs)
172+
if dtype is not None and is_dtype_equal(_o_dtype, dtype):
170173
return Index(result.to_pydatetime(), dtype=_o_dtype)
171174
else:
172175
return result
173176

174-
elif issubclass(data.dtype.type, np.timedelta64):
177+
elif (is_timedelta64_dtype(data) or
178+
(dtype is not None and is_timedelta64_dtype(dtype))):
175179
from pandas.tseries.tdi import TimedeltaIndex
176180
result = TimedeltaIndex(data, copy=copy, name=name, **kwargs)
177181
if dtype is not None and _o_dtype == dtype:

pandas/indexes/multi.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -687,10 +687,7 @@ def get_level_values(self, level):
687687
labels = self.labels[num]
688688
filled = algos.take_1d(unique.values, labels,
689689
fill_value=unique._na_value)
690-
_simple_new = unique._simple_new
691-
values = _simple_new(filled, name=self.names[num],
692-
freq=getattr(unique, 'freq', None),
693-
tz=getattr(unique, 'tz', None))
690+
values = unique._shallow_copy(filled)
694691
return values
695692

696693
def format(self, space=2, sparsify=None, adjoin=True, names=False,

pandas/tests/indexes/test_base.py

+39
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,45 @@ def test_constructor_dtypes(self):
323323
self.assertIsInstance(idx, Index)
324324
self.assertEqual(idx.dtype, object)
325325

326+
def test_constructor_dtypes_datetime(self):
327+
328+
for tz in [None, 'UTC', 'US/Eastern', 'Asia/Tokyo']:
329+
idx = pd.date_range('2011-01-01', periods=5, tz=tz)
330+
dtype = idx.dtype
331+
332+
# pass values without timezone, as DatetimeIndex localizes it
333+
for values in [pd.date_range('2011-01-01', periods=5).values,
334+
pd.date_range('2011-01-01', periods=5).asi8]:
335+
336+
for res in [pd.Index(values, tz=tz),
337+
pd.Index(values, dtype=dtype),
338+
pd.Index(list(values), tz=tz),
339+
pd.Index(list(values), dtype=dtype)]:
340+
tm.assert_index_equal(res, idx)
341+
342+
# check compat with DatetimeIndex
343+
for res in [pd.DatetimeIndex(values, tz=tz),
344+
pd.DatetimeIndex(values, dtype=dtype),
345+
pd.DatetimeIndex(list(values), tz=tz),
346+
pd.DatetimeIndex(list(values), dtype=dtype)]:
347+
tm.assert_index_equal(res, idx)
348+
349+
def test_constructor_dtypes_timedelta(self):
350+
351+
idx = pd.timedelta_range('1 days', periods=5)
352+
dtype = idx.dtype
353+
354+
for values in [idx.values, idx.asi8]:
355+
356+
for res in [pd.Index(values, dtype=dtype),
357+
pd.Index(list(values), dtype=dtype)]:
358+
tm.assert_index_equal(res, idx)
359+
360+
# check compat with TimedeltaIndex
361+
for res in [pd.TimedeltaIndex(values, dtype=dtype),
362+
pd.TimedeltaIndex(list(values), dtype=dtype)]:
363+
tm.assert_index_equal(res, idx)
364+
326365
def test_view_with_args(self):
327366

328367
restricted = ['unicodeIndex', 'strIndex', 'catIndex', 'boolIndex',

pandas/tests/indexes/test_multi.py

+24
Original file line numberDiff line numberDiff line change
@@ -632,6 +632,30 @@ def test_from_arrays_index_series_period(self):
632632

633633
tm.assert_index_equal(result, result2)
634634

635+
def test_from_arrays_index_datetimelike_mixed(self):
636+
idx1 = pd.date_range('2015-01-01 10:00', freq='D', periods=3,
637+
tz='US/Eastern')
638+
idx2 = pd.date_range('2015-01-01 10:00', freq='H', periods=3)
639+
idx3 = pd.timedelta_range('1 days', freq='D', periods=3)
640+
idx4 = pd.period_range('2011-01-01', freq='D', periods=3)
641+
642+
result = pd.MultiIndex.from_arrays([idx1, idx2, idx3, idx4])
643+
tm.assert_index_equal(result.get_level_values(0), idx1)
644+
tm.assert_index_equal(result.get_level_values(1), idx2)
645+
tm.assert_index_equal(result.get_level_values(2), idx3)
646+
tm.assert_index_equal(result.get_level_values(3), idx4)
647+
648+
result2 = pd.MultiIndex.from_arrays([pd.Series(idx1),
649+
pd.Series(idx2),
650+
pd.Series(idx3),
651+
pd.Series(idx4)])
652+
tm.assert_index_equal(result2.get_level_values(0), idx1)
653+
tm.assert_index_equal(result2.get_level_values(1), idx2)
654+
tm.assert_index_equal(result2.get_level_values(2), idx3)
655+
tm.assert_index_equal(result2.get_level_values(3), idx4)
656+
657+
tm.assert_index_equal(result, result2)
658+
635659
def test_from_arrays_different_lengths(self):
636660
# GH13599
637661
idx1 = [1, 2, 3]

pandas/util/testing.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -749,10 +749,7 @@ def _get_ilevel_values(index, level):
749749
unique = index.levels[level]
750750
labels = index.labels[level]
751751
filled = take_1d(unique.values, labels, fill_value=unique._na_value)
752-
values = unique._simple_new(filled,
753-
name=index.names[level],
754-
freq=getattr(unique, 'freq', None),
755-
tz=getattr(unique, 'tz', None))
752+
values = unique._shallow_copy(filled, name=index.names[level])
756753
return values
757754

758755
# instance validation

0 commit comments

Comments
 (0)