Skip to content

Commit edfb784

Browse files
jschendel authored and jreback committed
CLN/BUG: Consolidate Index.astype and fix tz aware bugs (#18937)
1 parent e7d9102 commit edfb784

File tree

13 files changed

+133
-141
lines changed

13 files changed

+133
-141
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,7 @@ Conversion
289289
- Bug in :class:`Series` constructor with an int or float list where specifying ``dtype=str``, ``dtype='str'`` or ``dtype='U'`` failed to convert the data elements to strings (:issue:`16605`)
290290
- Bug in :class:`Timestamp` where comparison with an array of ``Timestamp`` objects would result in a ``RecursionError`` (:issue:`15183`)
291291
- Bug in :class:`WeekOfMonth` and :class:`Week` where addition and subtraction did not roll correctly (:issue:`18510`, :issue:`18672`, :issue:`18864`)
292+
- Bug in :meth:`DatetimeIndex.astype` when converting between timezone aware dtypes, and converting from timezone aware to naive (:issue:`18951`)
292293

293294

294295
Indexing

pandas/core/indexes/base.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -1065,12 +1065,18 @@ def _to_embed(self, keep_tz=False, dtype=None):
10651065

10661066
@Appender(_index_shared_docs['astype'])
10671067
def astype(self, dtype, copy=True):
1068-
if is_categorical_dtype(dtype):
1068+
if is_dtype_equal(self.dtype, dtype):
1069+
return self.copy() if copy else self
1070+
elif is_categorical_dtype(dtype):
10691071
from .category import CategoricalIndex
10701072
return CategoricalIndex(self.values, name=self.name, dtype=dtype,
10711073
copy=copy)
1072-
return Index(self.values.astype(dtype, copy=copy), name=self.name,
1073-
dtype=dtype)
1074+
try:
1075+
return Index(self.values.astype(dtype, copy=copy), name=self.name,
1076+
dtype=dtype)
1077+
except (TypeError, ValueError):
1078+
msg = 'Cannot cast {name} to dtype {dtype}'
1079+
raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
10741080

10751081
def _to_safe_for_reshape(self):
10761082
""" convert to object if we are a categorical """

pandas/core/indexes/datetimelike.py

+31-6
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,22 @@
1111

1212
import numpy as np
1313
from pandas.core.dtypes.common import (
14-
is_integer, is_float,
15-
is_bool_dtype, _ensure_int64,
16-
is_scalar, is_dtype_equal,
17-
is_list_like, is_timedelta64_dtype)
14+
_ensure_int64,
15+
is_dtype_equal,
16+
is_float,
17+
is_integer,
18+
is_list_like,
19+
is_scalar,
20+
is_bool_dtype,
21+
is_categorical_dtype,
22+
is_datetime_or_timedelta_dtype,
23+
is_float_dtype,
24+
is_integer_dtype,
25+
is_object_dtype,
26+
is_string_dtype,
27+
is_timedelta64_dtype)
1828
from pandas.core.dtypes.generic import (
19-
ABCIndex, ABCSeries,
20-
ABCPeriodIndex, ABCIndexClass)
29+
ABCIndex, ABCSeries, ABCPeriodIndex, ABCIndexClass)
2130
from pandas.core.dtypes.missing import isna
2231
from pandas.core import common as com, algorithms
2332
from pandas.core.algorithms import checked_add_with_arr
@@ -859,6 +868,22 @@ def _concat_same_dtype(self, to_concat, name):
859868
new_data = np.concatenate([c.asi8 for c in to_concat])
860869
return self._simple_new(new_data, **attribs)
861870

871+
def astype(self, dtype, copy=True):
872+
if is_object_dtype(dtype):
873+
return self._box_values_as_index()
874+
elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
875+
return Index(self.format(), name=self.name, dtype=object)
876+
elif is_integer_dtype(dtype):
877+
return Index(self.values.astype('i8', copy=copy), name=self.name,
878+
dtype='i8')
879+
elif (is_datetime_or_timedelta_dtype(dtype) and
880+
not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype):
881+
# disallow conversion between datetime/timedelta,
882+
# and conversions for any datetimelike to float
883+
msg = 'Cannot cast {name} to dtype {dtype}'
884+
raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
885+
return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy)
886+
862887

863888
def _ensure_datetimelike_to_i8(other):
864889
""" helper for coercing an input scalar or array to i8 """

pandas/core/indexes/datetimes.py

+19-29
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,19 @@
1010
from pandas.core.base import _shared_docs
1111

1212
from pandas.core.dtypes.common import (
13-
_NS_DTYPE, _INT64_DTYPE,
14-
is_object_dtype, is_datetime64_dtype,
15-
is_datetimetz, is_dtype_equal,
13+
_INT64_DTYPE,
14+
_NS_DTYPE,
15+
is_object_dtype,
16+
is_datetime64_dtype,
17+
is_datetimetz,
18+
is_dtype_equal,
1619
is_timedelta64_dtype,
17-
is_integer, is_float,
20+
is_integer,
21+
is_float,
1822
is_integer_dtype,
1923
is_datetime64_ns_dtype,
2024
is_period_dtype,
2125
is_bool_dtype,
22-
is_string_dtype,
23-
is_categorical_dtype,
2426
is_string_like,
2527
is_list_like,
2628
is_scalar,
@@ -36,20 +38,17 @@
3638
from pandas.core.algorithms import checked_add_with_arr
3739

3840
from pandas.core.indexes.base import Index, _index_shared_docs
39-
from pandas.core.indexes.category import CategoricalIndex
4041
from pandas.core.indexes.numeric import Int64Index, Float64Index
4142
import pandas.compat as compat
42-
from pandas.tseries.frequencies import (
43-
to_offset, get_period_alias,
44-
Resolution)
43+
from pandas.tseries.frequencies import to_offset, get_period_alias, Resolution
4544
from pandas.core.indexes.datetimelike import (
4645
DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin)
4746
from pandas.tseries.offsets import (
4847
DateOffset, generate_range, Tick, CDay, prefix_mapping)
4948

5049
from pandas.core.tools.timedeltas import to_timedelta
51-
from pandas.util._decorators import (Appender, cache_readonly,
52-
deprecate_kwarg, Substitution)
50+
from pandas.util._decorators import (
51+
Appender, cache_readonly, deprecate_kwarg, Substitution)
5352
import pandas.core.common as com
5453
import pandas.tseries.offsets as offsets
5554
import pandas.core.tools.datetimes as tools
@@ -906,25 +905,16 @@ def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs):
906905
@Appender(_index_shared_docs['astype'])
907906
def astype(self, dtype, copy=True):
908907
dtype = pandas_dtype(dtype)
909-
if is_object_dtype(dtype):
910-
return self._box_values_as_index()
911-
elif is_integer_dtype(dtype):
912-
return Index(self.values.astype('i8', copy=copy), name=self.name,
913-
dtype='i8')
914-
elif is_datetime64_ns_dtype(dtype):
915-
if self.tz is not None:
916-
return self.tz_convert('UTC').tz_localize(None)
917-
elif copy is True:
918-
return self.copy()
919-
return self
920-
elif is_categorical_dtype(dtype):
921-
return CategoricalIndex(self.values, name=self.name, dtype=dtype,
922-
copy=copy)
923-
elif is_string_dtype(dtype):
924-
return Index(self.format(), name=self.name, dtype=object)
908+
if (is_datetime64_ns_dtype(dtype) and
909+
not is_dtype_equal(dtype, self.dtype)):
910+
# GH 18951: datetime64_ns dtype but not equal means different tz
911+
new_tz = getattr(dtype, 'tz', None)
912+
if getattr(self.dtype, 'tz', None) is None:
913+
return self.tz_localize(new_tz)
914+
return self.tz_convert(new_tz)
925915
elif is_period_dtype(dtype):
926916
return self.to_period(freq=dtype.freq)
927-
raise TypeError('Cannot cast DatetimeIndex to dtype %s' % dtype)
917+
return super(DatetimeIndex, self).astype(dtype, copy=copy)
928918

929919
def _get_time_micros(self):
930920
values = self.asi8

pandas/core/indexes/interval.py

+2-13
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
is_datetime_or_timedelta_dtype,
1313
is_datetime64tz_dtype,
1414
is_integer_dtype,
15-
is_object_dtype,
16-
is_categorical_dtype,
1715
is_float_dtype,
1816
is_interval_dtype,
1917
is_scalar,
@@ -29,7 +27,6 @@
2927
Interval, IntervalMixin, IntervalTree,
3028
intervals_to_interval_bounds)
3129

32-
from pandas.core.indexes.category import CategoricalIndex
3330
from pandas.core.indexes.datetimes import date_range
3431
from pandas.core.indexes.timedeltas import timedelta_range
3532
from pandas.core.indexes.multi import MultiIndex
@@ -671,16 +668,8 @@ def copy(self, deep=False, name=None):
671668
@Appender(_index_shared_docs['astype'])
672669
def astype(self, dtype, copy=True):
673670
if is_interval_dtype(dtype):
674-
if copy:
675-
self = self.copy()
676-
return self
677-
elif is_object_dtype(dtype):
678-
return Index(self.values, dtype=object)
679-
elif is_categorical_dtype(dtype):
680-
return CategoricalIndex(self.values, name=self.name, dtype=dtype,
681-
copy=copy)
682-
raise ValueError('Cannot cast IntervalIndex to dtype {dtype}'
683-
.format(dtype=dtype))
671+
return self.copy() if copy else self
672+
return super(IntervalIndex, self).astype(dtype, copy=copy)
684673

685674
@cache_readonly
686675
def dtype(self):

pandas/core/indexes/numeric.py

+9-20
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,8 @@
44
from pandas.core.dtypes.common import (
55
is_dtype_equal,
66
pandas_dtype,
7-
is_float_dtype,
8-
is_object_dtype,
7+
needs_i8_conversion,
98
is_integer_dtype,
10-
is_categorical_dtype,
119
is_bool,
1210
is_bool_dtype,
1311
is_scalar)
@@ -17,7 +15,6 @@
1715
from pandas.core import algorithms
1816
from pandas.core.indexes.base import (
1917
Index, InvalidIndexError, _index_shared_docs)
20-
from pandas.core.indexes.category import CategoricalIndex
2118
from pandas.util._decorators import Appender, cache_readonly
2219
import pandas.core.dtypes.concat as _concat
2320
import pandas.core.indexes.base as ibase
@@ -315,22 +312,14 @@ def inferred_type(self):
315312
@Appender(_index_shared_docs['astype'])
316313
def astype(self, dtype, copy=True):
317314
dtype = pandas_dtype(dtype)
318-
if is_float_dtype(dtype):
319-
values = self._values.astype(dtype, copy=copy)
320-
elif is_integer_dtype(dtype):
321-
if self.hasnans:
322-
raise ValueError('cannot convert float NaN to integer')
323-
values = self._values.astype(dtype, copy=copy)
324-
elif is_object_dtype(dtype):
325-
values = self._values.astype('object', copy=copy)
326-
elif is_categorical_dtype(dtype):
327-
return CategoricalIndex(self, name=self.name, dtype=dtype,
328-
copy=copy)
329-
else:
330-
raise TypeError('Setting {cls} dtype to anything other than '
331-
'float64, object, or category is not supported'
332-
.format(cls=self.__class__))
333-
return Index(values, name=self.name, dtype=dtype)
315+
if needs_i8_conversion(dtype):
316+
msg = ('Cannot convert Float64Index to dtype {dtype}; integer '
317+
'values are required for conversion').format(dtype=dtype)
318+
raise TypeError(msg)
319+
elif is_integer_dtype(dtype) and self.hasnans:
320+
# GH 13149
321+
raise ValueError('Cannot convert NA to integer')
322+
return super(Float64Index, self).astype(dtype, copy=copy)
334323

335324
@Appender(_index_shared_docs['_convert_scalar_indexer'])
336325
def _convert_scalar_indexer(self, key, kind=None):

pandas/core/indexes/period.py

+7-19
Original file line numberDiff line numberDiff line change
@@ -7,24 +7,21 @@
77
from pandas.core.dtypes.common import (
88
is_integer,
99
is_float,
10-
is_object_dtype,
1110
is_integer_dtype,
1211
is_float_dtype,
1312
is_scalar,
1413
is_datetime64_dtype,
15-
is_datetime64tz_dtype,
14+
is_datetime64_any_dtype,
1615
is_timedelta64_dtype,
1716
is_period_dtype,
1817
is_bool_dtype,
19-
is_categorical_dtype,
2018
pandas_dtype,
2119
_ensure_object)
2220
from pandas.core.dtypes.dtypes import PeriodDtype
2321
from pandas.core.dtypes.generic import ABCSeries
2422

2523
import pandas.tseries.frequencies as frequencies
2624
from pandas.tseries.frequencies import get_freq_code as _gfc
27-
from pandas.core.indexes.category import CategoricalIndex
2825
from pandas.core.indexes.datetimes import DatetimeIndex, Int64Index, Index
2926
from pandas.core.indexes.timedeltas import TimedeltaIndex
3027
from pandas.core.indexes.datetimelike import DatelikeOps, DatetimeIndexOpsMixin
@@ -506,23 +503,14 @@ def asof_locs(self, where, mask):
506503
@Appender(_index_shared_docs['astype'])
507504
def astype(self, dtype, copy=True, how='start'):
508505
dtype = pandas_dtype(dtype)
509-
if is_object_dtype(dtype):
510-
return self._box_values_as_index()
511-
elif is_integer_dtype(dtype):
512-
if copy:
513-
return self._int64index.copy()
514-
else:
515-
return self._int64index
516-
elif is_datetime64_dtype(dtype):
517-
return self.to_timestamp(how=how)
518-
elif is_datetime64tz_dtype(dtype):
519-
return self.to_timestamp(how=how).tz_localize(dtype.tz)
506+
if is_integer_dtype(dtype):
507+
return self._int64index.copy() if copy else self._int64index
508+
elif is_datetime64_any_dtype(dtype):
509+
tz = getattr(dtype, 'tz', None)
510+
return self.to_timestamp(how=how).tz_localize(tz)
520511
elif is_period_dtype(dtype):
521512
return self.asfreq(freq=dtype.freq)
522-
elif is_categorical_dtype(dtype):
523-
return CategoricalIndex(self.values, name=self.name, dtype=dtype,
524-
copy=copy)
525-
raise TypeError('Cannot cast PeriodIndex to dtype %s' % dtype)
513+
return super(PeriodIndex, self).astype(dtype, copy=copy)
526514

527515
@Substitution(klass='PeriodIndex')
528516
@Appender(_shared_docs['searchsorted'])

pandas/core/indexes/timedeltas.py

+6-23
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,20 @@
44
import numpy as np
55
from pandas.core.dtypes.common import (
66
_TD_DTYPE,
7-
is_integer, is_float,
7+
is_integer,
8+
is_float,
89
is_bool_dtype,
910
is_list_like,
1011
is_scalar,
11-
is_integer_dtype,
12-
is_object_dtype,
1312
is_timedelta64_dtype,
1413
is_timedelta64_ns_dtype,
15-
is_categorical_dtype,
1614
pandas_dtype,
1715
_ensure_int64)
1816
from pandas.core.dtypes.missing import isna
1917
from pandas.core.dtypes.generic import ABCSeries
2018
from pandas.core.common import _maybe_box, _values_from_object
2119

2220
from pandas.core.indexes.base import Index
23-
from pandas.core.indexes.category import CategoricalIndex
2421
from pandas.core.indexes.numeric import Int64Index
2522
import pandas.compat as compat
2623
from pandas.compat import u
@@ -483,28 +480,14 @@ def to_pytimedelta(self):
483480
@Appender(_index_shared_docs['astype'])
484481
def astype(self, dtype, copy=True):
485482
dtype = pandas_dtype(dtype)
486-
487-
if is_object_dtype(dtype):
488-
return self._box_values_as_index()
489-
elif is_timedelta64_ns_dtype(dtype):
490-
if copy is True:
491-
return self.copy()
492-
return self
493-
elif is_timedelta64_dtype(dtype):
483+
if is_timedelta64_dtype(dtype) and not is_timedelta64_ns_dtype(dtype):
494484
# return an index (essentially this is division)
495485
result = self.values.astype(dtype, copy=copy)
496486
if self.hasnans:
497-
return Index(self._maybe_mask_results(result,
498-
convert='float64'),
499-
name=self.name)
487+
values = self._maybe_mask_results(result, convert='float64')
488+
return Index(values, name=self.name)
500489
return Index(result.astype('i8'), name=self.name)
501-
elif is_integer_dtype(dtype):
502-
return Index(self.values.astype('i8', copy=copy), dtype='i8',
503-
name=self.name)
504-
elif is_categorical_dtype(dtype):
505-
return CategoricalIndex(self.values, name=self.name, dtype=dtype,
506-
copy=copy)
507-
raise TypeError('Cannot cast TimedeltaIndex to dtype %s' % dtype)
490+
return super(TimedeltaIndex, self).astype(dtype, copy=copy)
508491

509492
def union(self, other):
510493
"""

0 commit comments

Comments
 (0)