Skip to content

CLN/BUG: Consolidate Index.astype and fix tz aware bugs #18937

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Dec 27, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ Conversion
- Bug in :class:`Series` constructor with an int or float list where specifying ``dtype=str``, ``dtype='str'`` or ``dtype='U'`` failed to convert the data elements to strings (:issue:`16605`)
- Bug in :class:`Timestamp` where comparison with an array of ``Timestamp`` objects would result in a ``RecursionError`` (:issue:`15183`)
- Bug in :class:`WeekOfMonth` and class:`Week` where addition and subtraction did not roll correctly (:issue:`18510`,:issue:`18672`,:issue:`18864`)
- Bug in :meth:`DatetimeIndex.astype` when converting between timezone aware dtypes, and converting from timezone aware to naive (:issue:`18951`)


Indexing
Expand Down
12 changes: 9 additions & 3 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1065,12 +1065,18 @@ def _to_embed(self, keep_tz=False, dtype=None):

@Appender(_index_shared_docs['astype'])
def astype(self, dtype, copy=True):
if is_categorical_dtype(dtype):
if is_dtype_equal(self.dtype, dtype):
return self.copy() if copy else self
elif is_categorical_dtype(dtype):
from .category import CategoricalIndex
return CategoricalIndex(self.values, name=self.name, dtype=dtype,
copy=copy)
return Index(self.values.astype(dtype, copy=copy), name=self.name,
dtype=dtype)
try:
return Index(self.values.astype(dtype, copy=copy), name=self.name,
dtype=dtype)
except (TypeError, ValueError):
msg = 'Cannot cast {name} to dtype {dtype}'
raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))

def _to_safe_for_reshape(self):
""" convert to object if we are a categorical """
Expand Down
37 changes: 31 additions & 6 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,22 @@

import numpy as np
from pandas.core.dtypes.common import (
is_integer, is_float,
is_bool_dtype, _ensure_int64,
is_scalar, is_dtype_equal,
is_list_like, is_timedelta64_dtype)
_ensure_int64,
is_dtype_equal,
is_float,
is_integer,
is_list_like,
is_scalar,
is_bool_dtype,
is_categorical_dtype,
is_datetime_or_timedelta_dtype,
is_float_dtype,
is_integer_dtype,
is_object_dtype,
is_string_dtype,
is_timedelta64_dtype)
from pandas.core.dtypes.generic import (
ABCIndex, ABCSeries,
ABCPeriodIndex, ABCIndexClass)
ABCIndex, ABCSeries, ABCPeriodIndex, ABCIndexClass)
from pandas.core.dtypes.missing import isna
from pandas.core import common as com, algorithms
from pandas.core.algorithms import checked_add_with_arr
Expand Down Expand Up @@ -859,6 +868,22 @@ def _concat_same_dtype(self, to_concat, name):
new_data = np.concatenate([c.asi8 for c in to_concat])
return self._simple_new(new_data, **attribs)

def astype(self, dtype, copy=True):
if is_object_dtype(dtype):
return self._box_values_as_index()
elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
return Index(self.format(), name=self.name, dtype=object)
elif is_integer_dtype(dtype):
return Index(self.values.astype('i8', copy=copy), name=self.name,
dtype='i8')
elif (is_datetime_or_timedelta_dtype(dtype) and
not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype):
# disallow conversion between datetime/timedelta,
# and conversions for any datetimelike to float
msg = 'Cannot cast {name} to dtype {dtype}'
raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy)


def _ensure_datetimelike_to_i8(other):
""" helper for coercing an input scalar or array to i8 """
Expand Down
48 changes: 19 additions & 29 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,19 @@
from pandas.core.base import _shared_docs

from pandas.core.dtypes.common import (
_NS_DTYPE, _INT64_DTYPE,
is_object_dtype, is_datetime64_dtype,
is_datetimetz, is_dtype_equal,
_INT64_DTYPE,
_NS_DTYPE,
is_object_dtype,
is_datetime64_dtype,
is_datetimetz,
is_dtype_equal,
is_timedelta64_dtype,
is_integer, is_float,
is_integer,
is_float,
is_integer_dtype,
is_datetime64_ns_dtype,
is_period_dtype,
is_bool_dtype,
is_string_dtype,
is_categorical_dtype,
is_string_like,
is_list_like,
is_scalar,
Expand All @@ -36,20 +38,17 @@
from pandas.core.algorithms import checked_add_with_arr

from pandas.core.indexes.base import Index, _index_shared_docs
from pandas.core.indexes.category import CategoricalIndex
from pandas.core.indexes.numeric import Int64Index, Float64Index
import pandas.compat as compat
from pandas.tseries.frequencies import (
to_offset, get_period_alias,
Resolution)
from pandas.tseries.frequencies import to_offset, get_period_alias, Resolution
from pandas.core.indexes.datetimelike import (
DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin)
from pandas.tseries.offsets import (
DateOffset, generate_range, Tick, CDay, prefix_mapping)

from pandas.core.tools.timedeltas import to_timedelta
from pandas.util._decorators import (Appender, cache_readonly,
deprecate_kwarg, Substitution)
from pandas.util._decorators import (
Appender, cache_readonly, deprecate_kwarg, Substitution)
import pandas.core.common as com
import pandas.tseries.offsets as offsets
import pandas.core.tools.datetimes as tools
Expand Down Expand Up @@ -906,25 +905,16 @@ def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs):
@Appender(_index_shared_docs['astype'])
def astype(self, dtype, copy=True):
dtype = pandas_dtype(dtype)
if is_object_dtype(dtype):
return self._box_values_as_index()
elif is_integer_dtype(dtype):
return Index(self.values.astype('i8', copy=copy), name=self.name,
dtype='i8')
elif is_datetime64_ns_dtype(dtype):
if self.tz is not None:
return self.tz_convert('UTC').tz_localize(None)
elif copy is True:
return self.copy()
return self
elif is_categorical_dtype(dtype):
return CategoricalIndex(self.values, name=self.name, dtype=dtype,
copy=copy)
elif is_string_dtype(dtype):
return Index(self.format(), name=self.name, dtype=object)
if (is_datetime64_ns_dtype(dtype) and
not is_dtype_equal(dtype, self.dtype)):
# GH 18951: datetime64_ns dtype but not equal means different tz
new_tz = getattr(dtype, 'tz', None)
if getattr(self.dtype, 'tz', None) is None:
return self.tz_localize(new_tz)
return self.tz_convert(new_tz)
elif is_period_dtype(dtype):
return self.to_period(freq=dtype.freq)
raise TypeError('Cannot cast DatetimeIndex to dtype %s' % dtype)
return super(DatetimeIndex, self).astype(dtype, copy=copy)

def _get_time_micros(self):
values = self.asi8
Expand Down
15 changes: 2 additions & 13 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
is_datetime_or_timedelta_dtype,
is_datetime64tz_dtype,
is_integer_dtype,
is_object_dtype,
is_categorical_dtype,
is_float_dtype,
is_interval_dtype,
is_scalar,
Expand All @@ -29,7 +27,6 @@
Interval, IntervalMixin, IntervalTree,
intervals_to_interval_bounds)

from pandas.core.indexes.category import CategoricalIndex
from pandas.core.indexes.datetimes import date_range
from pandas.core.indexes.timedeltas import timedelta_range
from pandas.core.indexes.multi import MultiIndex
Expand Down Expand Up @@ -671,16 +668,8 @@ def copy(self, deep=False, name=None):
@Appender(_index_shared_docs['astype'])
def astype(self, dtype, copy=True):
if is_interval_dtype(dtype):
if copy:
self = self.copy()
return self
elif is_object_dtype(dtype):
return Index(self.values, dtype=object)
elif is_categorical_dtype(dtype):
return CategoricalIndex(self.values, name=self.name, dtype=dtype,
copy=copy)
raise ValueError('Cannot cast IntervalIndex to dtype {dtype}'
.format(dtype=dtype))
return self.copy() if copy else self
return super(IntervalIndex, self).astype(dtype, copy=copy)

@cache_readonly
def dtype(self):
Expand Down
29 changes: 9 additions & 20 deletions pandas/core/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,8 @@
from pandas.core.dtypes.common import (
is_dtype_equal,
pandas_dtype,
is_float_dtype,
is_object_dtype,
needs_i8_conversion,
is_integer_dtype,
is_categorical_dtype,
is_bool,
is_bool_dtype,
is_scalar)
Expand All @@ -17,7 +15,6 @@
from pandas.core import algorithms
from pandas.core.indexes.base import (
Index, InvalidIndexError, _index_shared_docs)
from pandas.core.indexes.category import CategoricalIndex
from pandas.util._decorators import Appender, cache_readonly
import pandas.core.dtypes.concat as _concat
import pandas.core.indexes.base as ibase
Expand Down Expand Up @@ -315,22 +312,14 @@ def inferred_type(self):
@Appender(_index_shared_docs['astype'])
def astype(self, dtype, copy=True):
dtype = pandas_dtype(dtype)
if is_float_dtype(dtype):
values = self._values.astype(dtype, copy=copy)
elif is_integer_dtype(dtype):
if self.hasnans:
raise ValueError('cannot convert float NaN to integer')
values = self._values.astype(dtype, copy=copy)
elif is_object_dtype(dtype):
values = self._values.astype('object', copy=copy)
elif is_categorical_dtype(dtype):
return CategoricalIndex(self, name=self.name, dtype=dtype,
copy=copy)
else:
raise TypeError('Setting {cls} dtype to anything other than '
'float64, object, or category is not supported'
.format(cls=self.__class__))
return Index(values, name=self.name, dtype=dtype)
if needs_i8_conversion(dtype):
msg = ('Cannot convert Float64Index to dtype {dtype}; integer '
'values are required for conversion').format(dtype=dtype)
raise TypeError(msg)
elif is_integer_dtype(dtype) and self.hasnans:
# GH 13149
raise ValueError('Cannot convert NA to integer')
return super(Float64Index, self).astype(dtype, copy=copy)

@Appender(_index_shared_docs['_convert_scalar_indexer'])
def _convert_scalar_indexer(self, key, kind=None):
Expand Down
26 changes: 7 additions & 19 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,21 @@
from pandas.core.dtypes.common import (
is_integer,
is_float,
is_object_dtype,
is_integer_dtype,
is_float_dtype,
is_scalar,
is_datetime64_dtype,
is_datetime64tz_dtype,
is_datetime64_any_dtype,
is_timedelta64_dtype,
is_period_dtype,
is_bool_dtype,
is_categorical_dtype,
pandas_dtype,
_ensure_object)
from pandas.core.dtypes.dtypes import PeriodDtype
from pandas.core.dtypes.generic import ABCSeries

import pandas.tseries.frequencies as frequencies
from pandas.tseries.frequencies import get_freq_code as _gfc
from pandas.core.indexes.category import CategoricalIndex
from pandas.core.indexes.datetimes import DatetimeIndex, Int64Index, Index
from pandas.core.indexes.timedeltas import TimedeltaIndex
from pandas.core.indexes.datetimelike import DatelikeOps, DatetimeIndexOpsMixin
Expand Down Expand Up @@ -506,23 +503,14 @@ def asof_locs(self, where, mask):
@Appender(_index_shared_docs['astype'])
def astype(self, dtype, copy=True, how='start'):
dtype = pandas_dtype(dtype)
if is_object_dtype(dtype):
return self._box_values_as_index()
elif is_integer_dtype(dtype):
if copy:
return self._int64index.copy()
else:
return self._int64index
elif is_datetime64_dtype(dtype):
return self.to_timestamp(how=how)
elif is_datetime64tz_dtype(dtype):
return self.to_timestamp(how=how).tz_localize(dtype.tz)
if is_integer_dtype(dtype):
return self._int64index.copy() if copy else self._int64index
elif is_datetime64_any_dtype(dtype):
tz = getattr(dtype, 'tz', None)
return self.to_timestamp(how=how).tz_localize(tz)
elif is_period_dtype(dtype):
return self.asfreq(freq=dtype.freq)
elif is_categorical_dtype(dtype):
return CategoricalIndex(self.values, name=self.name, dtype=dtype,
copy=copy)
raise TypeError('Cannot cast PeriodIndex to dtype %s' % dtype)
return super(PeriodIndex, self).astype(dtype, copy=copy)

@Substitution(klass='PeriodIndex')
@Appender(_shared_docs['searchsorted'])
Expand Down
29 changes: 6 additions & 23 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,20 @@
import numpy as np
from pandas.core.dtypes.common import (
_TD_DTYPE,
is_integer, is_float,
is_integer,
is_float,
is_bool_dtype,
is_list_like,
is_scalar,
is_integer_dtype,
is_object_dtype,
is_timedelta64_dtype,
is_timedelta64_ns_dtype,
is_categorical_dtype,
pandas_dtype,
_ensure_int64)
from pandas.core.dtypes.missing import isna
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.common import _maybe_box, _values_from_object

from pandas.core.indexes.base import Index
from pandas.core.indexes.category import CategoricalIndex
from pandas.core.indexes.numeric import Int64Index
import pandas.compat as compat
from pandas.compat import u
Expand Down Expand Up @@ -483,28 +480,14 @@ def to_pytimedelta(self):
@Appender(_index_shared_docs['astype'])
def astype(self, dtype, copy=True):
dtype = pandas_dtype(dtype)

if is_object_dtype(dtype):
return self._box_values_as_index()
elif is_timedelta64_ns_dtype(dtype):
if copy is True:
return self.copy()
return self
elif is_timedelta64_dtype(dtype):
if is_timedelta64_dtype(dtype) and not is_timedelta64_ns_dtype(dtype):
# return an index (essentially this is division)
result = self.values.astype(dtype, copy=copy)
if self.hasnans:
return Index(self._maybe_mask_results(result,
convert='float64'),
name=self.name)
values = self._maybe_mask_results(result, convert='float64')
return Index(values, name=self.name)
return Index(result.astype('i8'), name=self.name)
elif is_integer_dtype(dtype):
return Index(self.values.astype('i8', copy=copy), dtype='i8',
name=self.name)
elif is_categorical_dtype(dtype):
return CategoricalIndex(self.values, name=self.name, dtype=dtype,
copy=copy)
raise TypeError('Cannot cast TimedeltaIndex to dtype %s' % dtype)
return super(TimedeltaIndex, self).astype(dtype, copy=copy)

def union(self, other):
"""
Expand Down
Loading