Skip to content

Commit 83b902c

Browse files
jbrockmendel authored and Pingviinituutti committed
implement astype portion of pandas-dev#24024 (pandas-dev#24405)
1 parent 279301b commit 83b902c

File tree

17 files changed

+280
-115
lines changed

17 files changed

+280
-115
lines changed

doc/source/whatsnew/v0.24.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -1328,6 +1328,7 @@ Datetimelike
13281328
- Bug in :func:`to_datetime` where ``box`` and ``utc`` arguments were ignored when passing a :class:`DataFrame` or ``dict`` of unit mappings (:issue:`23760`)
13291329
- Bug in :attr:`Series.dt` where the cache would not update properly after an in-place operation (:issue:`24408`)
13301330
- Bug in :class:`PeriodIndex` where comparisons against an array-like object with length 1 failed to raise ``ValueError`` (:issue:`23078`)
1331+
- Bug in :meth:`DatetimeIndex.astype`, :meth:`PeriodIndex.astype` and :meth:`TimedeltaIndex.astype` ignoring the sign of the ``dtype`` for unsigned integer dtypes (:issue:`24405`).
13311332

13321333
Timedelta
13331334
^^^^^^^^^

pandas/core/arrays/datetimelike.py

+53 -6
Original file line number | Diff line number | Diff line change
@@ -17,10 +17,12 @@
1717
from pandas.util._decorators import Appender, Substitution, deprecate_kwarg
1818

1919
from pandas.core.dtypes.common import (
20-
is_bool_dtype, is_datetime64_any_dtype, is_datetime64_dtype,
21-
is_datetime64tz_dtype, is_extension_array_dtype, is_float_dtype,
22-
is_integer_dtype, is_list_like, is_object_dtype, is_offsetlike,
23-
is_period_dtype, is_timedelta64_dtype, needs_i8_conversion)
20+
is_bool_dtype, is_categorical_dtype, is_datetime64_any_dtype,
21+
is_datetime64_dtype, is_datetime64tz_dtype, is_datetime_or_timedelta_dtype,
22+
is_dtype_equal, is_extension_array_dtype, is_float_dtype, is_integer_dtype,
23+
is_list_like, is_object_dtype, is_offsetlike, is_period_dtype,
24+
is_string_dtype, is_timedelta64_dtype, is_unsigned_integer_dtype,
25+
needs_i8_conversion, pandas_dtype)
2426
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
2527
from pandas.core.dtypes.missing import isna
2628

@@ -315,7 +317,7 @@ def _ndarray_values(self):
315317
# ----------------------------------------------------------------
316318
# Rendering Methods
317319

318-
def _format_native_types(self, na_rep=u'NaT', date_format=None):
320+
def _format_native_types(self, na_rep='NaT', date_format=None):
319321
"""
320322
Helper method for astype when converting to strings.
321323
@@ -403,9 +405,54 @@ def __getitem__(self, key):
403405
return self._simple_new(result, **attribs)
404406

405407
def astype(self, dtype, copy=True):
408+
# Some notes on cases we don't have to handle here in the base class:
409+
# 1. PeriodArray.astype handles period -> period
410+
# 2. DatetimeArray.astype handles conversion between tz.
411+
# 3. DatetimeArray.astype handles datetime -> period
412+
from pandas import Categorical
413+
dtype = pandas_dtype(dtype)
414+
406415
if is_object_dtype(dtype):
407416
return self._box_values(self.asi8)
408-
return super(DatetimeLikeArrayMixin, self).astype(dtype, copy)
417+
elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
418+
return self._format_native_types()
419+
elif is_integer_dtype(dtype):
420+
# we deliberately ignore int32 vs. int64 here.
421+
# See https://github.com/pandas-dev/pandas/issues/24381 for more.
422+
values = self.asi8
423+
424+
if is_unsigned_integer_dtype(dtype):
425+
# Again, we ignore int32 vs. int64
426+
values = values.view("uint64")
427+
428+
if copy:
429+
values = values.copy()
430+
return values
431+
elif (is_datetime_or_timedelta_dtype(dtype) and
432+
not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype):
433+
# disallow conversion between datetime/timedelta,
434+
# and conversions for any datetimelike to float
435+
msg = 'Cannot cast {name} to dtype {dtype}'
436+
raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
437+
elif is_categorical_dtype(dtype):
438+
return Categorical(self, dtype=dtype)
439+
else:
440+
return np.asarray(self, dtype=dtype)
441+
442+
def view(self, dtype=None):
443+
"""
444+
New view on this array with the same data.
445+
446+
Parameters
447+
----------
448+
dtype : numpy dtype, optional
449+
450+
Returns
451+
-------
452+
ndarray
453+
With the specified `dtype`.
454+
"""
455+
return self._data.view(dtype=dtype)
409456

410457
# ------------------------------------------------------------------
411458
# ExtensionArray Interface

pandas/core/arrays/datetimes.py

+33 -3
Original file line number | Diff line number | Diff line change
@@ -15,8 +15,9 @@
1515

1616
from pandas.core.dtypes.common import (
1717
_INT64_DTYPE, _NS_DTYPE, is_categorical_dtype, is_datetime64_dtype,
18-
is_datetime64tz_dtype, is_extension_type, is_float_dtype, is_int64_dtype,
19-
is_object_dtype, is_period_dtype, is_string_dtype, is_timedelta64_dtype)
18+
is_datetime64_ns_dtype, is_datetime64tz_dtype, is_dtype_equal,
19+
is_extension_type, is_float_dtype, is_int64_dtype, is_object_dtype,
20+
is_period_dtype, is_string_dtype, is_timedelta64_dtype, pandas_dtype)
2021
from pandas.core.dtypes.dtypes import DatetimeTZDtype
2122
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
2223
from pandas.core.dtypes.missing import isna
@@ -473,6 +474,35 @@ def __iter__(self):
473474
for v in converted:
474475
yield v
475476

477+
def astype(self, dtype, copy=True):
478+
# We handle
479+
# --> datetime
480+
# --> period
481+
# DatetimeLikeArrayMixin Super handles the rest.
482+
dtype = pandas_dtype(dtype)
483+
484+
if (is_datetime64_ns_dtype(dtype) and
485+
not is_dtype_equal(dtype, self.dtype)):
486+
# GH#18951: datetime64_ns dtype but not equal means different tz
487+
new_tz = getattr(dtype, 'tz', None)
488+
if getattr(self.dtype, 'tz', None) is None:
489+
return self.tz_localize(new_tz)
490+
result = self.tz_convert(new_tz)
491+
if new_tz is None:
492+
# Do we want .astype('datetime64[ns]') to be an ndarray?
493+
# The astype in Block._astype expects this to return an
494+
# ndarray, but we could maybe work around it there.
495+
result = result._data
496+
return result
497+
elif is_datetime64tz_dtype(self.dtype) and is_dtype_equal(self.dtype,
498+
dtype):
499+
if copy:
500+
return self.copy()
501+
return self
502+
elif is_period_dtype(dtype):
503+
return self.to_period(freq=dtype.freq)
504+
return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy)
505+
476506
# ----------------------------------------------------------------
477507
# ExtensionArray Interface
478508

@@ -495,7 +525,7 @@ def _validate_fill_value(self, fill_value):
495525
# -----------------------------------------------------------------
496526
# Rendering Methods
497527

498-
def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs):
528+
def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs):
499529
from pandas.io.formats.format import _get_format_datetime64_from_values
500530
fmt = _get_format_datetime64_from_values(self, date_format)
501531

pandas/core/arrays/period.py

+6 -37
Original file line number | Diff line number | Diff line change
@@ -15,10 +15,8 @@
1515
from pandas.util._validators import validate_fillna_kwargs
1616

1717
from pandas.core.dtypes.common import (
18-
_TD_DTYPE, ensure_object, is_array_like, is_categorical_dtype,
19-
is_datetime64_dtype, is_datetime_or_timedelta_dtype, is_dtype_equal,
20-
is_float_dtype, is_integer_dtype, is_list_like, is_object_dtype,
21-
is_period_dtype, is_string_dtype, pandas_dtype)
18+
_TD_DTYPE, ensure_object, is_array_like, is_datetime64_dtype,
19+
is_float_dtype, is_list_like, is_period_dtype, pandas_dtype)
2220
from pandas.core.dtypes.dtypes import PeriodDtype
2321
from pandas.core.dtypes.generic import ABCIndexClass, ABCPeriodIndex, ABCSeries
2422
from pandas.core.dtypes.missing import isna, notna
@@ -599,42 +597,13 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs):
599597
# ------------------------------------------------------------------
600598

601599
def astype(self, dtype, copy=True):
602-
# TODO: Figure out something better here...
603-
# We have DatetimeLikeArrayMixin ->
604-
# super(...), which ends up being... DatetimeIndexOpsMixin?
605-
# this is complicated.
606-
# need a pandas_astype(arr, dtype).
607-
from pandas import Categorical
608-
600+
# We handle Period[T] -> Period[U]
601+
# Our parent handles everything else.
609602
dtype = pandas_dtype(dtype)
610603

611-
if is_object_dtype(dtype):
612-
return np.asarray(self, dtype=object)
613-
elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
614-
return self._format_native_types()
615-
elif is_integer_dtype(dtype):
616-
values = self._data
617-
618-
if values.dtype != dtype:
619-
# int32 vs. int64
620-
values = values.astype(dtype)
621-
622-
elif copy:
623-
values = values.copy()
624-
625-
return values
626-
elif (is_datetime_or_timedelta_dtype(dtype) and
627-
not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype):
628-
# disallow conversion between datetime/timedelta,
629-
# and conversions for any datetimelike to float
630-
msg = 'Cannot cast {name} to dtype {dtype}'
631-
raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
632-
elif is_categorical_dtype(dtype):
633-
return Categorical(self, dtype=dtype)
634-
elif is_period_dtype(dtype):
604+
if is_period_dtype(dtype):
635605
return self.asfreq(dtype.freq)
636-
else:
637-
return np.asarray(self, dtype=dtype)
606+
return super(PeriodArray, self).astype(dtype, copy=copy)
638607

639608
@property
640609
def flags(self):

pandas/core/arrays/timedeltas.py

+33 -3
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,8 @@
1717
from pandas.core.dtypes.common import (
1818
_NS_DTYPE, _TD_DTYPE, ensure_int64, is_datetime64_dtype, is_float_dtype,
1919
is_integer_dtype, is_list_like, is_object_dtype, is_scalar,
20-
is_string_dtype, is_timedelta64_dtype)
20+
is_string_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype,
21+
pandas_dtype)
2122
from pandas.core.dtypes.dtypes import DatetimeTZDtype
2223
from pandas.core.dtypes.generic import (
2324
ABCDataFrame, ABCIndexClass, ABCSeries, ABCTimedeltaIndex)
@@ -234,15 +235,44 @@ def _validate_fill_value(self, fill_value):
234235
"Got '{got}'.".format(got=fill_value))
235236
return fill_value
236237

238+
def astype(self, dtype, copy=True):
239+
# We handle
240+
# --> timedelta64[ns]
241+
# --> timedelta64
242+
# DatetimeLikeArrayMixin super call handles other cases
243+
dtype = pandas_dtype(dtype)
244+
245+
if is_timedelta64_dtype(dtype) and not is_timedelta64_ns_dtype(dtype):
246+
# by pandas convention, converting to non-nano timedelta64
247+
# returns an int64-dtyped array with ints representing multiples
248+
# of the desired timedelta unit. This is essentially division
249+
if self._hasnans:
250+
# avoid double-copying
251+
result = self._data.astype(dtype, copy=False)
252+
values = self._maybe_mask_results(result,
253+
fill_value=None,
254+
convert='float64')
255+
return values
256+
result = self._data.astype(dtype, copy=copy)
257+
return result.astype('i8')
258+
elif is_timedelta64_ns_dtype(dtype):
259+
if copy:
260+
return self.copy()
261+
return self
262+
return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy)
263+
237264
# ----------------------------------------------------------------
238265
# Rendering Methods
239266

240267
def _formatter(self, boxed=False):
241268
from pandas.io.formats.format import _get_format_timedelta64
242269
return _get_format_timedelta64(self, box=True)
243270

244-
def _format_native_types(self):
245-
return self.astype(object)
271+
def _format_native_types(self, na_rep='NaT', date_format=None):
272+
from pandas.io.formats.format import _get_format_timedelta64
273+
274+
formatter = _get_format_timedelta64(self._data, na_rep)
275+
return np.array([formatter(x) for x in self._data])
246276

247277
# ----------------------------------------------------------------
248278
# Arithmetic Methods

pandas/core/dtypes/missing.py

+4 -2
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,8 @@
1414
is_period_dtype, is_scalar, is_string_dtype, is_string_like_dtype,
1515
is_timedelta64_dtype, needs_i8_conversion, pandas_dtype)
1616
from .generic import (
17-
ABCExtensionArray, ABCGeneric, ABCIndexClass, ABCMultiIndex, ABCSeries)
17+
ABCDatetimeArray, ABCExtensionArray, ABCGeneric, ABCIndexClass,
18+
ABCMultiIndex, ABCSeries, ABCTimedeltaArray)
1819
from .inference import is_list_like
1920

2021
isposinf_scalar = libmissing.isposinf_scalar
@@ -108,7 +109,8 @@ def _isna_new(obj):
108109
elif isinstance(obj, ABCMultiIndex):
109110
raise NotImplementedError("isna is not defined for MultiIndex")
110111
elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass,
111-
ABCExtensionArray)):
112+
ABCExtensionArray,
113+
ABCDatetimeArray, ABCTimedeltaArray)):
112114
return _isna_ndarraylike(obj)
113115
elif isinstance(obj, ABCGeneric):
114116
return obj._constructor(obj._data.isna(func=isna))

pandas/core/indexes/base.py

+3 -2
Original file line number | Diff line number | Diff line change
@@ -742,8 +742,9 @@ def view(self, cls=None):
742742
Parameters
743743
----------
744744
dtype : numpy dtype or pandas type
745-
Note that any integer `dtype` is treated as ``'int64'``,
746-
regardless of the sign and size.
745+
Note that any signed integer `dtype` is treated as ``'int64'``,
746+
and any unsigned integer `dtype` is treated as ``'uint64'``,
747+
regardless of the size.
747748
copy : bool, default True
748749
By default, astype always returns a newly allocated object.
749750
If copy is set to False and internal requirements on dtype are

pandas/core/indexes/datetimelike.py

+14 -21
Original file line number | Diff line number | Diff line change
@@ -13,10 +13,8 @@
1313
from pandas.util._decorators import Appender, cache_readonly
1414

1515
from pandas.core.dtypes.common import (
16-
ensure_int64, is_bool_dtype, is_categorical_dtype,
17-
is_datetime_or_timedelta_dtype, is_dtype_equal, is_float, is_float_dtype,
18-
is_integer, is_integer_dtype, is_list_like, is_object_dtype,
19-
is_period_dtype, is_scalar, is_string_dtype)
16+
ensure_int64, is_bool_dtype, is_dtype_equal, is_float, is_integer,
17+
is_list_like, is_period_dtype, is_scalar)
2018
from pandas.core.dtypes.generic import ABCIndex, ABCIndexClass, ABCSeries
2119

2220
from pandas.core import algorithms, ops
@@ -39,6 +37,7 @@ class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin):
3937

4038
# override DatetimeLikeArrayMixin method
4139
copy = Index.copy
40+
view = Index.view
4241

4342
# DatetimeLikeArrayMixin assumes subclasses are mutable, so these are
4443
# properties there. They can be made into cache_readonly for Index
@@ -550,24 +549,18 @@ def _maybe_box_as_values(self, values, **attribs):
550549
# - sort_values
551550
return values
552551

552+
@Appender(_index_shared_docs['astype'])
553553
def astype(self, dtype, copy=True):
554-
if is_object_dtype(dtype):
555-
return self._box_values_as_index()
556-
elif is_string_dtype(dtype) and not is_categorical_dtype(dtype):
557-
return Index(self.format(), name=self.name, dtype=object)
558-
elif is_integer_dtype(dtype):
559-
# TODO(DatetimeArray): use self._values here.
560-
# Can't use ._values currently, because that returns a
561-
# DatetimeIndex, which throws us in an infinite loop.
562-
return Index(self.values.astype('i8', copy=copy), name=self.name,
563-
dtype='i8')
564-
elif (is_datetime_or_timedelta_dtype(dtype) and
565-
not is_dtype_equal(self.dtype, dtype)) or is_float_dtype(dtype):
566-
# disallow conversion between datetime/timedelta,
567-
# and conversions for any datetimelike to float
568-
msg = 'Cannot cast {name} to dtype {dtype}'
569-
raise TypeError(msg.format(name=type(self).__name__, dtype=dtype))
570-
return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy)
554+
if is_dtype_equal(self.dtype, dtype) and copy is False:
555+
# Ensure that self.astype(self.dtype) is self
556+
return self
557+
558+
new_values = self._eadata.astype(dtype, copy=copy)
559+
560+
# pass copy=False because any copying will be done in the
561+
# _eadata.astype call above
562+
return Index(new_values,
563+
dtype=new_values.dtype, name=self.name, copy=False)
571564

572565
@Appender(DatetimeLikeArrayMixin._time_shift.__doc__)
573566
def _time_shift(self, periods, freq=None):

0 commit comments

Comments
 (0)