Skip to content

REF: share .astype code for astype_nansafe + TDA.astype #38481

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 16, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 6 additions & 14 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
)
from pandas.compat.numpy import function as nv

from pandas.core.dtypes.cast import astype_td64_unit_conversion
from pandas.core.dtypes.common import (
DT64NS_DTYPE,
TD64NS_DTYPE,
Expand All @@ -35,7 +36,6 @@
is_scalar,
is_string_dtype,
is_timedelta64_dtype,
is_timedelta64_ns_dtype,
pandas_dtype,
)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
Expand Down Expand Up @@ -324,22 +324,14 @@ def astype(self, dtype, copy: bool = True):
# DatetimeLikeArrayMixin super call handles other cases
dtype = pandas_dtype(dtype)

if is_timedelta64_dtype(dtype) and not is_timedelta64_ns_dtype(dtype):
# by pandas convention, converting to non-nano timedelta64
# returns an int64-dtyped array with ints representing multiples
# of the desired timedelta unit. This is essentially division
if self._hasnans:
# avoid double-copying
result = self._data.astype(dtype, copy=False)
return self._maybe_mask_results(
result, fill_value=None, convert="float64"
)
result = self._data.astype(dtype, copy=copy)
return result.astype("i8")
elif is_timedelta64_ns_dtype(dtype):
if is_dtype_equal(dtype, self.dtype):
if copy:
return self.copy()
return self

elif dtype.kind == "m":
return astype_td64_unit_conversion(self._data, dtype, copy=copy)

return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy)

def __iter__(self):
Expand Down
47 changes: 35 additions & 12 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@

from pandas.core.dtypes.common import (
DT64NS_DTYPE,
INT64_DTYPE,
POSSIBLY_CAST_DTYPES,
TD64NS_DTYPE,
ensure_int8,
Expand Down Expand Up @@ -952,6 +951,39 @@ def coerce_indexer_dtype(indexer, categories):
return ensure_int64(indexer)


def astype_td64_unit_conversion(
    values: np.ndarray, dtype: np.dtype, copy: bool
) -> np.ndarray:
    """
    Cast timedelta64 values to a timedelta64 dtype of a possibly different unit.

    pandas convention: a cast to a timedelta64 unit other than the one
    ``values`` already has does not yield a timedelta64 array.  Instead the
    caller gets a float64 array counting multiples of the requested unit
    (effectively a division), with missing entries represented as NaN.

    Parameters
    ----------
    values : np.ndarray[timedelta64[ns]]
        The data to convert.
    dtype : np.dtype
        Target timedelta64 dtype; its unit is not necessarily nanoseconds.
    copy : bool
        Only consulted when ``dtype`` matches ``values.dtype``: if True a
        copy of ``values`` is returned, otherwise ``values`` itself.

    Returns
    -------
    np.ndarray
        ``values`` (or a copy of it) when the dtypes match; otherwise a new
        float64 array with NaN wherever ``isna(values)`` is True.
    """
    same_dtype = is_dtype_equal(values.dtype, dtype)
    if same_dtype:
        return values.copy() if copy else values

    # Different unit requested.  The first cast may skip its copy because the
    # float64 cast that follows always allocates a fresh array anyway.
    in_target_unit = values.astype(dtype, copy=False)
    as_float = in_target_unit.astype(np.float64)

    # Overwrite the positions that were missing in the input with NaN.
    np.putmask(as_float, isna(values), np.nan)
    return as_float


def astype_nansafe(
arr, dtype: DtypeObj, copy: bool = True, skipna: bool = False
) -> ArrayLike:
Expand Down Expand Up @@ -1007,17 +1039,8 @@ def astype_nansafe(
raise ValueError("Cannot convert NaT values to integer")
return arr.view(dtype)

if dtype not in [INT64_DTYPE, TD64NS_DTYPE]:

# allow frequency conversions
# we return a float here!
if dtype.kind == "m":
mask = isna(arr)
result = arr.astype(dtype).astype(np.float64)
result[mask] = np.nan
return result
elif dtype == TD64NS_DTYPE:
return arr.astype(TD64NS_DTYPE, copy=copy)
elif dtype.kind == "m":
return astype_td64_unit_conversion(arr, dtype, copy=copy)

raise TypeError(f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]")

Expand Down
29 changes: 3 additions & 26 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,24 +4,14 @@
from pandas._libs.tslibs import Timedelta, to_offset
from pandas._typing import DtypeObj
from pandas.errors import InvalidIndexError
from pandas.util._decorators import doc

from pandas.core.dtypes.common import (
TD64NS_DTYPE,
is_scalar,
is_timedelta64_dtype,
is_timedelta64_ns_dtype,
pandas_dtype,
)

from pandas.core.dtypes.common import TD64NS_DTYPE, is_scalar, is_timedelta64_dtype

from pandas.core.arrays import datetimelike as dtl
from pandas.core.arrays.timedeltas import TimedeltaArray
import pandas.core.common as com
from pandas.core.indexes.base import Index, maybe_extract_name
from pandas.core.indexes.datetimelike import (
DatetimeIndexOpsMixin,
DatetimeTimedeltaMixin,
)
from pandas.core.indexes.datetimelike import DatetimeTimedeltaMixin
from pandas.core.indexes.extension import inherit_names


Expand Down Expand Up @@ -159,19 +149,6 @@ def __new__(

# -------------------------------------------------------------------

@doc(Index.astype)
def astype(self, dtype, copy: bool = True):
    # No docstring here on purpose: the documentation comes from the
    # ``doc(Index.astype)`` decorator.
    dtype = pandas_dtype(dtype)

    non_nano_td64 = is_timedelta64_dtype(dtype) and not is_timedelta64_ns_dtype(
        dtype
    )
    if not non_nano_td64:
        # Everything else is handled by the shared datetime-like implementation.
        return DatetimeIndexOpsMixin.astype(self, dtype, copy=copy)

    # The 'timedelta64' (non-ns) check has to be repeated at the index level
    # so that a numeric Index is returned; pandas would otherwise hand back a
    # TimedeltaIndex when dtype='timedelta'.
    converted = self._data.astype(dtype, copy=copy)
    if not self.hasnans:
        # Without missing values the result is exposed as int64.
        converted = converted.astype("i8")
    return Index(converted, name=self.name)

def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
"""
Can we compare values of the given dtype to our own?
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/indexes/timedeltas/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,21 @@ def test_fields(self):
rng.name = "name"
assert rng.days.name == "name"

def test_freq_conversion_always_floating(self):
    # Casting to a non-nano timedelta64 unit yields float64 even when the
    # data holds no NaT; TimedeltaIndex, TimedeltaArray and Series must agree.
    index = timedelta_range("1 Day", periods=30)

    # TimedeltaIndex
    from_index = index.astype("m8[s]")
    seconds = (index.view("i8") / 10 ** 9).astype(np.float64)
    expected = Index(seconds)
    tm.assert_index_equal(from_index, expected)

    # TimedeltaArray (the index's backing array)
    from_array = index._data.astype("m8[s]")
    tm.assert_numpy_array_equal(from_array, expected._values)

    # Series
    from_series = index.to_series().astype("m8[s]")
    tm.assert_numpy_array_equal(from_series._values, expected._values)

def test_freq_conversion(self):

# doc example
Expand Down