Skip to content

ENH: IntegerArray.astype(dt64) #32538

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Mar 14, 2020
14 changes: 4 additions & 10 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ PyDateTime_IMPORT


cimport numpy as cnp
from numpy cimport float64_t, int64_t, ndarray
from numpy cimport float64_t, int64_t, ndarray, uint8_t
import numpy as np
cnp.import_array()

Expand Down Expand Up @@ -351,7 +351,6 @@ def format_array_from_datetime(

def array_with_unit_to_datetime(
ndarray values,
ndarray mask,
object unit,
str errors='coerce'
):
Expand All @@ -373,8 +372,6 @@ def array_with_unit_to_datetime(
----------
values : ndarray of object
Date-like objects to convert.
mask : boolean ndarray
Not-a-time mask for non-nullable integer types conversion, can be None.
unit : object
Time unit to use during conversion.
errors : str, default 'raise'
Expand All @@ -395,6 +392,7 @@ def array_with_unit_to_datetime(
bint need_to_iterate = True
ndarray[int64_t] iresult
ndarray[object] oresult
ndarray mask
object tz = None

assert is_ignore or is_coerce or is_raise
Expand All @@ -404,9 +402,6 @@ def array_with_unit_to_datetime(
result = values.astype('M8[ns]')
else:
result, tz = array_to_datetime(values.astype(object), errors=errors)
if mask is not None:
iresult = result.view('i8')
iresult[mask] = NPY_NAT
return result, tz

m = cast_from_unit(None, unit)
Expand All @@ -419,9 +414,8 @@ def array_with_unit_to_datetime(
if values.dtype.kind == "i":
# Note: this condition makes the casting="same_kind" redundant
iresult = values.astype('i8', casting='same_kind', copy=False)
# If no mask, fill mask by comparing to NPY_NAT constant
if mask is None:
mask = iresult == NPY_NAT
# fill by comparing to NPY_NAT constant
mask = iresult == NPY_NAT
iresult[mask] = 0
fvalues = iresult.astype('f8') * m
need_to_iterate = False
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from pandas.core.dtypes.cast import astype_nansafe
from pandas.core.dtypes.common import (
is_bool_dtype,
is_datetime64_dtype,
is_float,
is_float_dtype,
is_integer,
Expand Down Expand Up @@ -469,6 +470,8 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike:
if is_float_dtype(dtype):
# In astype, we consider dtype=float to also mean na_value=np.nan
kwargs = dict(na_value=np.nan)
elif is_datetime64_dtype(dtype):
kwargs = dict(na_value=np.datetime64("NaT"))
else:
kwargs = {}

Expand Down
12 changes: 5 additions & 7 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,15 +323,13 @@ def _convert_listlike_datetimes(
# GH 30050 pass an ndarray to tslib.array_with_unit_to_datetime
# because it expects an ndarray argument
if isinstance(arg, IntegerArray):
# Explicitly pass NaT mask to array_with_unit_to_datetime
mask = arg.isna()
arg = arg._ndarray_values
result = arg.astype(f"datetime64[{unit}]")
tz_parsed = None
else:
mask = None

result, tz_parsed = tslib.array_with_unit_to_datetime(
arg, mask, unit, errors=errors
)
result, tz_parsed = tslib.array_with_unit_to_datetime(
arg, unit, errors=errors
)

if errors == "ignore":
from pandas import Index
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/arrays/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,8 @@ def test_array_copy():
# integer
([1, 2], IntegerArray._from_sequence([1, 2])),
([1, None], IntegerArray._from_sequence([1, None])),
([1, pd.NA], IntegerArray._from_sequence([1, pd.NA])),
([1, np.nan], IntegerArray._from_sequence([1, np.nan])),
# string
(["a", "b"], StringArray._from_sequence(["a", "b"])),
(["a", None], StringArray._from_sequence(["a", None])),
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/arrays/test_integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,15 @@ def test_astype_specific_casting(self, dtype):
expected = pd.Series([1, 2, 3, None], dtype=dtype)
tm.assert_series_equal(result, expected)

def test_astype_dt64(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have tests with None and np.nan here as well?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess those would go in the test for pd.array inference in tests.arrays.test_array, where I see a test for [1, None].

# GH#32435
arr = pd.array([1, 2, 3, pd.NA]) * 10 ** 9

result = arr.astype("datetime64[ns]")

expected = np.array([1, 2, 3, "NaT"], dtype="M8[s]").astype("M8[ns]")
tm.assert_numpy_array_equal(result, expected)

def test_construct_cast_invalid(self, dtype):

msg = "cannot safely"
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,7 @@ def test_df_where_change_dtype(self):

@pytest.mark.parametrize("dtype", ["M8", "m8"])
@pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"])
def test_astype_from_datetimelike_to_objectt(self, dtype, unit):
def test_astype_from_datetimelike_to_object(self, dtype, unit):
# tests astype to object dtype
# gh-19223 / gh-12425
dtype = f"{dtype}[{unit}]"
Expand Down