Skip to content

Commit 66f4a28

Browse files
jbrockmendel authored and SeeminSyed committed
ENH: IntegerArray.astype(dt64) (pandas-dev#32538)
1 parent 2d20b00 commit 66f4a28

File tree

6 files changed

+24
-18
lines changed

6 files changed

+24
-18
lines changed

pandas/_libs/tslib.pyx

+4 -10
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@ PyDateTime_IMPORT
1414

1515

1616
cimport numpy as cnp
17-
from numpy cimport float64_t, int64_t, ndarray
17+
from numpy cimport float64_t, int64_t, ndarray, uint8_t
1818
import numpy as np
1919
cnp.import_array()
2020

@@ -351,7 +351,6 @@ def format_array_from_datetime(
351351

352352
def array_with_unit_to_datetime(
353353
ndarray values,
354-
ndarray mask,
355354
object unit,
356355
str errors='coerce'
357356
):
@@ -373,8 +372,6 @@ def array_with_unit_to_datetime(
373372
----------
374373
values : ndarray of object
375374
Date-like objects to convert.
376-
mask : boolean ndarray
377-
Not-a-time mask for non-nullable integer types conversion, can be None.
378375
unit : object
379376
Time unit to use during conversion.
380377
errors : str, default 'raise'
@@ -395,6 +392,7 @@ def array_with_unit_to_datetime(
395392
bint need_to_iterate = True
396393
ndarray[int64_t] iresult
397394
ndarray[object] oresult
395+
ndarray mask
398396
object tz = None
399397

400398
assert is_ignore or is_coerce or is_raise
@@ -404,9 +402,6 @@ def array_with_unit_to_datetime(
404402
result = values.astype('M8[ns]')
405403
else:
406404
result, tz = array_to_datetime(values.astype(object), errors=errors)
407-
if mask is not None:
408-
iresult = result.view('i8')
409-
iresult[mask] = NPY_NAT
410405
return result, tz
411406

412407
m = cast_from_unit(None, unit)
@@ -419,9 +414,8 @@ def array_with_unit_to_datetime(
419414
if values.dtype.kind == "i":
420415
# Note: this condition makes the casting="same_kind" redundant
421416
iresult = values.astype('i8', casting='same_kind', copy=False)
422-
# If no mask, fill mask by comparing to NPY_NAT constant
423-
if mask is None:
424-
mask = iresult == NPY_NAT
417+
# fill by comparing to NPY_NAT constant
418+
mask = iresult == NPY_NAT
425419
iresult[mask] = 0
426420
fvalues = iresult.astype('f8') * m
427421
need_to_iterate = False

pandas/core/arrays/integer.py

+3
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
from pandas.core.dtypes.cast import astype_nansafe
1414
from pandas.core.dtypes.common import (
1515
is_bool_dtype,
16+
is_datetime64_dtype,
1617
is_float,
1718
is_float_dtype,
1819
is_integer,
@@ -469,6 +470,8 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike:
469470
if is_float_dtype(dtype):
470471
# In astype, we consider dtype=float to also mean na_value=np.nan
471472
kwargs = dict(na_value=np.nan)
473+
elif is_datetime64_dtype(dtype):
474+
kwargs = dict(na_value=np.datetime64("NaT"))
472475
else:
473476
kwargs = {}
474477

pandas/core/tools/datetimes.py

+5 -7
Original file line number | Diff line number | Diff line change
@@ -323,15 +323,13 @@ def _convert_listlike_datetimes(
323323
# GH 30050 pass an ndarray to tslib.array_with_unit_to_datetime
324324
# because it expects an ndarray argument
325325
if isinstance(arg, IntegerArray):
326-
# Explicitly pass NaT mask to array_with_unit_to_datetime
327-
mask = arg.isna()
328-
arg = arg._ndarray_values
326+
result = arg.astype(f"datetime64[{unit}]")
327+
tz_parsed = None
329328
else:
330-
mask = None
331329

332-
result, tz_parsed = tslib.array_with_unit_to_datetime(
333-
arg, mask, unit, errors=errors
334-
)
330+
result, tz_parsed = tslib.array_with_unit_to_datetime(
331+
arg, unit, errors=errors
332+
)
335333

336334
if errors == "ignore":
337335
from pandas import Index

pandas/tests/arrays/test_array.py

+2
Original file line number | Diff line number | Diff line change
@@ -222,6 +222,8 @@ def test_array_copy():
222222
# integer
223223
([1, 2], IntegerArray._from_sequence([1, 2])),
224224
([1, None], IntegerArray._from_sequence([1, None])),
225+
([1, pd.NA], IntegerArray._from_sequence([1, pd.NA])),
226+
([1, np.nan], IntegerArray._from_sequence([1, np.nan])),
225227
# string
226228
(["a", "b"], StringArray._from_sequence(["a", "b"])),
227229
(["a", None], StringArray._from_sequence(["a", None])),

pandas/tests/arrays/test_integer.py

+9
Original file line number | Diff line number | Diff line change
@@ -633,6 +633,15 @@ def test_astype_specific_casting(self, dtype):
633633
expected = pd.Series([1, 2, 3, None], dtype=dtype)
634634
tm.assert_series_equal(result, expected)
635635

636+
def test_astype_dt64(self):
637+
# GH#32435
638+
arr = pd.array([1, 2, 3, pd.NA]) * 10 ** 9
639+
640+
result = arr.astype("datetime64[ns]")
641+
642+
expected = np.array([1, 2, 3, "NaT"], dtype="M8[s]").astype("M8[ns]")
643+
tm.assert_numpy_array_equal(result, expected)
644+
636645
def test_construct_cast_invalid(self, dtype):
637646

638647
msg = "cannot safely"

pandas/tests/frame/test_dtypes.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -505,7 +505,7 @@ def test_df_where_change_dtype(self):
505505

506506
@pytest.mark.parametrize("dtype", ["M8", "m8"])
507507
@pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"])
508-
def test_astype_from_datetimelike_to_objectt(self, dtype, unit):
508+
def test_astype_from_datetimelike_to_object(self, dtype, unit):
509509
# tests astype to object dtype
510510
# gh-19223 / gh-12425
511511
dtype = f"{dtype}[{unit}]"

0 commit comments

Comments
 (0)