Skip to content

Commit 29fbc64

Browse files
committed
BUG/TST: ensure conversions of datetimelikes for object, numeric dtypes
closes pandas-dev#19176
1 parent 8347ff8 commit 29fbc64

File tree

7 files changed

+129
-42
lines changed

7 files changed

+129
-42
lines changed

doc/source/whatsnew/v0.23.0.txt

+5
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,11 @@ Conversion
385385
- Bug in localization of a naive datetime string in a ``Series`` constructor with a ``datetime64[ns, tz]`` dtype (:issue:`17415`)
386386
- :func:`Timestamp.replace` will now handle Daylight Saving Time transitions gracefully (:issue:`18319`)
387387

388+
389+
390+
- Bug in ``.astype()`` to non-ns timedelta units, where the result would hold the incorrect dtype (:issue:`19176`, :issue:`19222`)
391+
392+
388393
Indexing
389394
^^^^^^^^
390395

pandas/_libs/tslibs/conversion.pyx

+24-2
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ from np_datetime cimport (check_dts_bounds,
2929

3030
from util cimport (is_string_object,
3131
is_datetime64_object,
32-
is_integer_object, is_float_object)
32+
is_integer_object, is_float_object, is_array)
3333

3434
from timedeltas cimport cast_from_unit
3535
from timezones cimport (is_utc, is_tzlocal, is_fixed_offset,
@@ -45,6 +45,8 @@ from nattype cimport NPY_NAT, checknull_with_nat
4545
# Constants
4646

4747
cdef int64_t DAY_NS = 86400000000000LL
48+
NS_DTYPE = np.dtype('M8[ns]')
49+
TD_DTYPE = np.dtype('m8[ns]')
4850

4951
UTC = pytz.UTC
5052

@@ -73,13 +75,14 @@ cdef inline int64_t get_datetime64_nanos(object val) except? -1:
7375
return ival
7476

7577

76-
def ensure_datetime64ns(ndarray arr):
78+
def ensure_datetime64ns(ndarray arr, copy=True):
7779
"""
7880
Ensure a np.datetime64 array has dtype specifically 'datetime64[ns]'
7981
8082
Parameters
8183
----------
8284
arr : ndarray
85+
copy : boolean, default True
8386
8487
Returns
8588
-------
@@ -104,6 +107,8 @@ def ensure_datetime64ns(ndarray arr):
104107

105108
unit = get_datetime64_unit(arr.flat[0])
106109
if unit == PANDAS_FR_ns:
110+
if copy :
111+
arr = arr.copy()
107112
result = arr
108113
else:
109114
for i in range(n):
@@ -117,6 +122,23 @@ def ensure_datetime64ns(ndarray arr):
117122
return result
118123

119124

125+
def ensure_timedelta64ns(ndarray arr, copy=True):
126+
"""
127+
Ensure a np.timedelta64 array has dtype specifically 'timedelta64[ns]'
128+
129+
Parameters
130+
----------
131+
arr : ndarray
132+
copy : boolean, default True
133+
134+
Returns
135+
-------
136+
result : ndarray with dtype timedelta64[ns]
137+
138+
"""
139+
return arr.astype(TD_DTYPE, copy=copy)
140+
141+
120142
def datetime_to_datetime64(ndarray[object] values):
121143
"""
122144
Convert ndarray of datetime-like objects to int64 array representing

pandas/core/dtypes/cast.py

+18-34
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
from pandas._libs import tslib, lib
99
from pandas._libs.tslib import iNaT
10-
from pandas.compat import string_types, text_type, PY3
10+
from pandas.compat import string_types, text_type
1111
from .common import (_ensure_object, is_bool, is_integer, is_float,
1212
is_complex, is_datetimetz, is_categorical_dtype,
1313
is_datetimelike,
@@ -24,7 +24,7 @@
2424
pandas_dtype,
2525
_ensure_int8, _ensure_int16,
2626
_ensure_int32, _ensure_int64,
27-
_NS_DTYPE, _TD_DTYPE, _INT64_DTYPE,
27+
_NS_DTYPE, _TD_DTYPE,
2828
_POSSIBLY_CAST_DTYPES)
2929
from .dtypes import ExtensionDtype, DatetimeTZDtype, PeriodDtype
3030
from .generic import (ABCDatetimeIndex, ABCPeriodIndex,
@@ -656,33 +656,29 @@ def astype_nansafe(arr, dtype, copy=True):
656656
return tslib.ints_to_pydatetime(arr.view(np.int64))
657657
elif dtype == np.int64:
658658
return arr.view(dtype)
659-
elif dtype != _NS_DTYPE:
660-
raise TypeError("cannot astype a datetimelike from [{from_dtype}] "
661-
"to [{to_dtype}]".format(from_dtype=arr.dtype,
662-
to_dtype=dtype))
663-
return arr.astype(_NS_DTYPE)
659+
660+
# allow frequency conversions
661+
if dtype.kind == 'M':
662+
return arr.astype(dtype)
663+
664+
raise TypeError("cannot astype a datetimelike from [{from_dtype}] "
665+
"to [{to_dtype}]".format(from_dtype=arr.dtype,
666+
to_dtype=dtype))
667+
664668
elif is_timedelta64_dtype(arr):
665669
if dtype == np.int64:
666670
return arr.view(dtype)
667671
elif dtype == object:
668672
return tslib.ints_to_pytimedelta(arr.view(np.int64))
669673

670-
# in py3, timedelta64[ns] are int64
671-
elif ((PY3 and dtype not in [_INT64_DTYPE, _TD_DTYPE]) or
672-
(not PY3 and dtype != _TD_DTYPE)):
673-
674-
# allow frequency conversions
675-
if dtype.kind == 'm':
676-
mask = isna(arr)
677-
result = arr.astype(dtype).astype(np.float64)
678-
result[mask] = np.nan
679-
return result
674+
# allow frequency conversions
675+
if dtype.kind == 'm':
676+
return arr.astype(dtype)
680677

681-
raise TypeError("cannot astype a timedelta from [{from_dtype}] "
682-
"to [{to_dtype}]".format(from_dtype=arr.dtype,
683-
to_dtype=dtype))
678+
raise TypeError("cannot astype a timedelta from [{from_dtype}] "
679+
"to [{to_dtype}]".format(from_dtype=arr.dtype,
680+
to_dtype=dtype))
684681

685-
return arr.astype(_TD_DTYPE)
686682
elif (np.issubdtype(arr.dtype, np.floating) and
687683
np.issubdtype(dtype, np.integer)):
688684

@@ -704,19 +700,7 @@ def astype_nansafe(arr, dtype, copy=True):
704700

705701
if copy:
706702

707-
if arr.dtype == dtype:
708-
return arr.copy()
709-
710-
# we handle datetimelikes with pandas machinery
711-
# to be robust to the input type
712-
elif is_datetime64_dtype(dtype):
713-
from pandas import to_datetime
714-
return to_datetime(arr).values
715-
elif is_timedelta64_dtype(dtype):
716-
from pandas import to_timedelta
717-
return to_timedelta(arr).values
718-
719-
return arr.astype(dtype)
703+
return arr.astype(dtype, copy=True)
720704
return arr.view(dtype)
721705

722706

pandas/core/dtypes/common.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from pandas.compat import (string_types, text_type, binary_type,
55
PY3, PY36)
66
from pandas._libs import algos, lib
7+
from pandas._libs.tslibs import conversion
78
from .dtypes import (CategoricalDtype, CategoricalDtypeType,
89
DatetimeTZDtype, DatetimeTZDtypeType,
910
PeriodDtype, PeriodDtypeType,
@@ -21,8 +22,8 @@
2122
for t in ['O', 'int8', 'uint8', 'int16', 'uint16',
2223
'int32', 'uint32', 'int64', 'uint64']])
2324

24-
_NS_DTYPE = np.dtype('M8[ns]')
25-
_TD_DTYPE = np.dtype('m8[ns]')
25+
_NS_DTYPE = conversion.NS_DTYPE
26+
_TD_DTYPE = conversion.TD_DTYPE
2627
_INT64_DTYPE = np.dtype(np.int64)
2728

2829
# oh the troubles to reduce import time
@@ -31,6 +32,9 @@
3132
_ensure_float64 = algos.ensure_float64
3233
_ensure_float32 = algos.ensure_float32
3334

35+
_ensure_datetime64ns = conversion.ensure_datetime64ns
36+
_ensure_timedelta64ns = conversion.ensure_timedelta64ns
37+
3438

3539
def _ensure_float(arr):
3640
"""

pandas/core/internals.py

+7
Original file line numberDiff line numberDiff line change
@@ -1954,6 +1954,13 @@ class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock):
19541954
_can_hold_na = True
19551955
is_numeric = False
19561956

1957+
def __init__(self, values, placement, fastpath=False, **kwargs):
1958+
if values.dtype != _TD_DTYPE:
1959+
values = conversion.ensure_timedelta64ns(values)
1960+
1961+
super(TimeDeltaBlock, self).__init__(values, fastpath=True,
1962+
placement=placement, **kwargs)
1963+
19571964
@property
19581965
def _box_func(self):
19591966
return lambda x: tslib.Timedelta(x, unit='ns')

pandas/tests/frame/test_dtypes.py

+55
Original file line numberDiff line numberDiff line change
@@ -640,6 +640,61 @@ def test_astype_categoricaldtype_class_raises(self, cls):
640640
with tm.assert_raises_regex(TypeError, xpr):
641641
df['A'].astype(cls)
642642

643+
@pytest.mark.parametrize("arr_dtype", [np.int64, np.float64])
644+
@pytest.mark.parametrize("dtype", ["M8", "m8"])
645+
@pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
646+
def test_astype_to_datetimelike_unit(self, arr_dtype, dtype, unit):
647+
# tests all units from numeric origination
648+
# gh-19223
649+
dtype = "{}[{}]".format(dtype, unit)
650+
arr = np.array([[1, 2, 3]], dtype=arr_dtype)
651+
df = DataFrame(arr)
652+
result = df.astype(dtype)
653+
expected = DataFrame(arr.astype(dtype))
654+
655+
tm.assert_frame_equal(result, expected)
656+
657+
@pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
658+
def test_astype_to_datetime_unit(self, unit):
659+
# tests all units from datetime origination
660+
# gh-19223
661+
dtype = "M8[{}]".format(unit)
662+
arr = np.array([[1, 2, 3]], dtype=dtype)
663+
df = DataFrame(arr)
664+
result = df.astype(dtype)
665+
expected = DataFrame(arr.astype(dtype))
666+
667+
tm.assert_frame_equal(result, expected)
668+
669+
@pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
670+
def test_astype_to_timedelta_unit(self, unit):
671+
# tests all units from timedelta origination
672+
# gh-19223
673+
dtype = "m8[{}]".format(unit)
674+
arr = np.array([[1, 2, 3]], dtype=dtype)
675+
df = DataFrame(arr)
676+
result = df.astype(dtype)
677+
expected = DataFrame(arr.astype(dtype))
678+
679+
tm.assert_frame_equal(result, expected)
680+
681+
@pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
682+
def test_astype_to_incorrect_datetimelike(self, unit):
683+
# trying to astype a m to a M, or vice-versa
684+
# gh-19176
685+
dtype = "M8[{}]".format(unit)
686+
other = "m8[{}]".format(unit)
687+
688+
with pytest.raises(TypeError):
689+
arr = np.array([[1, 2, 3]], dtype=dtype)
690+
df = DataFrame(arr)
691+
df.astype(other)
692+
693+
with pytest.raises(TypeError):
694+
arr = np.array([[1, 2, 3]], dtype=other)
695+
df = DataFrame(arr)
696+
df.astype(dtype)
697+
643698
def test_timedeltas(self):
644699
df = DataFrame(dict(A=Series(date_range('2012-1-1', periods=3,
645700
freq='D')),

pandas/tests/series/test_constructors.py

+14-4
Original file line numberDiff line numberDiff line change
@@ -552,10 +552,6 @@ def test_constructor_dtype_datetime64(self):
552552
s.iloc[0] = np.nan
553553
assert s.dtype == 'M8[ns]'
554554

555-
# invalid astypes
556-
for t in ['s', 'D', 'us', 'ms']:
557-
pytest.raises(TypeError, s.astype, 'M8[%s]' % t)
558-
559555
# GH3414 related
560556
pytest.raises(TypeError, lambda x: Series(
561557
Series(dates).astype('int') / 1000000, dtype='M8[ms]'))
@@ -707,6 +703,20 @@ def test_constructor_with_datetime_tz(self):
707703
expected = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern'))
708704
assert_series_equal(s, expected)
709705

706+
@pytest.mark.parametrize("arr_dtype", [np.int64, np.float64])
707+
@pytest.mark.parametrize("dtype", ["M8", "m8"])
708+
@pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
709+
def test_construction_to_datetimelike_unit(self, arr_dtype, dtype, unit):
710+
# tests all units
711+
# gh-19223
712+
dtype = "{}[{}]".format(dtype, unit)
713+
arr = np.array([1, 2, 3], dtype=arr_dtype)
714+
s = Series(arr)
715+
result = s.astype(dtype)
716+
expected = Series(arr.astype(dtype))
717+
718+
tm.assert_series_equal(result, expected)
719+
710720
@pytest.mark.parametrize('arg',
711721
['2013-01-01 00:00:00', pd.NaT, np.nan, None])
712722
def test_constructor_with_naive_string_and_datetimetz_dtype(self, arg):

0 commit comments

Comments
 (0)