Skip to content

Commit 3944674

Browse files
committed
BUG/TST: assure conversions of datetimelikes for object, numeric dtypes
closes pandas-dev#19176
1 parent 8347ff8 commit 3944674

File tree

9 files changed

+166
-65
lines changed

9 files changed

+166
-65
lines changed

doc/source/whatsnew/v0.23.0.txt

+5
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,11 @@ Conversion
385385
- Bug in localization of a naive datetime string in a ``Series`` constructor with a ``datetime64[ns, tz]`` dtype (:issue:`17415`)
386386
- :func:`Timestamp.replace` will now handle Daylight Saving Time transitions gracefully (:issue:`18319`)
387387

388+
389+
390+
- Bug in ``.astype()`` where converting to non-ns timedelta units would hold the incorrect dtype (:issue:`19176`, :issue:`19222`)
391+
392+
388393
Indexing
389394
^^^^^^^^
390395

pandas/_libs/tslibs/conversion.pyx

+24-2
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ from np_datetime cimport (check_dts_bounds,
2929

3030
from util cimport (is_string_object,
3131
is_datetime64_object,
32-
is_integer_object, is_float_object)
32+
is_integer_object, is_float_object, is_array)
3333

3434
from timedeltas cimport cast_from_unit
3535
from timezones cimport (is_utc, is_tzlocal, is_fixed_offset,
@@ -45,6 +45,8 @@ from nattype cimport NPY_NAT, checknull_with_nat
4545
# Constants
4646

4747
cdef int64_t DAY_NS = 86400000000000LL
48+
NS_DTYPE = np.dtype('M8[ns]')
49+
TD_DTYPE = np.dtype('m8[ns]')
4850

4951
UTC = pytz.UTC
5052

@@ -73,13 +75,14 @@ cdef inline int64_t get_datetime64_nanos(object val) except? -1:
7375
return ival
7476

7577

76-
def ensure_datetime64ns(ndarray arr):
78+
def ensure_datetime64ns(ndarray arr, copy=True):
7779
"""
7880
Ensure a np.datetime64 array has dtype specifically 'datetime64[ns]'
7981
8082
Parameters
8183
----------
8284
arr : ndarray
85+
copy : boolean, default True
8386
8487
Returns
8588
-------
@@ -104,6 +107,8 @@ def ensure_datetime64ns(ndarray arr):
104107

105108
unit = get_datetime64_unit(arr.flat[0])
106109
if unit == PANDAS_FR_ns:
110+
if copy:
111+
arr = arr.copy()
107112
result = arr
108113
else:
109114
for i in range(n):
@@ -117,6 +122,23 @@ def ensure_datetime64ns(ndarray arr):
117122
return result
118123

119124

125+
def ensure_timedelta64ns(ndarray arr, copy=True):
126+
"""
127+
Ensure a np.timedelta64 array has dtype specifically 'timedelta64[ns]'
128+
129+
Parameters
130+
----------
131+
arr : ndarray
132+
copy : boolean, default True
133+
134+
Returns
135+
-------
136+
result : ndarray with dtype timedelta64[ns]
137+
138+
"""
139+
return arr.astype(TD_DTYPE, copy=copy)
140+
141+
120142
def datetime_to_datetime64(ndarray[object] values):
121143
"""
122144
Convert ndarray of datetime-like objects to int64 array representing

pandas/core/dtypes/cast.py

+18-24
Original file line numberDiff line numberDiff line change
@@ -656,33 +656,39 @@ def astype_nansafe(arr, dtype, copy=True):
656656
return tslib.ints_to_pydatetime(arr.view(np.int64))
657657
elif dtype == np.int64:
658658
return arr.view(dtype)
659-
elif dtype != _NS_DTYPE:
660-
raise TypeError("cannot astype a datetimelike from [{from_dtype}] "
661-
"to [{to_dtype}]".format(from_dtype=arr.dtype,
662-
to_dtype=dtype))
663-
return arr.astype(_NS_DTYPE)
659+
660+
# allow frequency conversions
661+
if dtype.kind == 'M':
662+
return arr.astype(dtype)
663+
664+
raise TypeError("cannot astype a datetimelike from [{from_dtype}] "
665+
"to [{to_dtype}]".format(from_dtype=arr.dtype,
666+
to_dtype=dtype))
667+
664668
elif is_timedelta64_dtype(arr):
665669
if dtype == np.int64:
666670
return arr.view(dtype)
667671
elif dtype == object:
668672
return tslib.ints_to_pytimedelta(arr.view(np.int64))
669673

670674
# in py3, timedelta64[ns] are int64
671-
elif ((PY3 and dtype not in [_INT64_DTYPE, _TD_DTYPE]) or
672-
(not PY3 and dtype != _TD_DTYPE)):
675+
if ((PY3 and dtype not in [_INT64_DTYPE, _TD_DTYPE]) or
676+
(not PY3 and dtype != _TD_DTYPE)):
673677

674678
# allow frequency conversions
679+
# we return a float here!
675680
if dtype.kind == 'm':
676681
mask = isna(arr)
677682
result = arr.astype(dtype).astype(np.float64)
678683
result[mask] = np.nan
679684
return result
685+
elif dtype == _TD_DTYPE:
686+
return arr.astype(_TD_DTYPE, copy=copy)
680687

681-
raise TypeError("cannot astype a timedelta from [{from_dtype}] "
682-
"to [{to_dtype}]".format(from_dtype=arr.dtype,
683-
to_dtype=dtype))
688+
raise TypeError("cannot astype a timedelta from [{from_dtype}] "
689+
"to [{to_dtype}]".format(from_dtype=arr.dtype,
690+
to_dtype=dtype))
684691

685-
return arr.astype(_TD_DTYPE)
686692
elif (np.issubdtype(arr.dtype, np.floating) and
687693
np.issubdtype(dtype, np.integer)):
688694

@@ -704,19 +710,7 @@ def astype_nansafe(arr, dtype, copy=True):
704710

705711
if copy:
706712

707-
if arr.dtype == dtype:
708-
return arr.copy()
709-
710-
# we handle datetimelikes with pandas machinery
711-
# to be robust to the input type
712-
elif is_datetime64_dtype(dtype):
713-
from pandas import to_datetime
714-
return to_datetime(arr).values
715-
elif is_timedelta64_dtype(dtype):
716-
from pandas import to_timedelta
717-
return to_timedelta(arr).values
718-
719-
return arr.astype(dtype)
713+
return arr.astype(dtype, copy=True)
720714
return arr.view(dtype)
721715

722716

pandas/core/dtypes/common.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from pandas.compat import (string_types, text_type, binary_type,
55
PY3, PY36)
66
from pandas._libs import algos, lib
7+
from pandas._libs.tslibs import conversion
78
from .dtypes import (CategoricalDtype, CategoricalDtypeType,
89
DatetimeTZDtype, DatetimeTZDtypeType,
910
PeriodDtype, PeriodDtypeType,
@@ -21,8 +22,8 @@
2122
for t in ['O', 'int8', 'uint8', 'int16', 'uint16',
2223
'int32', 'uint32', 'int64', 'uint64']])
2324

24-
_NS_DTYPE = np.dtype('M8[ns]')
25-
_TD_DTYPE = np.dtype('m8[ns]')
25+
_NS_DTYPE = conversion.NS_DTYPE
26+
_TD_DTYPE = conversion.TD_DTYPE
2627
_INT64_DTYPE = np.dtype(np.int64)
2728

2829
# oh the troubles to reduce import time
@@ -31,6 +32,9 @@
3132
_ensure_float64 = algos.ensure_float64
3233
_ensure_float32 = algos.ensure_float32
3334

35+
_ensure_datetime64ns = conversion.ensure_datetime64ns
36+
_ensure_timedelta64ns = conversion.ensure_timedelta64ns
37+
3438

3539
def _ensure_float(arr):
3640
"""

pandas/core/internals.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -631,7 +631,7 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
631631
values = astype_nansafe(values.ravel(), dtype, copy=True)
632632
values = values.reshape(self.shape)
633633

634-
newb = make_block(values, placement=self.mgr_locs, dtype=dtype,
634+
newb = make_block(values, placement=self.mgr_locs,
635635
klass=klass)
636636
except:
637637
if errors == 'raise':
@@ -1954,6 +1954,13 @@ class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock):
19541954
_can_hold_na = True
19551955
is_numeric = False
19561956

1957+
def __init__(self, values, placement, fastpath=False, **kwargs):
1958+
if values.dtype != _TD_DTYPE:
1959+
values = conversion.ensure_timedelta64ns(values)
1960+
1961+
super(TimeDeltaBlock, self).__init__(values, fastpath=True,
1962+
placement=placement, **kwargs)
1963+
19571964
@property
19581965
def _box_func(self):
19591966
return lambda x: tslib.Timedelta(x, unit='ns')

pandas/tests/frame/test_dtypes.py

+67
Original file line numberDiff line numberDiff line change
@@ -640,6 +640,73 @@ def test_astype_categoricaldtype_class_raises(self, cls):
640640
with tm.assert_raises_regex(TypeError, xpr):
641641
df['A'].astype(cls)
642642

643+
@pytest.mark.parametrize("arr_dtype", [np.int64, np.float64])
644+
@pytest.mark.parametrize("dtype", ["M8", "m8"])
645+
@pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
646+
def test_astype_to_datetimelike_unit(self, arr_dtype, dtype, unit):
647+
# tests all units from numeric origination
648+
# gh-19223
649+
dtype = "{}[{}]".format(dtype, unit)
650+
arr = np.array([[1, 2, 3]], dtype=arr_dtype)
651+
df = DataFrame(arr)
652+
result = df.astype(dtype)
653+
expected = DataFrame(arr.astype(dtype))
654+
655+
tm.assert_frame_equal(result, expected)
656+
657+
@pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
658+
def test_astype_to_datetime_unit(self, unit):
659+
# tests all units from datetime origination
660+
# gh-19223
661+
dtype = "M8[{}]".format(unit)
662+
arr = np.array([[1, 2, 3]], dtype=dtype)
663+
df = DataFrame(arr)
664+
result = df.astype(dtype)
665+
expected = DataFrame(arr.astype(dtype))
666+
667+
tm.assert_frame_equal(result, expected)
668+
669+
@pytest.mark.parametrize("unit", ['ns'])
670+
def test_astype_to_timedelta_unit_ns(self, unit):
671+
# preserve the timedelta conversion
672+
# gh-19223
673+
dtype = "m8[{}]".format(unit)
674+
arr = np.array([[1, 2, 3]], dtype=dtype)
675+
df = DataFrame(arr)
676+
result = df.astype(dtype)
677+
expected = DataFrame(arr.astype(dtype))
678+
679+
tm.assert_frame_equal(result, expected)
680+
681+
@pytest.mark.parametrize("unit", ['us', 'ms', 's', 'h', 'm', 'D'])
682+
def test_astype_to_timedelta_unit(self, unit):
683+
# coerce to float
684+
# gh-19223
685+
dtype = "m8[{}]".format(unit)
686+
arr = np.array([[1, 2, 3]], dtype=dtype)
687+
df = DataFrame(arr)
688+
result = df.astype(dtype)
689+
expected = DataFrame(df.values.astype(dtype).astype(float))
690+
691+
tm.assert_frame_equal(result, expected)
692+
693+
@pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
694+
def test_astype_to_incorrect_datetimelike(self, unit):
695+
# trying to astype a m to a M, or vice-versa
696+
# gh-19176
697+
dtype = "M8[{}]".format(unit)
698+
other = "m8[{}]".format(unit)
699+
700+
with pytest.raises(TypeError):
701+
arr = np.array([[1, 2, 3]], dtype=dtype)
702+
df = DataFrame(arr)
703+
df.astype(other)
704+
705+
with pytest.raises(TypeError):
706+
arr = np.array([[1, 2, 3]], dtype=other)
707+
df = DataFrame(arr)
708+
df.astype(dtype)
709+
643710
def test_timedeltas(self):
644711
df = DataFrame(dict(A=Series(date_range('2012-1-1', periods=3,
645712
freq='D')),

pandas/tests/reshape/merge/test_merge.py

+11-13
Original file line numberDiff line numberDiff line change
@@ -523,25 +523,23 @@ def test_other_datetime_unit(self):
523523
columns=['entity_id', 'days'])
524524
tm.assert_frame_equal(result, exp)
525525

526-
def test_other_timedelta_unit(self):
526+
@pytest.mark.parametrize("unit", ['D', 'h', 'm', 's', 'ms', 'us', 'ns'])
527+
def test_other_timedelta_unit(self, unit):
527528
# GH 13389
528529
df1 = pd.DataFrame({'entity_id': [101, 102]})
529530
s = pd.Series([None, None], index=[101, 102], name='days')
530531

531-
for dtype in ['timedelta64[D]', 'timedelta64[h]', 'timedelta64[m]',
532-
'timedelta64[s]', 'timedelta64[ms]', 'timedelta64[us]',
533-
'timedelta64[ns]']:
532+
dtype = "m8[{}]".format(unit)
533+
df2 = s.astype(dtype).to_frame('days')
534+
assert df2['days'].dtype == 'm8[ns]'
534535

535-
df2 = s.astype(dtype).to_frame('days')
536-
assert df2['days'].dtype == dtype
537-
538-
result = df1.merge(df2, left_on='entity_id', right_index=True)
536+
result = df1.merge(df2, left_on='entity_id', right_index=True)
539537

540-
exp = pd.DataFrame({'entity_id': [101, 102],
541-
'days': np.array(['nat', 'nat'],
542-
dtype=dtype)},
543-
columns=['entity_id', 'days'])
544-
tm.assert_frame_equal(result, exp)
538+
exp = pd.DataFrame({'entity_id': [101, 102],
539+
'days': np.array(['nat', 'nat'],
540+
dtype=dtype)},
541+
columns=['entity_id', 'days'])
542+
tm.assert_frame_equal(result, exp)
545543

546544
def test_overlapping_columns_error_message(self):
547545
df = DataFrame({'key': [1, 2, 3],

pandas/tests/series/test_constructors.py

+14-4
Original file line numberDiff line numberDiff line change
@@ -552,10 +552,6 @@ def test_constructor_dtype_datetime64(self):
552552
s.iloc[0] = np.nan
553553
assert s.dtype == 'M8[ns]'
554554

555-
# invalid astypes
556-
for t in ['s', 'D', 'us', 'ms']:
557-
pytest.raises(TypeError, s.astype, 'M8[%s]' % t)
558-
559555
# GH3414 related
560556
pytest.raises(TypeError, lambda x: Series(
561557
Series(dates).astype('int') / 1000000, dtype='M8[ms]'))
@@ -707,6 +703,20 @@ def test_constructor_with_datetime_tz(self):
707703
expected = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern'))
708704
assert_series_equal(s, expected)
709705

706+
@pytest.mark.parametrize("arr_dtype", [np.int64, np.float64])
707+
@pytest.mark.parametrize("dtype", ["M8", "m8"])
708+
@pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
709+
def test_construction_to_datetimelike_unit(self, arr_dtype, dtype, unit):
710+
# tests all units
711+
# gh-19223
712+
dtype = "{}[{}]".format(dtype, unit)
713+
arr = np.array([1, 2, 3], dtype=arr_dtype)
714+
s = Series(arr)
715+
result = s.astype(dtype)
716+
expected = Series(arr.astype(dtype))
717+
718+
tm.assert_series_equal(result, expected)
719+
710720
@pytest.mark.parametrize('arg',
711721
['2013-01-01 00:00:00', pd.NaT, np.nan, None])
712722
def test_constructor_with_naive_string_and_datetimetz_dtype(self, arg):

pandas/tests/series/test_operators.py

+13-19
Original file line numberDiff line numberDiff line change
@@ -1649,32 +1649,26 @@ def test_invalid_ops(self):
16491649
pytest.raises(Exception, self.objSeries.__sub__,
16501650
np.array(1, dtype=np.int64))
16511651

1652-
def test_timedelta64_conversions(self):
1652+
@pytest.mark.parametrize("m", [1, 3, 10])
1653+
@pytest.mark.parametrize("unit", ['D', 'h', 'm', 's', 'ms', 'us', 'ns'])
1654+
def test_timedelta64_conversions(self, m, unit):
1655+
16531656
startdate = Series(date_range('2013-01-01', '2013-01-03'))
16541657
enddate = Series(date_range('2013-03-01', '2013-03-03'))
16551658

16561659
s1 = enddate - startdate
16571660
s1[2] = np.nan
16581661

1659-
for m in [1, 3, 10]:
1660-
for unit in ['D', 'h', 'm', 's', 'ms', 'us', 'ns']:
1661-
1662-
# op
1663-
expected = s1.apply(lambda x: x / np.timedelta64(m, unit))
1664-
result = s1 / np.timedelta64(m, unit)
1665-
assert_series_equal(result, expected)
1666-
1667-
if m == 1 and unit != 'ns':
1668-
1669-
# astype
1670-
result = s1.astype("timedelta64[{0}]".format(unit))
1671-
assert_series_equal(result, expected)
1662+
# op
1663+
expected = s1.apply(lambda x: x / np.timedelta64(m, unit))
1664+
result = s1 / np.timedelta64(m, unit)
1665+
assert_series_equal(result, expected)
16721666

1673-
# reverse op
1674-
expected = s1.apply(
1675-
lambda x: Timedelta(np.timedelta64(m, unit)) / x)
1676-
result = np.timedelta64(m, unit) / s1
1677-
assert_series_equal(result, expected)
1667+
# reverse op
1668+
expected = s1.apply(
1669+
lambda x: Timedelta(np.timedelta64(m, unit)) / x)
1670+
result = np.timedelta64(m, unit) / s1
1671+
assert_series_equal(result, expected)
16781672

16791673
# astype
16801674
s = Series(date_range('20130101', periods=3))

0 commit comments

Comments
 (0)