Skip to content

Commit 8512cc5

Browse files
authored
COMPAT: astype(object) and dtype=object should be reflexive for Series constructor with datetimelikes (#18231)
closes #17449
1 parent 8d04daf commit 8512cc5

File tree

13 files changed

+166
-84
lines changed

13 files changed

+166
-84
lines changed

doc/source/whatsnew/v0.22.0.txt

+8
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,18 @@ Backwards incompatible API changes
3838
-
3939
-
4040

41+
42+
43+
44+
45+
4146
.. _whatsnew_0220.api:
4247

4348
Other API Changes
4449
^^^^^^^^^^^^^^^^^
4550

51+
- :func:`Series.astype` and :func:`Index.astype` with an incompatible dtype will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`18231`)
52+
- ``Series`` construction with an ``object`` dtyped tz-aware datetime and ``dtype=object`` specified will now return an ``object`` dtyped ``Series``; previously this would infer the datetime dtype (:issue:`18231`)
4653
- ``NaT`` division with :class:`datetime.timedelta` will now return ``NaN`` instead of raising (:issue:`17876`)
4754
- All-NaN levels in a ``MultiIndex`` are now assigned ``float`` rather than ``object`` dtype, promoting consistency with ``Index`` (:issue:`17929`).
4855
- :class:`Timestamp` will no longer silently ignore unused or invalid ``tz`` or ``tzinfo`` keyword arguments (:issue:`17690`)
@@ -80,6 +87,7 @@ Performance Improvements
8087
- :class:`DateOffset` arithmetic performance is improved (:issue:`18218`)
8188
- Converting a ``Series`` of ``Timedelta`` objects to days, seconds, etc... sped up through vectorization of underlying methods (:issue:`18092`)
8289
- The overridden ``Timedelta`` properties of days, seconds and microseconds have been removed, leveraging their built-in Python versions instead (:issue:`18242`)
90+
- ``Series`` construction will reduce the number of copies made of the input data in certain cases (:issue:`17449`)
8391

8492
.. _whatsnew_0220.docs:
8593

pandas/core/dtypes/cast.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313
is_datetimelike,
1414
is_extension_type, is_object_dtype,
1515
is_datetime64tz_dtype, is_datetime64_dtype,
16-
is_timedelta64_dtype, is_dtype_equal,
16+
is_datetime64_ns_dtype,
17+
is_timedelta64_dtype, is_timedelta64_ns_dtype,
18+
is_dtype_equal,
1719
is_float_dtype, is_complex_dtype,
1820
is_integer_dtype,
1921
is_datetime_or_timedelta_dtype,
@@ -829,8 +831,10 @@ def maybe_castable(arr):
829831
# check datetime64[ns]/timedelta64[ns] are valid
830832
# otherwise try to coerce
831833
kind = arr.dtype.kind
832-
if kind == 'M' or kind == 'm':
833-
return is_datetime64_dtype(arr.dtype)
834+
if kind == 'M':
835+
return is_datetime64_ns_dtype(arr.dtype)
836+
elif kind == 'm':
837+
return is_timedelta64_ns_dtype(arr.dtype)
834838

835839
return arr.dtype.name not in _POSSIBLY_CAST_DTYPES
836840

pandas/core/indexes/base.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1027,13 +1027,16 @@ def to_frame(self, index=True):
10271027
result.index = self
10281028
return result
10291029

1030-
def _to_embed(self, keep_tz=False):
1030+
def _to_embed(self, keep_tz=False, dtype=None):
10311031
"""
10321032
*this is an internal non-public method*
10331033
10341034
return an array repr of this object, potentially casting to object
10351035
10361036
"""
1037+
if dtype is not None:
1038+
return self.astype(dtype)._to_embed(keep_tz=keep_tz)
1039+
10371040
return self.values.copy()
10381041

10391042
_index_shared_docs['astype'] = """

pandas/core/indexes/datetimes.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -917,7 +917,7 @@ def astype(self, dtype, copy=True):
917917
return Index(self.format(), name=self.name, dtype=object)
918918
elif is_period_dtype(dtype):
919919
return self.to_period(freq=dtype.freq)
920-
raise ValueError('Cannot cast DatetimeIndex to dtype %s' % dtype)
920+
raise TypeError('Cannot cast DatetimeIndex to dtype %s' % dtype)
921921

922922
def _get_time_micros(self):
923923
values = self.asi8
@@ -957,12 +957,15 @@ def to_series(self, keep_tz=False):
957957
index=self._shallow_copy(),
958958
name=self.name)
959959

960-
def _to_embed(self, keep_tz=False):
960+
def _to_embed(self, keep_tz=False, dtype=None):
961961
"""
962962
return an array repr of this object, potentially casting to object
963963
964964
This is for internal compat
965965
"""
966+
if dtype is not None:
967+
return self.astype(dtype)._to_embed(keep_tz=keep_tz)
968+
966969
if keep_tz and self.tz is not None:
967970

968971
# preserve the tz & copy

pandas/core/indexes/period.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -464,10 +464,14 @@ def __array_wrap__(self, result, context=None):
464464
def _box_func(self):
465465
return lambda x: Period._from_ordinal(ordinal=x, freq=self.freq)
466466

467-
def _to_embed(self, keep_tz=False):
467+
def _to_embed(self, keep_tz=False, dtype=None):
468468
"""
469469
return an array repr of this object, potentially casting to object
470470
"""
471+
472+
if dtype is not None:
473+
return self.astype(dtype)._to_embed(keep_tz=keep_tz)
474+
471475
return self.asobject.values
472476

473477
@property
@@ -510,7 +514,7 @@ def astype(self, dtype, copy=True, how='start'):
510514
return self.to_timestamp(how=how).tz_localize(dtype.tz)
511515
elif is_period_dtype(dtype):
512516
return self.asfreq(freq=dtype.freq)
513-
raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype)
517+
raise TypeError('Cannot cast PeriodIndex to dtype %s' % dtype)
514518

515519
@Substitution(klass='PeriodIndex')
516520
@Appender(_shared_docs['searchsorted'])

pandas/core/indexes/timedeltas.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -495,7 +495,7 @@ def astype(self, dtype, copy=True):
495495
elif is_integer_dtype(dtype):
496496
return Index(self.values.astype('i8', copy=copy), dtype='i8',
497497
name=self.name)
498-
raise ValueError('Cannot cast TimedeltaIndex to dtype %s' % dtype)
498+
raise TypeError('Cannot cast TimedeltaIndex to dtype %s' % dtype)
499499

500500
def union(self, other):
501501
"""

pandas/core/series.py

+10-8
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@
2929
_is_unorderable_exception,
3030
_ensure_platform_int,
3131
pandas_dtype)
32-
from pandas.core.dtypes.generic import ABCSparseArray, ABCDataFrame
32+
from pandas.core.dtypes.generic import (
33+
ABCSparseArray, ABCDataFrame, ABCIndexClass)
3334
from pandas.core.dtypes.cast import (
3435
maybe_upcast, infer_dtype_from_scalar,
3536
maybe_convert_platform,
@@ -184,8 +185,8 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
184185
if name is None:
185186
name = data.name
186187

187-
data = data._to_embed(keep_tz=True)
188-
copy = True
188+
data = data._to_embed(keep_tz=True, dtype=dtype)
189+
copy = False
189190
elif isinstance(data, np.ndarray):
190191
pass
191192
elif isinstance(data, Series):
@@ -3139,7 +3140,9 @@ def _sanitize_index(data, index, copy=False):
31393140
if len(data) != len(index):
31403141
raise ValueError('Length of values does not match length of ' 'index')
31413142

3142-
if isinstance(data, PeriodIndex):
3143+
if isinstance(data, ABCIndexClass) and not copy:
3144+
pass
3145+
elif isinstance(data, PeriodIndex):
31433146
data = data.asobject
31443147
elif isinstance(data, DatetimeIndex):
31453148
data = data._to_embed(keep_tz=True)
@@ -3209,12 +3212,11 @@ def _try_cast(arr, take_fast_path):
32093212
# e.g. indexes can have different conversions (so don't fast path
32103213
# them)
32113214
# GH 6140
3212-
subarr = _sanitize_index(data, index, copy=True)
3215+
subarr = _sanitize_index(data, index, copy=copy)
32133216
else:
3214-
subarr = _try_cast(data, True)
32153217

3216-
if copy:
3217-
subarr = data.copy()
3218+
# we will try to copy by definition here
3219+
subarr = _try_cast(data, True)
32183220

32193221
elif isinstance(data, Categorical):
32203222
subarr = data

pandas/tests/frame/test_block_internals.py

+3-22
Original file line numberDiff line numberDiff line change
@@ -233,32 +233,13 @@ def test_construction_with_conversions(self):
233233

234234
# convert from a numpy array of non-ns timedelta64
235235
arr = np.array([1, 2, 3], dtype='timedelta64[s]')
236-
s = Series(arr)
237-
expected = Series(pd.timedelta_range('00:00:01', periods=3, freq='s'))
238-
assert_series_equal(s, expected)
239-
240236
df = DataFrame(index=range(3))
241237
df['A'] = arr
242238
expected = DataFrame({'A': pd.timedelta_range('00:00:01', periods=3,
243239
freq='s')},
244240
index=range(3))
245241
assert_frame_equal(df, expected)
246242

247-
# convert from a numpy array of non-ns datetime64
248-
# note that creating a numpy datetime64 is in LOCAL time!!!!
249-
# seems to work for M8[D], but not for M8[s]
250-
251-
s = Series(np.array(['2013-01-01', '2013-01-02',
252-
'2013-01-03'], dtype='datetime64[D]'))
253-
assert_series_equal(s, Series(date_range('20130101', periods=3,
254-
freq='D')))
255-
256-
# s = Series(np.array(['2013-01-01 00:00:01','2013-01-01
257-
# 00:00:02','2013-01-01 00:00:03'],dtype='datetime64[s]'))
258-
259-
# assert_series_equal(s,date_range('20130101
260-
# 00:00:01',period=3,freq='s'))
261-
262243
expected = DataFrame({
263244
'dt1': Timestamp('20130101'),
264245
'dt2': date_range('20130101', periods=3),
@@ -467,7 +448,7 @@ def test_convert_objects(self):
467448
self.mixed_frame['I'] = '1'
468449

469450
# add in some items that will be nan
470-
l = len(self.mixed_frame)
451+
length = len(self.mixed_frame)
471452
self.mixed_frame['J'] = '1.'
472453
self.mixed_frame['K'] = '1'
473454
self.mixed_frame.loc[0:5, ['J', 'K']] = 'garbled'
@@ -476,8 +457,8 @@ def test_convert_objects(self):
476457
assert converted['I'].dtype == 'int64'
477458
assert converted['J'].dtype == 'float64'
478459
assert converted['K'].dtype == 'float64'
479-
assert len(converted['J'].dropna()) == l - 5
480-
assert len(converted['K'].dropna()) == l - 5
460+
assert len(converted['J'].dropna()) == length - 5
461+
assert len(converted['K'].dropna()) == length - 5
481462

482463
# via astype
483464
converted = self.mixed_frame.copy()

pandas/tests/indexes/datetimes/test_astype.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -130,11 +130,11 @@ def test_astype_raises(self):
130130
# GH 13149, GH 13209
131131
idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN])
132132

133-
pytest.raises(ValueError, idx.astype, float)
134-
pytest.raises(ValueError, idx.astype, 'timedelta64')
135-
pytest.raises(ValueError, idx.astype, 'timedelta64[ns]')
136-
pytest.raises(ValueError, idx.astype, 'datetime64')
137-
pytest.raises(ValueError, idx.astype, 'datetime64[D]')
133+
pytest.raises(TypeError, idx.astype, float)
134+
pytest.raises(TypeError, idx.astype, 'timedelta64')
135+
pytest.raises(TypeError, idx.astype, 'timedelta64[ns]')
136+
pytest.raises(TypeError, idx.astype, 'datetime64')
137+
pytest.raises(TypeError, idx.astype, 'datetime64[D]')
138138

139139
def test_index_convert_to_datetime_array(self):
140140
def _check_rng(rng):

pandas/tests/indexes/period/test_period.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,10 @@ def test_astype_raises(self):
4747
# GH 13149, GH 13209
4848
idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D')
4949

50-
pytest.raises(ValueError, idx.astype, str)
51-
pytest.raises(ValueError, idx.astype, float)
52-
pytest.raises(ValueError, idx.astype, 'timedelta64')
53-
pytest.raises(ValueError, idx.astype, 'timedelta64[ns]')
50+
pytest.raises(TypeError, idx.astype, str)
51+
pytest.raises(TypeError, idx.astype, float)
52+
pytest.raises(TypeError, idx.astype, 'timedelta64')
53+
pytest.raises(TypeError, idx.astype, 'timedelta64[ns]')
5454

5555
def test_pickle_compat_construction(self):
5656
pass

pandas/tests/indexes/timedeltas/test_astype.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,10 @@ def test_astype_raises(self):
6666
# GH 13149, GH 13209
6767
idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN])
6868

69-
pytest.raises(ValueError, idx.astype, float)
70-
pytest.raises(ValueError, idx.astype, str)
71-
pytest.raises(ValueError, idx.astype, 'datetime64')
72-
pytest.raises(ValueError, idx.astype, 'datetime64[ns]')
69+
pytest.raises(TypeError, idx.astype, float)
70+
pytest.raises(TypeError, idx.astype, str)
71+
pytest.raises(TypeError, idx.astype, 'datetime64')
72+
pytest.raises(TypeError, idx.astype, 'datetime64[ns]')
7373

7474
def test_pickle_compat_construction(self):
7575
pass

0 commit comments

Comments
 (0)