Skip to content

Commit a8f64d7

Browse files
committed
COMPAT: astype(object) and dtype=object should be reflexive for Series constructor with datetimelikes
closes #17449
1 parent c20332c commit a8f64d7

File tree

13 files changed

+164
-84
lines changed

13 files changed

+164
-84
lines changed

doc/source/whatsnew/v0.22.0.txt

+8
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,18 @@ Backwards incompatible API changes
3535
-
3636
-
3737

38+
39+
40+
41+
42+
3843
.. _whatsnew_0220.api:
3944

4045
Other API Changes
4146
^^^^^^^^^^^^^^^^^
4247

48+
- :func:`Series.astype` and :func:`Index.astype` with an incompatible dtype will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`18231`)
49+
- ``Series`` construction with an ``object`` dtyped tz-aware datetime and ``dtype=object`` specified will now return an ``object`` dtyped ``Series``; previously this would infer the datetime dtype (:issue:`18231`)
4350
- ``NaT`` division with :class:`datetime.timedelta` will now return ``NaN`` instead of raising (:issue:`17876`)
4451
- All-NaN levels in a ``MultiIndex`` are now assigned ``float`` rather than ``object`` dtype, promoting consistency with ``Index`` (:issue:`17929`).
4552
- :class:`Timestamp` will no longer silently ignore unused or invalid ``tz`` or ``tzinfo`` keyword arguments (:issue:`17690`)
@@ -71,6 +78,7 @@ Performance Improvements
7178

7279
- Indexers on ``Series`` or ``DataFrame`` no longer create a reference cycle (:issue:`17956`)
7380
- Added a keyword argument, ``cache``, to :func:`to_datetime` that improved the performance of converting duplicate datetime arguments (:issue:`11665`)
81+
- ``Series`` construction will reduce the number of copies made of the input data in certain cases (:issue:`17449`)
7482
-
7583

7684
.. _whatsnew_0220.docs:

pandas/core/dtypes/cast.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,9 @@
1313
is_datetimelike,
1414
is_extension_type, is_object_dtype,
1515
is_datetime64tz_dtype, is_datetime64_dtype,
16-
is_timedelta64_dtype, is_dtype_equal,
16+
is_datetime64_ns_dtype,
17+
is_timedelta64_dtype, is_timedelta64_ns_dtype,
18+
is_dtype_equal,
1719
is_float_dtype, is_complex_dtype,
1820
is_integer_dtype,
1921
is_datetime_or_timedelta_dtype,
@@ -829,8 +831,10 @@ def maybe_castable(arr):
829831
# check datetime64[ns]/timedelta64[ns] are valid
830832
# otherwise try to coerce
831833
kind = arr.dtype.kind
832-
if kind == 'M' or kind == 'm':
833-
return is_datetime64_dtype(arr.dtype)
834+
if kind == 'M':
835+
return is_datetime64_ns_dtype(arr.dtype)
836+
elif kind == 'm':
837+
return is_timedelta64_ns_dtype(arr.dtype)
834838

835839
return arr.dtype.name not in _POSSIBLY_CAST_DTYPES
836840

pandas/core/indexes/base.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1027,13 +1027,16 @@ def to_frame(self, index=True):
10271027
result.index = self
10281028
return result
10291029

1030-
def _to_embed(self, keep_tz=False):
1030+
def _to_embed(self, keep_tz=False, dtype=None):
10311031
"""
10321032
*this is an internal non-public method*
10331033
10341034
return an array repr of this object, potentially casting to object
10351035
10361036
"""
1037+
if dtype is not None:
1038+
return self.astype(dtype)._to_embed(keep_tz=keep_tz)
1039+
10371040
return self.values.copy()
10381041

10391042
_index_shared_docs['astype'] = """

pandas/core/indexes/datetimes.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -870,7 +870,7 @@ def astype(self, dtype, copy=True):
870870
return Index(self.format(), name=self.name, dtype=object)
871871
elif is_period_dtype(dtype):
872872
return self.to_period(freq=dtype.freq)
873-
raise ValueError('Cannot cast DatetimeIndex to dtype %s' % dtype)
873+
raise TypeError('Cannot cast DatetimeIndex to dtype %s' % dtype)
874874

875875
def _get_time_micros(self):
876876
values = self.asi8
@@ -910,12 +910,15 @@ def to_series(self, keep_tz=False):
910910
index=self._shallow_copy(),
911911
name=self.name)
912912

913-
def _to_embed(self, keep_tz=False):
913+
def _to_embed(self, keep_tz=False, dtype=None):
914914
"""
915915
return an array repr of this object, potentially casting to object
916916
917917
This is for internal compat
918918
"""
919+
if dtype is not None:
920+
return self.astype(dtype)._to_embed(keep_tz=keep_tz)
921+
919922
if keep_tz and self.tz is not None:
920923

921924
# preserve the tz & copy

pandas/core/indexes/period.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -432,10 +432,14 @@ def __array_wrap__(self, result, context=None):
432432
def _box_func(self):
433433
return lambda x: Period._from_ordinal(ordinal=x, freq=self.freq)
434434

435-
def _to_embed(self, keep_tz=False):
435+
def _to_embed(self, keep_tz=False, dtype=None):
436436
"""
437437
return an array repr of this object, potentially casting to object
438438
"""
439+
440+
if dtype is not None:
441+
return self.astype(dtype)._to_embed(keep_tz=keep_tz)
442+
439443
return self.asobject.values
440444

441445
@property
@@ -478,7 +482,7 @@ def astype(self, dtype, copy=True, how='start'):
478482
return self.to_timestamp(how=how).tz_localize(dtype.tz)
479483
elif is_period_dtype(dtype):
480484
return self.asfreq(freq=dtype.freq)
481-
raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype)
485+
raise TypeError('Cannot cast PeriodIndex to dtype %s' % dtype)
482486

483487
@Substitution(klass='PeriodIndex')
484488
@Appender(_shared_docs['searchsorted'])

pandas/core/indexes/timedeltas.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -482,7 +482,7 @@ def astype(self, dtype, copy=True):
482482
elif is_integer_dtype(dtype):
483483
return Index(self.values.astype('i8', copy=copy), dtype='i8',
484484
name=self.name)
485-
raise ValueError('Cannot cast TimedeltaIndex to dtype %s' % dtype)
485+
raise TypeError('Cannot cast TimedeltaIndex to dtype %s' % dtype)
486486

487487
def union(self, other):
488488
"""

pandas/core/series.py

+10-8
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,8 @@
2929
_is_unorderable_exception,
3030
_ensure_platform_int,
3131
pandas_dtype)
32-
from pandas.core.dtypes.generic import ABCSparseArray, ABCDataFrame
32+
from pandas.core.dtypes.generic import (
33+
ABCSparseArray, ABCDataFrame, ABCIndexClass)
3334
from pandas.core.dtypes.cast import (
3435
maybe_upcast, infer_dtype_from_scalar,
3536
maybe_convert_platform,
@@ -184,8 +185,8 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
184185
if name is None:
185186
name = data.name
186187

187-
data = data._to_embed(keep_tz=True)
188-
copy = True
188+
data = data._to_embed(keep_tz=True, dtype=dtype)
189+
copy = False
189190
elif isinstance(data, np.ndarray):
190191
pass
191192
elif isinstance(data, Series):
@@ -3079,7 +3080,9 @@ def _sanitize_index(data, index, copy=False):
30793080
if len(data) != len(index):
30803081
raise ValueError('Length of values does not match length of ' 'index')
30813082

3082-
if isinstance(data, PeriodIndex):
3083+
if isinstance(data, ABCIndexClass) and not copy:
3084+
pass
3085+
elif isinstance(data, PeriodIndex):
30833086
data = data.asobject
30843087
elif isinstance(data, DatetimeIndex):
30853088
data = data._to_embed(keep_tz=True)
@@ -3149,12 +3152,11 @@ def _try_cast(arr, take_fast_path):
31493152
# e.g. indexes can have different conversions (so don't fast path
31503153
# them)
31513154
# GH 6140
3152-
subarr = _sanitize_index(data, index, copy=True)
3155+
subarr = _sanitize_index(data, index, copy=copy)
31533156
else:
3154-
subarr = _try_cast(data, True)
31553157

3156-
if copy:
3157-
subarr = data.copy()
3158+
# we will try to copy be-definition here
3159+
subarr = _try_cast(data, True)
31583160

31593161
elif isinstance(data, Categorical):
31603162
subarr = data

pandas/tests/frame/test_block_internals.py

+3-22
Original file line numberDiff line numberDiff line change
@@ -233,32 +233,13 @@ def test_construction_with_conversions(self):
233233

234234
# convert from a numpy array of non-ns timedelta64
235235
arr = np.array([1, 2, 3], dtype='timedelta64[s]')
236-
s = Series(arr)
237-
expected = Series(pd.timedelta_range('00:00:01', periods=3, freq='s'))
238-
assert_series_equal(s, expected)
239-
240236
df = DataFrame(index=range(3))
241237
df['A'] = arr
242238
expected = DataFrame({'A': pd.timedelta_range('00:00:01', periods=3,
243239
freq='s')},
244240
index=range(3))
245241
assert_frame_equal(df, expected)
246242

247-
# convert from a numpy array of non-ns datetime64
248-
# note that creating a numpy datetime64 is in LOCAL time!!!!
249-
# seems to work for M8[D], but not for M8[s]
250-
251-
s = Series(np.array(['2013-01-01', '2013-01-02',
252-
'2013-01-03'], dtype='datetime64[D]'))
253-
assert_series_equal(s, Series(date_range('20130101', periods=3,
254-
freq='D')))
255-
256-
# s = Series(np.array(['2013-01-01 00:00:01','2013-01-01
257-
# 00:00:02','2013-01-01 00:00:03'],dtype='datetime64[s]'))
258-
259-
# assert_series_equal(s,date_range('20130101
260-
# 00:00:01',period=3,freq='s'))
261-
262243
expected = DataFrame({
263244
'dt1': Timestamp('20130101'),
264245
'dt2': date_range('20130101', periods=3),
@@ -467,7 +448,7 @@ def test_convert_objects(self):
467448
self.mixed_frame['I'] = '1'
468449

469450
# add in some items that will be nan
470-
l = len(self.mixed_frame)
451+
length = len(self.mixed_frame)
471452
self.mixed_frame['J'] = '1.'
472453
self.mixed_frame['K'] = '1'
473454
self.mixed_frame.loc[0:5, ['J', 'K']] = 'garbled'
@@ -476,8 +457,8 @@ def test_convert_objects(self):
476457
assert converted['I'].dtype == 'int64'
477458
assert converted['J'].dtype == 'float64'
478459
assert converted['K'].dtype == 'float64'
479-
assert len(converted['J'].dropna()) == l - 5
480-
assert len(converted['K'].dropna()) == l - 5
460+
assert len(converted['J'].dropna()) == length - 5
461+
assert len(converted['K'].dropna()) == length - 5
481462

482463
# via astype
483464
converted = self.mixed_frame.copy()

pandas/tests/indexes/datetimes/test_astype.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -130,11 +130,11 @@ def test_astype_raises(self):
130130
# GH 13149, GH 13209
131131
idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN])
132132

133-
pytest.raises(ValueError, idx.astype, float)
134-
pytest.raises(ValueError, idx.astype, 'timedelta64')
135-
pytest.raises(ValueError, idx.astype, 'timedelta64[ns]')
136-
pytest.raises(ValueError, idx.astype, 'datetime64')
137-
pytest.raises(ValueError, idx.astype, 'datetime64[D]')
133+
pytest.raises(TypeError, idx.astype, float)
134+
pytest.raises(TypeError, idx.astype, 'timedelta64')
135+
pytest.raises(TypeError, idx.astype, 'timedelta64[ns]')
136+
pytest.raises(TypeError, idx.astype, 'datetime64')
137+
pytest.raises(TypeError, idx.astype, 'datetime64[D]')
138138

139139
def test_index_convert_to_datetime_array(self):
140140
def _check_rng(rng):

pandas/tests/indexes/period/test_period.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,10 @@ def test_astype_raises(self):
4747
# GH 13149, GH 13209
4848
idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D')
4949

50-
pytest.raises(ValueError, idx.astype, str)
51-
pytest.raises(ValueError, idx.astype, float)
52-
pytest.raises(ValueError, idx.astype, 'timedelta64')
53-
pytest.raises(ValueError, idx.astype, 'timedelta64[ns]')
50+
pytest.raises(TypeError, idx.astype, str)
51+
pytest.raises(TypeError, idx.astype, float)
52+
pytest.raises(TypeError, idx.astype, 'timedelta64')
53+
pytest.raises(TypeError, idx.astype, 'timedelta64[ns]')
5454

5555
def test_pickle_compat_construction(self):
5656
pass

pandas/tests/indexes/timedeltas/test_astype.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -66,10 +66,10 @@ def test_astype_raises(self):
6666
# GH 13149, GH 13209
6767
idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN])
6868

69-
pytest.raises(ValueError, idx.astype, float)
70-
pytest.raises(ValueError, idx.astype, str)
71-
pytest.raises(ValueError, idx.astype, 'datetime64')
72-
pytest.raises(ValueError, idx.astype, 'datetime64[ns]')
69+
pytest.raises(TypeError, idx.astype, float)
70+
pytest.raises(TypeError, idx.astype, str)
71+
pytest.raises(TypeError, idx.astype, 'datetime64')
72+
pytest.raises(TypeError, idx.astype, 'datetime64[ns]')
7373

7474
def test_pickle_compat_construction(self):
7575
pass

0 commit comments

Comments
 (0)