Skip to content

Commit c33527a

Browse files
jbrockmendeltm9k1
authored and committed
BUG/REF: TimedeltaIndex.__new__ (pandas-dev#23539)
1 parent 2e1e644 commit c33527a

File tree

12 files changed

+330
-103
lines changed

12 files changed

+330
-103
lines changed

doc/source/whatsnew/v0.24.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,7 @@ Backwards incompatible API changes
247247

248248
- A newly constructed empty :class:`DataFrame` with integer as the ``dtype`` will now only be cast to ``float64`` if ``index`` is specified (:issue:`22858`)
249249
- :meth:`Series.str.cat` will now raise if `others` is a `set` (:issue:`23009`)
250+
- Passing scalar values to :class:`DatetimeIndex` or :class:`TimedeltaIndex` will now raise ``TypeError`` instead of ``ValueError`` (:issue:`23539`)
250251

251252
.. _whatsnew_0240.api_breaking.deps:
252253

@@ -970,6 +971,7 @@ Deprecations
970971
- The class ``FrozenNDArray`` has been deprecated. When unpickling, ``FrozenNDArray`` will be unpickled to ``np.ndarray`` once this class is removed (:issue:`9031`)
971972
- Deprecated the `nthreads` keyword of :func:`pandas.read_feather` in favor of
972973
`use_threads` to reflect the changes in pyarrow 0.11.0. (:issue:`23053`)
974+
- Constructing a :class:`TimedeltaIndex` from ``datetime64``-dtyped data is deprecated and will raise ``TypeError`` in a future version (:issue:`23539`)
973975

974976
.. _whatsnew_0240.deprecations.datetimelike_int_ops:
975977

pandas/core/arrays/datetimes.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -234,9 +234,7 @@ def __new__(cls, values, freq=None, tz=None, dtype=None):
234234

235235
result = cls._simple_new(values, freq=freq, tz=tz)
236236
if freq_infer:
237-
inferred = result.inferred_freq
238-
if inferred:
239-
result.freq = to_offset(inferred)
237+
result.freq = to_offset(result.inferred_freq)
240238

241239
# NB: Among other things not yet ported from the DatetimeIndex
242240
# constructor, this does not call _deepcopy_if_needed

pandas/core/arrays/timedeltas.py

+172-7
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,28 @@
11
# -*- coding: utf-8 -*-
22
from datetime import timedelta
3+
import warnings
34

45
import numpy as np
56

67
from pandas._libs import tslibs
7-
from pandas._libs.tslibs import Timedelta, Timestamp, NaT
8+
from pandas._libs.tslibs import Timedelta, Timestamp, NaT, iNaT
89
from pandas._libs.tslibs.fields import get_timedelta_field
9-
from pandas._libs.tslibs.timedeltas import array_to_timedelta64
10+
from pandas._libs.tslibs.timedeltas import (
11+
array_to_timedelta64, parse_timedelta_unit)
1012

1113
from pandas import compat
1214

1315
from pandas.core.dtypes.common import (
14-
_TD_DTYPE, is_list_like)
15-
from pandas.core.dtypes.generic import ABCSeries
16+
_TD_DTYPE,
17+
is_object_dtype,
18+
is_string_dtype,
19+
is_float_dtype,
20+
is_integer_dtype,
21+
is_timedelta64_dtype,
22+
is_datetime64_dtype,
23+
is_list_like,
24+
ensure_int64)
25+
from pandas.core.dtypes.generic import ABCSeries, ABCTimedeltaIndex
1626
from pandas.core.dtypes.missing import isna
1727

1828
import pandas.core.common as com
@@ -139,9 +149,7 @@ def __new__(cls, values, freq=None):
139149

140150
result = cls._simple_new(values, freq=freq)
141151
if freq_infer:
142-
inferred = result.inferred_freq
143-
if inferred:
144-
result.freq = to_offset(inferred)
152+
result.freq = to_offset(result.inferred_freq)
145153

146154
return result
147155

@@ -397,6 +405,163 @@ def f(x):
397405
# ---------------------------------------------------------------------
398406
# Constructor Helpers
399407

408+
def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"):
409+
"""
410+
Parameters
411+
----------
412+
data : list-like
413+
copy : bool, default False
414+
unit : str, default "ns"
415+
errors : {"raise", "coerce", "ignore"}, default "raise"
416+
417+
Returns
418+
-------
419+
ndarray[timedelta64[ns]]
420+
inferred_freq : Tick or None
421+
422+
Raises
423+
------
424+
ValueError : data cannot be converted to timedelta64[ns]
425+
426+
Notes
427+
-----
428+
Unlike `pandas.to_timedelta`, setting `errors=ignore` will not cause
429+
errors to be ignored; they are caught and subsequently ignored at a
430+
higher level.
431+
"""
432+
inferred_freq = None
433+
unit = parse_timedelta_unit(unit)
434+
435+
# Unwrap whatever we have into a np.ndarray
436+
if not hasattr(data, 'dtype'):
437+
# e.g. list, tuple
438+
if np.ndim(data) == 0:
439+
# i.e. generator
440+
data = list(data)
441+
data = np.array(data, copy=False)
442+
elif isinstance(data, ABCSeries):
443+
data = data._values
444+
elif isinstance(data, (ABCTimedeltaIndex, TimedeltaArrayMixin)):
445+
inferred_freq = data.freq
446+
data = data._data
447+
448+
# Convert whatever we have into timedelta64[ns] dtype
449+
if is_object_dtype(data) or is_string_dtype(data):
450+
# no need to make a copy, need to convert if string-dtyped
451+
data = objects_to_td64ns(data, unit=unit, errors=errors)
452+
copy = False
453+
454+
elif is_integer_dtype(data):
455+
# treat as multiples of the given unit
456+
data, copy_made = ints_to_td64ns(data, unit=unit)
457+
copy = copy and not copy_made
458+
459+
elif is_float_dtype(data):
460+
# treat as multiples of the given unit. If after converting to nanos,
461+
# there are fractional components left, these are truncated
462+
# (i.e. NOT rounded)
463+
mask = np.isnan(data)
464+
coeff = np.timedelta64(1, unit) / np.timedelta64(1, 'ns')
465+
data = (coeff * data).astype(np.int64).view('timedelta64[ns]')
466+
data[mask] = iNaT
467+
copy = False
468+
469+
elif is_timedelta64_dtype(data):
470+
if data.dtype != _TD_DTYPE:
471+
# non-nano unit
472+
# TODO: watch out for overflows
473+
data = data.astype(_TD_DTYPE)
474+
copy = False
475+
476+
elif is_datetime64_dtype(data):
477+
# GH#23539
478+
warnings.warn("Passing datetime64-dtype data to TimedeltaIndex is "
479+
"deprecated, will raise a TypeError in a future "
480+
"version",
481+
FutureWarning, stacklevel=3)
482+
data = ensure_int64(data).view(_TD_DTYPE)
483+
484+
else:
485+
raise TypeError("dtype {dtype} cannot be converted to timedelta64[ns]"
486+
.format(dtype=data.dtype))
487+
488+
data = np.array(data, copy=copy)
489+
assert data.dtype == 'm8[ns]', data
490+
return data, inferred_freq
491+
492+
493+
def ints_to_td64ns(data, unit="ns"):
494+
"""
495+
Convert an ndarray with integer-dtype to timedelta64[ns] dtype, treating
496+
the integers as multiples of the given timedelta unit.
497+
498+
Parameters
499+
----------
500+
data : np.ndarray with integer-dtype
501+
unit : str, default "ns"
502+
503+
Returns
504+
-------
505+
ndarray[timedelta64[ns]]
506+
bool : whether a copy was made
507+
"""
508+
copy_made = False
509+
unit = unit if unit is not None else "ns"
510+
511+
if data.dtype != np.int64:
512+
# converting to int64 makes a copy, so we can avoid
513+
# re-copying later
514+
data = data.astype(np.int64)
515+
copy_made = True
516+
517+
if unit != "ns":
518+
dtype_str = "timedelta64[{unit}]".format(unit=unit)
519+
data = data.view(dtype_str)
520+
521+
# TODO: watch out for overflows when converting from lower-resolution
522+
data = data.astype("timedelta64[ns]")
523+
# the astype conversion makes a copy, so we can avoid re-copying later
524+
copy_made = True
525+
526+
else:
527+
data = data.view("timedelta64[ns]")
528+
529+
return data, copy_made
530+
531+
532+
def objects_to_td64ns(data, unit="ns", errors="raise"):
533+
"""
534+
Convert an object-dtyped or string-dtyped array into a
535+
timedelta64[ns]-dtyped array.
536+
537+
Parameters
538+
----------
539+
data : ndarray or Index
540+
unit : str, default "ns"
541+
errors : {"raise", "coerce", "ignore"}, default "raise"
542+
543+
Returns
544+
-------
545+
ndarray[timedelta64[ns]]
546+
547+
Raises
548+
------
549+
ValueError : data cannot be converted to timedelta64[ns]
550+
551+
Notes
552+
-----
553+
Unlike `pandas.to_timedelta`, setting `errors=ignore` will not cause
554+
errors to be ignored; they are caught and subsequently ignored at a
555+
higher level.
556+
"""
557+
# coerce Index to np.ndarray, converting string-dtype if necessary
558+
values = np.array(data, dtype=np.object_, copy=False)
559+
560+
result = array_to_timedelta64(values,
561+
unit=unit, errors=errors)
562+
return result.view('timedelta64[ns]')
563+
564+
400565
def _generate_regular_range(start, end, periods, offset):
401566
stride = offset.nanos
402567
if periods is None:

pandas/core/indexes/datetimes.py

+16-15
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,21 @@ def __new__(cls, data=None,
239239
dayfirst=False, yearfirst=False, dtype=None,
240240
copy=False, name=None, verify_integrity=True):
241241

242+
if data is None:
243+
# TODO: Remove this block and associated kwargs; GH#20535
244+
result = cls._generate_range(start, end, periods,
245+
freq=freq, tz=tz, normalize=normalize,
246+
closed=closed, ambiguous=ambiguous)
247+
result.name = name
248+
return result
249+
250+
if is_scalar(data):
251+
raise TypeError("{cls}() must be called with a "
252+
"collection of some kind, {data} was passed"
253+
.format(cls=cls.__name__, data=repr(data)))
254+
255+
# - Cases checked above all return/raise before reaching here - #
256+
242257
# This allows to later ensure that the 'copy' parameter is honored:
243258
if isinstance(data, Index):
244259
ref_to_data = data._data
@@ -253,20 +268,8 @@ def __new__(cls, data=None,
253268
# if dtype has an embedded tz, capture it
254269
tz = dtl.validate_tz_from_dtype(dtype, tz)
255270

256-
if data is None:
257-
# TODO: Remove this block and associated kwargs; GH#20535
258-
result = cls._generate_range(start, end, periods,
259-
freq=freq, tz=tz, normalize=normalize,
260-
closed=closed, ambiguous=ambiguous)
261-
result.name = name
262-
return result
263-
264271
if not isinstance(data, (np.ndarray, Index, ABCSeries,
265272
DatetimeArrayMixin)):
266-
if is_scalar(data):
267-
raise ValueError('DatetimeIndex() must be called with a '
268-
'collection of some kind, %s was passed'
269-
% repr(data))
270273
# other iterable of some kind
271274
if not isinstance(data, (list, tuple)):
272275
data = list(data)
@@ -328,9 +331,7 @@ def __new__(cls, data=None,
328331
cls._validate_frequency(subarr, freq, ambiguous=ambiguous)
329332

330333
if freq_infer:
331-
inferred = subarr.inferred_freq
332-
if inferred:
333-
subarr.freq = to_offset(inferred)
334+
subarr.freq = to_offset(subarr.inferred_freq)
334335

335336
return subarr._deepcopy_if_needed(ref_to_data, copy)
336337

pandas/core/indexes/timedeltas.py

+26-35
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
from pandas.core.dtypes.missing import isna
1616

1717
from pandas.core.arrays.timedeltas import (
18-
TimedeltaArrayMixin, _is_convertible_to_td, _to_m8)
18+
TimedeltaArrayMixin, _is_convertible_to_td, _to_m8,
19+
sequence_to_td64ns)
1920
from pandas.core.arrays import datetimelike as dtl
2021

2122
from pandas.core.indexes.base import Index
@@ -33,10 +34,9 @@
3334
TimelikeOps, DatetimeIndexOpsMixin, wrap_arithmetic_op,
3435
wrap_array_method, wrap_field_accessor)
3536
from pandas.core.tools.timedeltas import (
36-
to_timedelta, _coerce_scalar_to_timedelta_type)
37+
_coerce_scalar_to_timedelta_type)
3738
from pandas._libs import (lib, index as libindex,
3839
join as libjoin, Timedelta, NaT)
39-
from pandas._libs.tslibs.timedeltas import array_to_timedelta64
4040

4141

4242
class TimedeltaIndex(TimedeltaArrayMixin, DatetimeIndexOpsMixin,
@@ -139,12 +139,6 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
139139
periods=None, closed=None, dtype=None, copy=False,
140140
name=None, verify_integrity=True):
141141

142-
if isinstance(data, TimedeltaIndex) and freq is None and name is None:
143-
if copy:
144-
return data.copy()
145-
else:
146-
return data._shallow_copy()
147-
148142
freq, freq_infer = dtl.maybe_infer_freq(freq)
149143

150144
if data is None:
@@ -154,32 +148,31 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
154148
result.name = name
155149
return result
156150

157-
if unit is not None:
158-
data = to_timedelta(data, unit=unit, box=False)
159-
160151
if is_scalar(data):
161-
raise ValueError('TimedeltaIndex() must be called with a '
162-
'collection of some kind, {data} was passed'
163-
.format(data=repr(data)))
164-
165-
# convert if not already
166-
if getattr(data, 'dtype', None) != _TD_DTYPE:
167-
data = to_timedelta(data, unit=unit, box=False)
168-
elif copy:
169-
data = np.array(data, copy=True)
170-
171-
data = np.array(data, copy=False)
172-
if data.dtype == np.object_:
173-
data = array_to_timedelta64(data)
174-
if data.dtype != _TD_DTYPE:
175-
if is_timedelta64_dtype(data):
176-
# non-nano unit
177-
# TODO: watch out for overflows
178-
data = data.astype(_TD_DTYPE)
152+
raise TypeError('{cls}() must be called with a '
153+
'collection of some kind, {data} was passed'
154+
.format(cls=cls.__name__, data=repr(data)))
155+
156+
if isinstance(data, TimedeltaIndex) and freq is None and name is None:
157+
if copy:
158+
return data.copy()
179159
else:
180-
data = ensure_int64(data).view(_TD_DTYPE)
160+
return data._shallow_copy()
181161

182-
assert data.dtype == 'm8[ns]', data.dtype
162+
# - Cases checked above all return/raise before reaching here - #
163+
164+
data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit)
165+
if inferred_freq is not None:
166+
if freq is not None and freq != inferred_freq:
167+
raise ValueError('Inferred frequency {inferred} from passed '
168+
'values does not conform to passed frequency '
169+
'{passed}'
170+
.format(inferred=inferred_freq,
171+
passed=freq.freqstr))
172+
elif freq_infer:
173+
freq = inferred_freq
174+
freq_infer = False
175+
verify_integrity = False
183176

184177
subarr = cls._simple_new(data, name=name, freq=freq)
185178
# check that we are matching freqs
@@ -188,9 +181,7 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
188181
cls._validate_frequency(subarr, freq)
189182

190183
if freq_infer:
191-
inferred = subarr.inferred_freq
192-
if inferred:
193-
subarr.freq = to_offset(inferred)
184+
subarr.freq = to_offset(subarr.inferred_freq)
194185

195186
return subarr
196187

0 commit comments

Comments
 (0)