Skip to content

Commit fa0ed2e

Browse files
jbrockmendel authored and Pingviinituutti committed
REF: strictness/simplification in DatetimeArray/Index _simple_new (pandas-dev#23431)
1 parent 32094b4 commit fa0ed2e

File tree

4 files changed

+63
-37
lines changed

4 files changed

+63
-37
lines changed

pandas/core/arrays/datetimes.py

+47 -17
Original file line number | Diff line number | Diff line change
@@ -177,16 +177,14 @@ def _simple_new(cls, values, freq=None, tz=None, **kwargs):
177177
we require the we have a dtype compat for the values
178178
if we are passed a non-dtype compat, then coerce using the constructor
179179
"""
180+
assert isinstance(values, np.ndarray), type(values)
181+
if values.dtype == 'i8':
182+
# for compat with datetime/timedelta/period shared methods,
183+
# we can sometimes get here with int64 values. These represent
184+
# nanosecond UTC (or tz-naive) unix timestamps
185+
values = values.view('M8[ns]')
180186

181-
if getattr(values, 'dtype', None) is None:
182-
# empty, but with dtype compat
183-
if values is None:
184-
values = np.empty(0, dtype=_NS_DTYPE)
185-
return cls(values, freq=freq, tz=tz, **kwargs)
186-
values = np.array(values, copy=False)
187-
188-
if not is_datetime64_dtype(values):
189-
values = ensure_int64(values).view(_NS_DTYPE)
187+
assert values.dtype == 'M8[ns]', values.dtype
190188

191189
result = object.__new__(cls)
192190
result._data = values
@@ -209,6 +207,16 @@ def __new__(cls, values, freq=None, tz=None, dtype=None):
209207
# if dtype has an embedded tz, capture it
210208
tz = dtl.validate_tz_from_dtype(dtype, tz)
211209

210+
if isinstance(values, DatetimeArrayMixin):
211+
# extract nanosecond unix timestamps
212+
values = values.asi8
213+
if values.dtype == 'i8':
214+
values = values.view('M8[ns]')
215+
216+
assert isinstance(values, np.ndarray), type(values)
217+
assert is_datetime64_dtype(values) # not yet assured nanosecond
218+
values = conversion.ensure_datetime64ns(values, copy=False)
219+
212220
result = cls._simple_new(values, freq=freq, tz=tz)
213221
if freq_infer:
214222
inferred = result.inferred_freq
@@ -271,7 +279,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None,
271279
# TODO: consider re-implementing _cached_range; GH#17914
272280
index = _generate_regular_range(cls, start, end, periods, freq)
273281

274-
if tz is not None and getattr(index, 'tz', None) is None:
282+
if tz is not None and index.tz is None:
275283
arr = conversion.tz_localize_to_utc(
276284
ensure_int64(index.values),
277285
tz, ambiguous=ambiguous)
@@ -843,7 +851,8 @@ def to_perioddelta(self, freq):
843851
# TODO: consider privatizing (discussion in GH#23113)
844852
from pandas.core.arrays.timedeltas import TimedeltaArrayMixin
845853
i8delta = self.asi8 - self.to_period(freq).to_timestamp().asi8
846-
return TimedeltaArrayMixin(i8delta)
854+
m8delta = i8delta.view('m8[ns]')
855+
return TimedeltaArrayMixin(m8delta)
847856

848857
# -----------------------------------------------------------------
849858
# Properties - Vectorized Timestamp Properties/Methods
@@ -1320,6 +1329,27 @@ def to_julian_date(self):
13201329

13211330

13221331
def _generate_regular_range(cls, start, end, periods, freq):
1332+
"""
1333+
Generate a range of dates with the spans between dates described by
1334+
the given `freq` DateOffset.
1335+
1336+
Parameters
1337+
----------
1338+
cls : class
1339+
start : Timestamp or None
1340+
first point of produced date range
1341+
end : Timestamp or None
1342+
last point of produced date range
1343+
periods : int
1344+
number of periods in produced date range
1345+
freq : DateOffset
1346+
describes space between dates in produced date range
1347+
1348+
Returns
1349+
-------
1350+
ndarray[np.int64] representing nanosecond unix timestamps
1351+
1352+
"""
13231353
if isinstance(freq, Tick):
13241354
stride = freq.nanos
13251355
if periods is None:
@@ -1342,22 +1372,22 @@ def _generate_regular_range(cls, start, end, periods, freq):
13421372
raise ValueError("at least 'start' or 'end' should be specified "
13431373
"if a 'period' is given.")
13441374

1345-
data = np.arange(b, e, stride, dtype=np.int64)
1346-
data = cls._simple_new(data.view(_NS_DTYPE), None, tz=tz)
1375+
values = np.arange(b, e, stride, dtype=np.int64)
1376+
13471377
else:
13481378
tz = None
13491379
# start and end should have the same timezone by this point
1350-
if isinstance(start, Timestamp):
1380+
if start is not None:
13511381
tz = start.tz
1352-
elif isinstance(end, Timestamp):
1382+
elif end is not None:
13531383
tz = end.tz
13541384

13551385
xdr = generate_range(start=start, end=end,
13561386
periods=periods, offset=freq)
13571387

1358-
values = np.array([x.value for x in xdr])
1359-
data = cls._simple_new(values, freq=freq, tz=tz)
1388+
values = np.array([x.value for x in xdr], dtype=np.int64)
13601389

1390+
data = cls._simple_new(values, freq=freq, tz=tz)
13611391
return data
13621392

13631393

pandas/core/indexes/base.py

+9 -3
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
from pandas.core.dtypes.generic import (
1919
ABCSeries, ABCDataFrame,
2020
ABCMultiIndex,
21-
ABCPeriodIndex, ABCTimedeltaIndex,
21+
ABCPeriodIndex, ABCTimedeltaIndex, ABCDatetimeIndex,
2222
ABCDateOffset)
2323
from pandas.core.dtypes.missing import isna, array_equivalent
2424
from pandas.core.dtypes.cast import maybe_cast_to_integer_array
@@ -545,6 +545,10 @@ def _shallow_copy(self, values=None, **kwargs):
545545

546546
# _simple_new expects an ndarray
547547
values = getattr(values, 'values', values)
548+
if isinstance(values, ABCDatetimeIndex):
549+
# `self.values` returns `self` for tz-aware, so we need to unwrap
550+
# more specifically
551+
values = values.asi8
548552

549553
return self._simple_new(values, **attributes)
550554

@@ -2947,7 +2951,8 @@ def difference(self, other):
29472951
self._assert_can_do_setop(other)
29482952

29492953
if self.equals(other):
2950-
return self._shallow_copy([])
2954+
# pass an empty np.ndarray with the appropriate dtype
2955+
return self._shallow_copy(self._data[:0])
29512956

29522957
other, result_name = self._convert_can_do_setop(other)
29532958

@@ -3715,7 +3720,8 @@ def reindex(self, target, method=None, level=None, limit=None,
37153720
if not isinstance(target, Index) and len(target) == 0:
37163721
attrs = self._get_attributes_dict()
37173722
attrs.pop('freq', None) # don't preserve freq
3718-
target = self._simple_new(None, dtype=self.dtype, **attrs)
3723+
values = self._data[:0] # appropriately-dtyped empty array
3724+
target = self._simple_new(values, dtype=self.dtype, **attrs)
37193725
else:
37203726
target = ensure_index(target)
37213727

pandas/core/indexes/datetimes.py

+6 -16
Original file line number | Diff line number | Diff line change
@@ -175,6 +175,7 @@ class DatetimeIndex(DatetimeArrayMixin, DatelikeOps, TimelikeOps,
175175
pandas.to_datetime : Convert argument to datetime
176176
"""
177177
_resolution = cache_readonly(DatetimeArrayMixin._resolution.fget)
178+
_shallow_copy = Index._shallow_copy
178179

179180
_typ = 'datetimeindex'
180181
_join_precedence = 10
@@ -298,6 +299,9 @@ def __new__(cls, data=None,
298299
data = data.astype(np.int64, copy=False)
299300
subarr = data.view(_NS_DTYPE)
300301

302+
assert isinstance(subarr, np.ndarray), type(subarr)
303+
assert subarr.dtype == 'M8[ns]', subarr.dtype
304+
301305
subarr = cls._simple_new(subarr, name=name, freq=freq, tz=tz)
302306
if dtype is not None:
303307
if not is_dtype_equal(subarr.dtype, dtype):
@@ -329,22 +333,8 @@ def _simple_new(cls, values, name=None, freq=None, tz=None,
329333
we require the we have a dtype compat for the values
330334
if we are passed a non-dtype compat, then coerce using the constructor
331335
"""
332-
333-
if getattr(values, 'dtype', None) is None:
334-
# empty, but with dtype compat
335-
if values is None:
336-
values = np.empty(0, dtype=_NS_DTYPE)
337-
return cls(values, name=name, freq=freq, tz=tz,
338-
dtype=dtype, **kwargs)
339-
values = np.array(values, copy=False)
340-
341-
if not is_datetime64_dtype(values):
342-
values = ensure_int64(values).view(_NS_DTYPE)
343-
344-
values = getattr(values, 'values', values)
345-
346-
assert isinstance(values, np.ndarray), "values is not an np.ndarray"
347-
assert is_datetime64_dtype(values)
336+
# DatetimeArray._simple_new will accept either i8 or M8[ns] dtypes
337+
assert isinstance(values, np.ndarray), type(values)
348338

349339
result = super(DatetimeIndex, cls)._simple_new(values, freq, tz,
350340
**kwargs)

pandas/core/indexes/period.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -371,7 +371,7 @@ def _nat_new(self, box=True):
371371
def to_timestamp(self, freq=None, how='start'):
372372
from pandas import DatetimeIndex
373373
result = self._data.to_timestamp(freq=freq, how=how)
374-
return DatetimeIndex._simple_new(result,
374+
return DatetimeIndex._simple_new(result.asi8,
375375
name=self.name,
376376
freq=result.freq)
377377

0 commit comments

Comments
 (0)