diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index b656690b30e34..e0f07cb59b1ff 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -177,16 +177,14 @@ def _simple_new(cls, values, freq=None, tz=None, **kwargs): we require the we have a dtype compat for the values if we are passed a non-dtype compat, then coerce using the constructor """ + assert isinstance(values, np.ndarray), type(values) + if values.dtype == 'i8': + # for compat with datetime/timedelta/period shared methods, + # we can sometimes get here with int64 values. These represent + # nanosecond UTC (or tz-naive) unix timestamps + values = values.view('M8[ns]') - if getattr(values, 'dtype', None) is None: - # empty, but with dtype compat - if values is None: - values = np.empty(0, dtype=_NS_DTYPE) - return cls(values, freq=freq, tz=tz, **kwargs) - values = np.array(values, copy=False) - - if not is_datetime64_dtype(values): - values = ensure_int64(values).view(_NS_DTYPE) + assert values.dtype == 'M8[ns]', values.dtype result = object.__new__(cls) result._data = values @@ -209,6 +207,16 @@ def __new__(cls, values, freq=None, tz=None, dtype=None): # if dtype has an embedded tz, capture it tz = dtl.validate_tz_from_dtype(dtype, tz) + if isinstance(values, DatetimeArrayMixin): + # extract nanosecond unix timestamps + values = values.asi8 + if values.dtype == 'i8': + values = values.view('M8[ns]') + + assert isinstance(values, np.ndarray), type(values) + assert is_datetime64_dtype(values) # not yet assured nanosecond + values = conversion.ensure_datetime64ns(values, copy=False) + result = cls._simple_new(values, freq=freq, tz=tz) if freq_infer: inferred = result.inferred_freq @@ -271,7 +279,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None, # TODO: consider re-implementing _cached_range; GH#17914 index = _generate_regular_range(cls, start, end, periods, freq) - if tz is not None and getattr(index, 'tz', None) is None: + if tz is not None and index.tz is None: arr = conversion.tz_localize_to_utc( ensure_int64(index.values), tz, ambiguous=ambiguous) @@ -843,7 +851,8 @@ def to_perioddelta(self, freq): # TODO: consider privatizing (discussion in GH#23113) from pandas.core.arrays.timedeltas import TimedeltaArrayMixin i8delta = self.asi8 - self.to_period(freq).to_timestamp().asi8 - return TimedeltaArrayMixin(i8delta) + m8delta = i8delta.view('m8[ns]') + return TimedeltaArrayMixin(m8delta) # ----------------------------------------------------------------- # Properties - Vectorized Timestamp Properties/Methods @@ -1320,6 +1329,27 @@ def to_julian_date(self): def _generate_regular_range(cls, start, end, periods, freq): + """ + Generate a range of dates with the spans between dates described by + the given `freq` DateOffset. + + Parameters + ---------- + cls : class + start : Timestamp or None + first point of produced date range + end : Timestamp or None + last point of produced date range + periods : int + number of periods in produced date range + freq : DateOffset + describes space between dates in produced date range + + Returns + ------- + ndarray[np.int64] representing nanosecond unix timestamps + + """ if isinstance(freq, Tick): stride = freq.nanos if periods is None: @@ -1342,22 +1372,22 @@ def _generate_regular_range(cls, start, end, periods, freq): raise ValueError("at least 'start' or 'end' should be specified " "if a 'period' is given.") - data = np.arange(b, e, stride, dtype=np.int64) - data = cls._simple_new(data.view(_NS_DTYPE), None, tz=tz) + values = np.arange(b, e, stride, dtype=np.int64) + else: tz = None # start and end should have the same timezone by this point - if isinstance(start, Timestamp): + if start is not None: tz = start.tz - elif isinstance(end, Timestamp): + elif end is not None: tz = end.tz xdr = generate_range(start=start, end=end, periods=periods, offset=freq) - values = np.array([x.value for x in xdr]) - data = cls._simple_new(values, freq=freq, tz=tz) + values = np.array([x.value for x in xdr], dtype=np.int64) + data = cls._simple_new(values, freq=freq, tz=tz) return data diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1ffdac1989129..ae64179b36485 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -18,7 +18,7 @@ from pandas.core.dtypes.generic import ( ABCSeries, ABCDataFrame, ABCMultiIndex, - ABCPeriodIndex, ABCTimedeltaIndex, + ABCPeriodIndex, ABCTimedeltaIndex, ABCDatetimeIndex, ABCDateOffset) from pandas.core.dtypes.missing import isna, array_equivalent from pandas.core.dtypes.cast import maybe_cast_to_integer_array @@ -545,6 +545,10 @@ def _shallow_copy(self, values=None, **kwargs): # _simple_new expects an ndarray values = getattr(values, 'values', values) + if isinstance(values, ABCDatetimeIndex): + # `self.values` returns `self` for tz-aware, so we need to unwrap + # more specifically + values = values.asi8 return self._simple_new(values, **attributes) @@ -2947,7 +2951,8 @@ def difference(self, other): self._assert_can_do_setop(other) if self.equals(other): - return self._shallow_copy([]) + # pass an empty np.ndarray with the appropriate dtype + return self._shallow_copy(self._data[:0]) other, result_name = self._convert_can_do_setop(other) @@ -3715,7 +3720,8 @@ def reindex(self, target, method=None, level=None, limit=None, if not isinstance(target, Index) and len(target) == 0: attrs = self._get_attributes_dict() attrs.pop('freq', None) # don't preserve freq - target = self._simple_new(None, dtype=self.dtype, **attrs) + values = self._data[:0] # appropriately-dtyped empty array + target = self._simple_new(values, dtype=self.dtype, **attrs) else: target = ensure_index(target) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 39f247a7c4cfe..e30da01f0226a 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -175,6 +175,7 @@ class DatetimeIndex(DatetimeArrayMixin, DatelikeOps, TimelikeOps, pandas.to_datetime : Convert argument to datetime """ _resolution = cache_readonly(DatetimeArrayMixin._resolution.fget) + _shallow_copy = Index._shallow_copy _typ = 'datetimeindex' _join_precedence = 10 @@ -298,6 +299,9 @@ def __new__(cls, data=None, data = data.astype(np.int64, copy=False) subarr = data.view(_NS_DTYPE) + assert isinstance(subarr, np.ndarray), type(subarr) + assert subarr.dtype == 'M8[ns]', subarr.dtype + subarr = cls._simple_new(subarr, name=name, freq=freq, tz=tz) if dtype is not None: if not is_dtype_equal(subarr.dtype, dtype): @@ -329,22 +333,8 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, we require the we have a dtype compat for the values if we are passed a non-dtype compat, then coerce using the constructor """ - - if getattr(values, 'dtype', None) is None: - # empty, but with dtype compat - if values is None: - values = np.empty(0, dtype=_NS_DTYPE) - return cls(values, name=name, freq=freq, tz=tz, - dtype=dtype, **kwargs) - values = np.array(values, copy=False) - - if not is_datetime64_dtype(values): - values = ensure_int64(values).view(_NS_DTYPE) - - values = getattr(values, 'values', values) - - assert isinstance(values, np.ndarray), "values is not an np.ndarray" - assert is_datetime64_dtype(values) + # DatetimeArray._simple_new will accept either i8 or M8[ns] dtypes + assert isinstance(values, np.ndarray), type(values) result = super(DatetimeIndex, cls)._simple_new(values, freq, tz, **kwargs) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index ae5c3ddc9dfb4..7e11ca5dbfcef 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -371,7 +371,7 @@ def _nat_new(self, box=True): def to_timestamp(self, freq=None, how='start'): from pandas import DatetimeIndex result = self._data.to_timestamp(freq=freq, how=how) - return DatetimeIndex._simple_new(result, + return DatetimeIndex._simple_new(result.asi8, name=self.name, freq=result.freq)