Skip to content

REF: strictness/simplification in DatetimeArray/Index _simple_new #23431

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Nov 2, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 47 additions & 17 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,16 +177,14 @@ def _simple_new(cls, values, freq=None, tz=None, **kwargs):
we require that we have a dtype compat for the values
if we are passed a non-dtype compat, then coerce using the constructor
"""
assert isinstance(values, np.ndarray), type(values)
if values.dtype == 'i8':
# for compat with datetime/timedelta/period shared methods,
# we can sometimes get here with int64 values. These represent
# nanosecond UTC (or tz-naive) unix timestamps
values = values.view('M8[ns]')

if getattr(values, 'dtype', None) is None:
# empty, but with dtype compat
if values is None:
values = np.empty(0, dtype=_NS_DTYPE)
return cls(values, freq=freq, tz=tz, **kwargs)
values = np.array(values, copy=False)

if not is_datetime64_dtype(values):
values = ensure_int64(values).view(_NS_DTYPE)
assert values.dtype == 'M8[ns]', values.dtype

result = object.__new__(cls)
result._data = values
Expand All @@ -209,6 +207,16 @@ def __new__(cls, values, freq=None, tz=None, dtype=None):
# if dtype has an embedded tz, capture it
tz = dtl.validate_tz_from_dtype(dtype, tz)

if isinstance(values, DatetimeArrayMixin):
# extract nanosecond unix timestamps
values = values.asi8
if values.dtype == 'i8':
values = values.view('M8[ns]')

assert isinstance(values, np.ndarray), type(values)
assert is_datetime64_dtype(values) # not yet assured nanosecond
values = conversion.ensure_datetime64ns(values, copy=False)

result = cls._simple_new(values, freq=freq, tz=tz)
if freq_infer:
inferred = result.inferred_freq
Expand Down Expand Up @@ -271,7 +279,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None,
# TODO: consider re-implementing _cached_range; GH#17914
index = _generate_regular_range(cls, start, end, periods, freq)

if tz is not None and getattr(index, 'tz', None) is None:
if tz is not None and index.tz is None:
arr = conversion.tz_localize_to_utc(
ensure_int64(index.values),
tz, ambiguous=ambiguous)
Expand Down Expand Up @@ -843,7 +851,8 @@ def to_perioddelta(self, freq):
# TODO: consider privatizing (discussion in GH#23113)
from pandas.core.arrays.timedeltas import TimedeltaArrayMixin
i8delta = self.asi8 - self.to_period(freq).to_timestamp().asi8
return TimedeltaArrayMixin(i8delta)
m8delta = i8delta.view('m8[ns]')
return TimedeltaArrayMixin(m8delta)

# -----------------------------------------------------------------
# Properties - Vectorized Timestamp Properties/Methods
Expand Down Expand Up @@ -1320,6 +1329,27 @@ def to_julian_date(self):


def _generate_regular_range(cls, start, end, periods, freq):
"""
Generate a range of dates with the spans between dates described by
the given `freq` DateOffset.

Parameters
----------
cls : class
start : Timestamp or None
first point of produced date range
end : Timestamp or None
last point of produced date range
periods : int
number of periods in produced date range
freq : DateOffset
describes space between dates in produced date range

Returns
-------
ndarray[np.int64] representing nanosecond unix timestamps

"""
if isinstance(freq, Tick):
stride = freq.nanos
if periods is None:
Expand All @@ -1342,22 +1372,22 @@ def _generate_regular_range(cls, start, end, periods, freq):
raise ValueError("at least 'start' or 'end' should be specified "
"if a 'period' is given.")

data = np.arange(b, e, stride, dtype=np.int64)
data = cls._simple_new(data.view(_NS_DTYPE), None, tz=tz)
values = np.arange(b, e, stride, dtype=np.int64)

else:
tz = None
# start and end should have the same timezone by this point
if isinstance(start, Timestamp):
if start is not None:
tz = start.tz
elif isinstance(end, Timestamp):
elif end is not None:
tz = end.tz

xdr = generate_range(start=start, end=end,
periods=periods, offset=freq)

values = np.array([x.value for x in xdr])
data = cls._simple_new(values, freq=freq, tz=tz)
values = np.array([x.value for x in xdr], dtype=np.int64)

data = cls._simple_new(values, freq=freq, tz=tz)
return data


Expand Down
12 changes: 9 additions & 3 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from pandas.core.dtypes.generic import (
ABCSeries, ABCDataFrame,
ABCMultiIndex,
ABCPeriodIndex, ABCTimedeltaIndex,
ABCPeriodIndex, ABCTimedeltaIndex, ABCDatetimeIndex,
ABCDateOffset)
from pandas.core.dtypes.missing import isna, array_equivalent
from pandas.core.dtypes.cast import maybe_cast_to_integer_array
Expand Down Expand Up @@ -545,6 +545,10 @@ def _shallow_copy(self, values=None, **kwargs):

# _simple_new expects an ndarray
values = getattr(values, 'values', values)
if isinstance(values, ABCDatetimeIndex):
# `self.values` returns `self` for tz-aware, so we need to unwrap
# more specifically
values = values.asi8

return self._simple_new(values, **attributes)

Expand Down Expand Up @@ -2947,7 +2951,8 @@ def difference(self, other):
self._assert_can_do_setop(other)

if self.equals(other):
return self._shallow_copy([])
# pass an empty np.ndarray with the appropriate dtype
return self._shallow_copy(self._data[:0])

other, result_name = self._convert_can_do_setop(other)

Expand Down Expand Up @@ -3715,7 +3720,8 @@ def reindex(self, target, method=None, level=None, limit=None,
if not isinstance(target, Index) and len(target) == 0:
attrs = self._get_attributes_dict()
attrs.pop('freq', None) # don't preserve freq
target = self._simple_new(None, dtype=self.dtype, **attrs)
values = self._data[:0] # appropriately-dtyped empty array
target = self._simple_new(values, dtype=self.dtype, **attrs)
else:
target = ensure_index(target)

Expand Down
22 changes: 6 additions & 16 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ class DatetimeIndex(DatetimeArrayMixin, DatelikeOps, TimelikeOps,
pandas.to_datetime : Convert argument to datetime
"""
_resolution = cache_readonly(DatetimeArrayMixin._resolution.fget)
_shallow_copy = Index._shallow_copy

_typ = 'datetimeindex'
_join_precedence = 10
Expand Down Expand Up @@ -298,6 +299,9 @@ def __new__(cls, data=None,
data = data.astype(np.int64, copy=False)
subarr = data.view(_NS_DTYPE)

assert isinstance(subarr, np.ndarray), type(subarr)
assert subarr.dtype == 'M8[ns]', subarr.dtype

subarr = cls._simple_new(subarr, name=name, freq=freq, tz=tz)
if dtype is not None:
if not is_dtype_equal(subarr.dtype, dtype):
Expand Down Expand Up @@ -329,22 +333,8 @@ def _simple_new(cls, values, name=None, freq=None, tz=None,
we require that we have a dtype compat for the values
if we are passed a non-dtype compat, then coerce using the constructor
"""

if getattr(values, 'dtype', None) is None:
# empty, but with dtype compat
if values is None:
values = np.empty(0, dtype=_NS_DTYPE)
return cls(values, name=name, freq=freq, tz=tz,
dtype=dtype, **kwargs)
values = np.array(values, copy=False)

if not is_datetime64_dtype(values):
values = ensure_int64(values).view(_NS_DTYPE)

values = getattr(values, 'values', values)

assert isinstance(values, np.ndarray), "values is not an np.ndarray"
assert is_datetime64_dtype(values)
# DatetimeArray._simple_new will accept either i8 or M8[ns] dtypes
assert isinstance(values, np.ndarray), type(values)
Copy link
Member

@gfyoung gfyoung Oct 31, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same questions for all your assert statements:

  • Are these internal? If not, it would be nice to have user-friendly error messages.
  • Can these assert statements be tested in any way?


result = super(DatetimeIndex, cls)._simple_new(values, freq, tz,
**kwargs)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ def _nat_new(self, box=True):
def to_timestamp(self, freq=None, how='start'):
from pandas import DatetimeIndex
result = self._data.to_timestamp(freq=freq, how=how)
return DatetimeIndex._simple_new(result,
return DatetimeIndex._simple_new(result.asi8,
name=self.name,
freq=result.freq)

Expand Down