Skip to content

REF: Pieces broken off of #24024 #24364

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Dec 23, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 18 additions & 5 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,9 +295,22 @@ def __iter__(self):

@property
def asi8(self):
# type: () -> ndarray
"""
Integer representation of the values.

Returns
-------
ndarray
An ndarray with int64 dtype.
"""
# do not cache or you'll create a memory leak
return self._data.view('i8')

@property
def _ndarray_values(self):
return self._data

# ----------------------------------------------------------------
# Array-Like / EA-Interface Methods

Expand Down Expand Up @@ -451,7 +464,7 @@ def _isnan(self):
return (self.asi8 == iNaT)

@property # NB: override with cache_readonly in immutable subclasses
def hasnans(self):
def _hasnans(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIRC this is a general Index property; are we changing this globally? (OK by me.)

"""
return if I have any nans; enables various perf speedups
"""
Expand All @@ -475,7 +488,7 @@ def _maybe_mask_results(self, result, fill_value=iNaT, convert=None):
This is an internal routine
"""

if self.hasnans:
if self._hasnans:
if convert:
result = result.astype(convert)
if fill_value is None:
Expand Down Expand Up @@ -678,7 +691,7 @@ def _add_delta_tdi(self, other):
new_values = checked_add_with_arr(self_i8, other_i8,
arr_mask=self._isnan,
b_mask=other._isnan)
if self.hasnans or other.hasnans:
if self._hasnans or other._hasnans:
mask = (self._isnan) | (other._isnan)
new_values[mask] = iNaT
return new_values.view('i8')
Expand Down Expand Up @@ -746,7 +759,7 @@ def _sub_period_array(self, other):
b_mask=other._isnan)

new_values = np.array([self.freq.base * x for x in new_values])
if self.hasnans or other.hasnans:
if self._hasnans or other._hasnans:
mask = (self._isnan) | (other._isnan)
new_values[mask] = NaT
return new_values
Expand Down Expand Up @@ -1063,7 +1076,7 @@ def _evaluate_compare(self, other, op):
elif lib.is_scalar(lib.item_from_zerodim(other)):
# ndarray scalar
other = [other.item()]
other = type(self)(other)
other = type(self)._from_sequence(other)

# compare
result = op(self.asi8, other.asi8)
Expand Down
26 changes: 22 additions & 4 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def wrapper(self, other):
else:
if isinstance(other, list):
try:
other = type(self)(other)
other = type(self)._from_sequence(other)
except ValueError:
other = np.array(other, dtype=np.object_)
elif not isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries,
Expand Down Expand Up @@ -147,7 +147,7 @@ def wrapper(self, other):
if o_mask.any():
result[o_mask] = nat_result

if self.hasnans:
if self._hasnans:
result[self._isnan] = nat_result

return result
Expand Down Expand Up @@ -349,14 +349,32 @@ def _box_func(self):

@property
def dtype(self):
# type: () -> Union[np.dtype, DatetimeTZDtype]
"""
The dtype for the DatetimeArray.

Returns
-------
numpy.dtype or DatetimeTZDtype
If the values are tz-naive, then ``np.dtype('datetime64[ns]')``
is returned.

If the values are tz-aware, then the ``DatetimeTZDtype``
is returned.
"""
if self.tz is None:
return _NS_DTYPE
return DatetimeTZDtype('ns', self.tz)

@property
def tz(self):
"""
Return timezone.
Return timezone, if any.

Returns
-------
datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None
Returns None when the array is tz-naive.
"""
# GH 18595
return self._tz
Expand Down Expand Up @@ -522,7 +540,7 @@ def _sub_datetime_arraylike(self, other):
other_i8 = other.asi8
new_values = checked_add_with_arr(self_i8, -other_i8,
arr_mask=self._isnan)
if self.hasnans or other.hasnans:
if self._hasnans or other._hasnans:
mask = (self._isnan) | (other._isnan)
new_values[mask] = iNaT
return new_values.view('timedelta64[ns]')
Expand Down
12 changes: 6 additions & 6 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def wrapper(self, other):
other = Period(other, freq=self.freq)
result = op(other.ordinal)

if self.hasnans:
if self._hasnans:
result[self._isnan] = nat_result

return result
Expand Down Expand Up @@ -497,7 +497,7 @@ def _time_shift(self, n, freq=None):
"{cls}._time_shift"
.format(cls=type(self).__name__))
values = self.asi8 + n * self.freq.n
if self.hasnans:
if self._hasnans:
values[self._isnan] = iNaT
return type(self)(values, freq=self.freq)

Expand Down Expand Up @@ -559,7 +559,7 @@ def asfreq(self, freq=None, how='E'):

new_data = period_asfreq_arr(ordinal, base1, base2, end)

if self.hasnans:
if self._hasnans:
new_data[self._isnan] = iNaT

return type(self)(new_data, freq=freq)
Expand All @@ -579,7 +579,7 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs):
else:
formatter = lambda dt: u'%s' % dt

if self.hasnans:
if self._hasnans:
mask = self._isnan
values[mask] = na_rep
imask = ~mask
Expand Down Expand Up @@ -668,7 +668,7 @@ def _sub_period(self, other):
new_data = asi8 - other.ordinal
new_data = np.array([self.freq * x for x in new_data])

if self.hasnans:
if self._hasnans:
new_data[self._isnan] = NaT

return new_data
Expand Down Expand Up @@ -983,7 +983,7 @@ def dt64arr_to_periodarr(data, freq, tz=None):

"""
if data.dtype != np.dtype('M8[ns]'):
raise ValueError('Wrong dtype: %s' % data.dtype)
raise ValueError('Wrong dtype: {dtype}'.format(dtype=data.dtype))
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not from #24024, but @jreback commented there asking for this change.


if freq is None:
if isinstance(data, ABCIndexClass):
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def _field_accessor(name, alias, docstring=None):
def f(self):
values = self.asi8
result = get_timedelta_field(values, alias)
if self.hasnans:
if self._hasnans:
result = self._maybe_mask_results(result, fill_value=None,
convert='float64')

Expand Down Expand Up @@ -102,7 +102,7 @@ def wrapper(self, other):
if o_mask.any():
result[o_mask] = nat_result

if self.hasnans:
if self._hasnans:
result[self._isnan] = nat_result

return result
Expand Down Expand Up @@ -704,7 +704,7 @@ def components(self):

columns = ['days', 'hours', 'minutes', 'seconds',
'milliseconds', 'microseconds', 'nanoseconds']
hasnans = self.hasnans
hasnans = self._hasnans
if hasnans:
def f(x):
if isna(x):
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -676,7 +676,7 @@ def __array__(self, dtype=None):
"""
The array interface, return my values.
"""
return self._data.view(np.ndarray)
return np.asarray(self._data, dtype=dtype)

def __array_wrap__(self, result, context=None):
"""
Expand Down Expand Up @@ -733,6 +733,8 @@ def view(self, cls=None):
Parameters
----------
dtype : numpy dtype or pandas type
Note that any integer `dtype` is treated as ``'int64'``,
regardless of the sign and size.
copy : bool, default True
By default, astype always returns a newly allocated object.
If copy is set to False and internal requirements on dtype are
Expand Down
9 changes: 7 additions & 2 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,17 +40,22 @@ class DatetimeIndexOpsMixin(DatetimeLikeArrayMixin):
# override DatetimeLikeArrayMixin method
copy = Index.copy
unique = Index.unique
take = Index.take

# DatetimeLikeArrayMixin assumes subclasses are mutable, so these are
# properties there. They can be made into cache_readonly for Index
# subclasses bc they are immutable
inferred_freq = cache_readonly(DatetimeLikeArrayMixin.inferred_freq.fget)
_isnan = cache_readonly(DatetimeLikeArrayMixin._isnan.fget)
hasnans = cache_readonly(DatetimeLikeArrayMixin.hasnans.fget)
hasnans = cache_readonly(DatetimeLikeArrayMixin._hasnans.fget)
_hasnans = hasnans # for index / array -agnostic code
_resolution = cache_readonly(DatetimeLikeArrayMixin._resolution.fget)
resolution = cache_readonly(DatetimeLikeArrayMixin.resolution.fget)

# A few methods that are shared
_maybe_mask_results = DatetimeLikeArrayMixin._maybe_mask_results

# ------------------------------------------------------------------------

def equals(self, other):
"""
Determines if two Index objects contain the same elements.
Expand Down
14 changes: 7 additions & 7 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,11 +227,11 @@ def __new__(cls, data=None,
"endpoints is deprecated. Use "
"`pandas.date_range` instead.",
FutureWarning, stacklevel=2)
result = cls._generate_range(start, end, periods,
freq=freq, tz=tz, normalize=normalize,
closed=closed, ambiguous=ambiguous)
result.name = name
return result
dtarr = DatetimeArray._generate_range(
start, end, periods,
freq=freq, tz=tz, normalize=normalize,
closed=closed, ambiguous=ambiguous)
return cls(dtarr, name=name)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think in #24024 this becomes `cls._simple_new(dtarr, name=name)`. That isn't possible yet here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto below, and for TimedeltaIndex.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it worth doing this now then, when we'll just need to change it again?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The upside is that it means the diff in 24024 is just one line. I'm OK with this either way.


if is_scalar(data):
raise TypeError("{cls}() must be called with a "
Expand Down Expand Up @@ -1471,12 +1471,12 @@ def date_range(start=None, end=None, periods=None, freq=None, tz=None,
if freq is None and com._any_none(periods, start, end):
freq = 'D'

result = DatetimeIndex._generate_range(
dtarr = DatetimeArray._generate_range(
start=start, end=end, periods=periods,
freq=freq, tz=tz, normalize=normalize,
closed=closed, **kwargs)

result.name = name
result = DatetimeIndex(dtarr, name=name)
return result


Expand Down
31 changes: 14 additions & 17 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,15 +125,6 @@ def _join_i8_wrapper(joinf, **kwargs):
_left_indexer_unique = _join_i8_wrapper(
libjoin.left_join_indexer_unique_int64, with_indexers=False)

# define my properties & methods for delegation
_other_ops = []
_bool_ops = []
_object_ops = ['freq']
_field_ops = ['days', 'seconds', 'microseconds', 'nanoseconds']
_datetimelike_ops = _field_ops + _object_ops + _bool_ops
_datetimelike_methods = ["to_pytimedelta", "total_seconds",
"round", "floor", "ceil"]

_engine_type = libindex.TimedeltaEngine

_comparables = ['name', 'freq']
Expand All @@ -143,6 +134,14 @@ def _join_i8_wrapper(joinf, **kwargs):

_freq = None

_box_func = TimedeltaArray._box_func
_bool_ops = TimedeltaArray._bool_ops
_object_ops = TimedeltaArray._object_ops
_field_ops = TimedeltaArray._field_ops
_datetimelike_ops = TimedeltaArray._datetimelike_ops
_datetimelike_methods = TimedeltaArray._datetimelike_methods
_other_ops = TimedeltaArray._other_ops

# -------------------------------------------------------------------
# Constructors

Expand All @@ -163,10 +162,9 @@ def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
"endpoints is deprecated. Use "
"`pandas.timedelta_range` instead.",
FutureWarning, stacklevel=2)
result = cls._generate_range(start, end, periods, freq,
closed=closed)
result.name = name
return result
tdarr = TimedeltaArray._generate_range(start, end, periods, freq,
closed=closed)
return cls(tdarr, name=name)

if is_scalar(data):
raise TypeError('{cls}() must be called with a '
Expand Down Expand Up @@ -764,7 +762,6 @@ def timedelta_range(start=None, end=None, periods=None, freq=None,
freq = 'D'

freq, freq_infer = dtl.maybe_infer_freq(freq)
result = TimedeltaIndex._generate_range(start, end, periods, freq,
closed=closed)
result.name = name
return result
tdarr = TimedeltaArray._generate_range(start, end, periods, freq,
closed=closed)
return TimedeltaIndex(tdarr, name=name)
4 changes: 2 additions & 2 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1592,8 +1592,8 @@ def _right_outer_join(x, y, max_groups):
def _factorize_keys(lk, rk, sort=True):
# Some pre-processing for non-ndarray lk / rk
if is_datetime64tz_dtype(lk) and is_datetime64tz_dtype(rk):
lk = lk.values
rk = rk.values
lk = lk._data
rk = rk._data

elif (is_categorical_dtype(lk) and
is_categorical_dtype(rk) and
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,
- ndarray of Timestamps if box=False
"""
from pandas import DatetimeIndex
from pandas.core.arrays import DatetimeArrayMixin as DatetimeArray
from pandas.core.arrays.datetimes import (
maybe_convert_dtype, objects_to_datetime64ns)

Expand All @@ -179,14 +180,14 @@ def _convert_listlike_datetimes(arg, box, format, name=None, tz=None,

# these are shortcutable
if is_datetime64tz_dtype(arg):
if not isinstance(arg, DatetimeIndex):
if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
return DatetimeIndex(arg, tz=tz, name=name)
if tz == 'utc':
arg = arg.tz_convert(None).tz_localize(tz)
return arg

elif is_datetime64_ns_dtype(arg):
if box and not isinstance(arg, DatetimeIndex):
if box and not isinstance(arg, (DatetimeArray, DatetimeIndex)):
try:
return DatetimeIndex(arg, tz=tz, name=name)
except ValueError:
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/arrays/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,17 @@
import pandas.util.testing as tm


class TestTimedeltaArrayConstructor(object):
def test_copy(self):
data = np.array([1, 2, 3], dtype='m8[ns]')
arr = TimedeltaArray(data, copy=False)
assert arr._data is data

arr = TimedeltaArray(data, copy=True)
assert arr._data is not data
assert arr._data.base is not data
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This last line is not present in the test in #24024.



class TestTimedeltaArray(object):
def test_from_sequence_dtype(self):
msg = r"Only timedelta64\[ns\] dtype is valid"
Expand Down
Loading