Skip to content

diff reduction for 24024 #24543

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits on Jan 2, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 5 additions & 7 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
is_extension_type, is_float_dtype, is_int64_dtype, is_object_dtype,
is_period_dtype, is_string_dtype, is_timedelta64_dtype, pandas_dtype)
from pandas.core.dtypes.dtypes import DatetimeTZDtype
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
from pandas.core.dtypes.generic import ABCIndexClass, ABCPandasArray, ABCSeries
from pandas.core.dtypes.missing import isna

from pandas.core import ops
Expand Down Expand Up @@ -224,7 +224,7 @@ def _simple_new(cls, values, freq=None, tz=None):
# for compat with datetime/timedelta/period shared methods,
# we can sometimes get here with int64 values. These represent
# nanosecond UTC (or tz-naive) unix timestamps
values = values.view('M8[ns]')
values = values.view(_NS_DTYPE)

assert values.dtype == 'M8[ns]', values.dtype

Expand Down Expand Up @@ -417,7 +417,7 @@ def tz(self):
Returns None when the array is tz-naive.
"""
# GH 18595
return getattr(self._dtype, "tz", None)
return getattr(self.dtype, "tz", None)

@tz.setter
def tz(self, value):
Expand Down Expand Up @@ -517,10 +517,6 @@ def astype(self, dtype, copy=True):
# ----------------------------------------------------------------
# ExtensionArray Interface

@property
def _ndarray_values(self):
return self._data

@Appender(dtl.DatetimeLikeArrayMixin._validate_fill_value.__doc__)
def _validate_fill_value(self, fill_value):
if isna(fill_value):
Expand Down Expand Up @@ -1568,6 +1564,8 @@ def sequence_to_dt64ns(data, dtype=None, copy=False,
copy = False
elif isinstance(data, ABCSeries):
data = data._values
if isinstance(data, ABCPandasArray):
data = data.to_numpy()

if hasattr(data, "freq"):
# i.e. DatetimeArray/Index
Expand Down
6 changes: 0 additions & 6 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,11 +269,6 @@ def _check_compatible_with(self, other):
def dtype(self):
return self._dtype

@property
def _ndarray_values(self):
# Ordinals
return self._data

@property
def freq(self):
"""
Expand Down Expand Up @@ -475,7 +470,6 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs):
"""
actually format my specific types
"""
# TODO(DatetimeArray): remove
values = self.astype(object)

if date_format:
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,8 +369,9 @@ def _addsub_offset_array(self, other, op):
# TimedeltaIndex can only operate with a subset of DateOffset
# subclasses. Incompatible classes will raise AttributeError,
# which we re-raise as TypeError
return dtl.DatetimeLikeArrayMixin._addsub_offset_array(self, other,
op)
return super(TimedeltaArrayMixin, self)._addsub_offset_array(
other, op
)
except AttributeError:
raise TypeError("Cannot add/subtract non-tick DateOffset to {cls}"
.format(cls=type(self).__name__))
Expand Down
21 changes: 21 additions & 0 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ class DatetimeIndexOpsMixin(ExtensionOpsMixin):
"""
common ops mixin to support a unified interface datetimelike Index
"""
_data = None # type: DatetimeLikeArrayMixin

# DatetimeLikeArrayMixin assumes subclasses are mutable, so these are
# properties there. They can be made into cache_readonly for Index
Expand All @@ -72,6 +73,9 @@ class DatetimeIndexOpsMixin(ExtensionOpsMixin):

@property
def freq(self):
"""
Return the frequency object if it is set, otherwise None.
"""
return self._eadata.freq

@freq.setter
Expand All @@ -81,6 +85,9 @@ def freq(self, value):

@property
def freqstr(self):
"""
Return the frequency object as a string if it is set, otherwise None.
"""
return self._eadata.freqstr

def unique(self, level=None):
Expand Down Expand Up @@ -111,6 +118,20 @@ def wrapper(self, other):
def _ndarray_values(self):
return self._eadata._ndarray_values

# ------------------------------------------------------------------------
# Abstract data attributes

@property
def values(self):
# type: () -> np.ndarray
# Note: PeriodArray overrides this to return an ndarray of objects.
return self._eadata._data

@property
@Appender(DatetimeLikeArrayMixin.asi8.__doc__)
def asi8(self):
return self._eadata.asi8

# ------------------------------------------------------------------------

def equals(self, other):
Expand Down
1 change: 0 additions & 1 deletion pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,6 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None):
result._eadata = dtarr
result.name = name
# For groupby perf. See note in indexes/base about _index_data
# TODO: make sure this is updated correctly if edited
result._index_data = result._data
result._reset_identity()
return result
Expand Down
21 changes: 5 additions & 16 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
from pandas.core import common as com
from pandas.core.accessor import delegate_names
from pandas.core.algorithms import unique1d
from pandas.core.arrays.datetimelike import DatelikeOps
from pandas.core.arrays.period import (
PeriodArray, period_array, validate_dtype_freq)
from pandas.core.base import _shared_docs
Expand Down Expand Up @@ -70,9 +69,9 @@ class PeriodDelegateMixin(DatetimelikeDelegateMixin):
typ='property')
@delegate_names(PeriodArray,
PeriodDelegateMixin._delegated_methods,
typ="method")
class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index,
PeriodDelegateMixin):
typ="method",
overwrite=True)
class PeriodIndex(DatetimeIndexOpsMixin, Int64Index, PeriodDelegateMixin):
"""
Immutable ndarray holding ordinal values indicating regular periods in
time such as particular years, quarters, months, etc.
Expand Down Expand Up @@ -291,20 +290,15 @@ def _eadata(self):
def values(self):
return np.asarray(self)

@property
def _values(self):
return self._data

@property
def freq(self):
# TODO(DatetimeArray): remove
# Can't simply use delegate_names since our base class is defining
# freq
return self._data.freq

@freq.setter
def freq(self, value):
value = Period._maybe_convert_freq(value)
# TODO: When this deprecation is enforced, PeriodIndex.freq can
# be removed entirely, and we'll just inherit.
msg = ('Setting {cls}.freq has been deprecated and will be '
'removed in a future version; use {cls}.asfreq instead. '
'The {cls}.freq setter is not guaranteed to work.')
Expand Down Expand Up @@ -897,11 +891,6 @@ def flags(self):
FutureWarning, stacklevel=2)
return self._ndarray_values.flags

@property
def asi8(self):
# TODO(DatetimeArray): remove
return self.view('i8')

def item(self):
"""
return the first element of the underlying data as a python
Expand Down
5 changes: 2 additions & 3 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ class TimedeltaDelegateMixin(DatetimelikeDelegateMixin):
@delegate_names(TimedeltaArray,
TimedeltaDelegateMixin._delegated_methods,
typ="method", overwrite=False)
class TimedeltaIndex(DatetimeIndexOpsMixin,
dtl.TimelikeOps, Int64Index, TimedeltaDelegateMixin):
class TimedeltaIndex(DatetimeIndexOpsMixin, dtl.TimelikeOps, Int64Index,
TimedeltaDelegateMixin):
"""
Immutable ndarray of timedelta64 data, represented internally as int64, and
which can be boxed to timedelta objects
Expand Down Expand Up @@ -238,7 +238,6 @@ def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE):
result._eadata = tdarr
result.name = name
# For groupby perf. See note in indexes/base about _index_data
# TODO: make sure this is updated correctly if edited
result._index_data = tdarr._data

result._reset_identity()
Expand Down
25 changes: 15 additions & 10 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2165,7 +2165,7 @@ def should_store(self, value):


class DatetimeLikeBlockMixin(object):
"""Mixin class for DatetimeBlock and DatetimeTZBlock."""
"""Mixin class for DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock."""

@property
def _holder(self):
Expand Down Expand Up @@ -2857,15 +2857,17 @@ def to_native_types(self, slicer=None, na_rep=None, date_format=None,
""" convert to our native types format, slicing if desired """

values = self.values
i8values = self.values.view('i8')

if slicer is not None:
values = values[..., slicer]
i8values = i8values[..., slicer]

from pandas.io.formats.format import _get_format_datetime64_from_values
format = _get_format_datetime64_from_values(values, date_format)

result = tslib.format_array_from_datetime(
values.view('i8').ravel(), tz=getattr(self.values, 'tz', None),
format=format, na_rep=na_rep).reshape(values.shape)
i8values.ravel(), tz=getattr(self.values, 'tz', None),
format=format, na_rep=na_rep).reshape(i8values.shape)
return np.atleast_2d(result)

def should_store(self, value):
Expand Down Expand Up @@ -3115,8 +3117,16 @@ def get_block_type(values, dtype=None):
dtype = dtype or values.dtype
vtype = dtype.type

if is_categorical(values):
if is_sparse(dtype):
# Need this first(ish) so that Sparse[datetime] is sparse
cls = ExtensionBlock
elif is_categorical(values):
cls = CategoricalBlock
elif issubclass(vtype, np.datetime64):
assert not is_datetime64tz_dtype(values)
cls = DatetimeBlock
elif is_datetime64tz_dtype(values):
cls = DatetimeTZBlock
elif is_interval_dtype(dtype) or is_period_dtype(dtype):
cls = ObjectValuesExtensionBlock
elif is_extension_array_dtype(values):
Expand All @@ -3128,11 +3138,6 @@ def get_block_type(values, dtype=None):
cls = TimeDeltaBlock
elif issubclass(vtype, np.complexfloating):
cls = ComplexBlock
elif issubclass(vtype, np.datetime64):
assert not is_datetime64tz_dtype(values)
cls = DatetimeBlock
elif is_datetime64tz_dtype(values):
cls = DatetimeTZBlock
elif issubclass(vtype, np.integer):
cls = IntBlock
elif dtype == np.bool_:
Expand Down
13 changes: 8 additions & 5 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1539,17 +1539,20 @@ def wrapper(left, right):
raise TypeError("{typ} cannot perform the operation "
"{op}".format(typ=type(left).__name__, op=str_rep))

elif (is_extension_array_dtype(left) or
(is_extension_array_dtype(right) and not is_scalar(right))):
# GH#22378 disallow scalar to exclude e.g. "category", "Int64"
return dispatch_to_extension_op(op, left, right)

elif is_datetime64_dtype(left) or is_datetime64tz_dtype(left):
# Give dispatch_to_index_op a chance for tests like
# test_dt64_series_add_intlike, which the index dispatching handles
# specifically.
result = dispatch_to_index_op(op, left, right, pd.DatetimeIndex)
return construct_result(left, result,
index=left.index, name=res_name,
dtype=result.dtype)

elif (is_extension_array_dtype(left) or
(is_extension_array_dtype(right) and not is_scalar(right))):
# GH#22378 disallow scalar to exclude e.g. "category", "Int64"
return dispatch_to_extension_op(op, left, right)

elif is_timedelta64_dtype(left):
result = dispatch_to_index_op(op, left, right, pd.TimedeltaIndex)
return construct_result(left, result,
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/arrays/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@


class TestDatetimeArrayConstructor(object):
def test_from_pandas_array(self):
arr = pd.array(np.arange(5, dtype=np.int64)) * 3600 * 10**9

result = DatetimeArray._from_sequence(arr, freq='infer')

expected = pd.date_range('1970-01-01', periods=5, freq='H')._eadata
tm.assert_datetime_array_equal(result, expected)

def test_mismatched_timezone_raises(self):
arr = DatetimeArray(np.array(['2000-01-01T06:00:00'], dtype='M8[ns]'),
dtype=DatetimeTZDtype(tz='US/Central'))
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/frame/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3245,7 +3245,9 @@ def test_setitem(self):
b1 = df._data.blocks[1]
b2 = df._data.blocks[2]
assert b1.values.equals(b2.values)
assert id(b1.values.values.base) != id(b2.values.values.base)
if b1.values.values.base is not None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a comment here explaining what you are checking.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should already be covered by the comment 2 lines up.

# base being None suffices to assure a copy was made
assert id(b1.values.values.base) != id(b2.values.values.base)

# with nan
df2 = df.copy()
Expand Down