Skip to content

searchsorted, repeat broken off from #24024 #24461

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Dec 28, 2018
135 changes: 135 additions & 0 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from pandas._libs.tslibs.timestamps import (
RoundTo, maybe_integer_op_deprecated, round_nsint64)
import pandas.compat as compat
from pandas.compat.numpy import function as nv
from pandas.errors import (
AbstractMethodError, NullFrequencyError, PerformanceWarning)
from pandas.util._decorators import Appender, Substitution, deprecate_kwarg
Expand Down Expand Up @@ -82,6 +83,79 @@ def _get_attributes_dict(self):
"""
return {k: getattr(self, k, None) for k in self._attributes}

@property
def _scalar_type(self):
    # type: () -> Union[type, Tuple[type]]
    """
    Scalar class (or tuple of classes) paired with this datetime-like array.

    Concrete arrays are expected to provide it:

    * PeriodArray : Period
    * DatetimeArray : Timestamp
    * TimedeltaArray : Timedelta

    Raises
    ------
    AbstractMethodError
        Always, in this base implementation.
    """
    raise AbstractMethodError(self)

def _scalar_from_string(self, value):
    # type: (str) -> Union[Period, Timestamp, Timedelta, NaTType]
    """
    Build a scalar of this array's scalar type out of a string.

    Parameters
    ----------
    value : str
        Text to convert.

    Returns
    -------
    Period, Timestamp, or Timedelta, or NaT
        Whatever the type of ``self._scalar_type`` is.

    Raises
    ------
    AbstractMethodError
        Always, in this base implementation; concrete arrays override it.

    Notes
    -----
    This should call ``self._check_compatible_with`` before
    unboxing the result.
    """
    raise AbstractMethodError(self)

def _unbox_scalar(self, value):
    # type: (Union[Period, Timestamp, Timedelta, NaTType]) -> int
    """
    Unbox the integer value of a scalar `value`.

    Parameters
    ----------
    value : Union[Period, Timestamp, Timedelta, NaTType]
        Scalar to convert to its integer representation.

    Returns
    -------
    int

    Raises
    ------
    AbstractMethodError
        Always, in this base implementation; concrete arrays override it.

    Examples
    --------
    >>> self._unbox_scalar(Timedelta('10s'))  # doctest: +SKIP
    10000000000
    """
    # NOTE: the doctest directive above must stay lowercase; doctest does
    # not recognise ``# DOCTEST:`` and would try to run the example.
    raise AbstractMethodError(self)

def _check_compatible_with(self, other):
    # type: (Union[Period, Timestamp, Timedelta, NaTType]) -> None
    """
    Check that `other` may be combined with `self`.

    What is checked depends on the concrete array:

    * DatetimeArray verifies that the timezones (if any) match
    * PeriodArray verifies that the freq matches
    * TimedeltaArray has no verification

    NaT is treated as compatible in every case.

    Parameters
    ----------
    other
        Object to validate against `self`.

    Raises
    ------
    Exception
        When `other` is not compatible. This base implementation always
        raises AbstractMethodError; concrete arrays override it.
    """
    raise AbstractMethodError(self)


class DatelikeOps(object):
"""
Expand Down Expand Up @@ -515,6 +589,67 @@ def _values_for_factorize(self):
def _from_factorized(cls, values, original):
return cls(values, dtype=original.dtype)

def _values_for_argsort(self):
    """Return the underlying ``_data`` object, unchanged, for argsort."""
    backing = self._data
    return backing

# ------------------------------------------------------------------
# Additional array methods
# These are not part of the EA API, but we implement them because
# pandas assumes they're there.

def searchsorted(self, value, side='left', sorter=None):
    """
    Find indices where elements should be inserted to maintain order.

    Find the indices into a sorted array `self` such that, if the
    corresponding elements in `value` were inserted before the indices,
    the order of `self` would be preserved.

    Parameters
    ----------
    value : str, scalar, or array of the same type as `self`
        Values to insert into `self`. Accepted inputs are an instance of
        ``self._scalar_type``, a null scalar such as NaT, an array of
        ``type(self)``, or a string (converted first with
        ``self._scalar_from_string``). Other list-likes are rejected.
    side : {'left', 'right'}, optional
        If 'left', the index of the first suitable location found is given.
        If 'right', return the last such index. If there is no suitable
        index, return either 0 or N (where N is the length of `self`).
    sorter : 1-D array_like, optional
        Optional array of integer indices that sort `self` into ascending
        order. They are typically the result of ``np.argsort``.

    Returns
    -------
    indices : int or array of ints
        Insertion point(s) with the same shape as `value`.

    Raises
    ------
    ValueError
        If `value` is not one of the accepted types.
    """
    if isinstance(value, compat.string_types):
        value = self._scalar_from_string(value)

    if not isinstance(value, (self._scalar_type, type(self))):
        # ``isna`` returns an array for list-like input. Taking the truth
        # value of that array either raises an unrelated "ambiguous"
        # error or lets a one-element list through, so only a *scalar*
        # null result is accepted here.
        null = isna(value)
        if hasattr(null, '__len__') or not null:
            raise ValueError("Unexpected type for 'value': {valtype}"
                             .format(valtype=type(value)))

    self._check_compatible_with(value)
    if isinstance(value, type(self)):
        # Same-type array: compare on the integer representation.
        value = value.asi8
    else:
        value = self._unbox_scalar(value)

    return self.asi8.searchsorted(value, side=side, sorter=sorter)

def repeat(self, repeats, *args, **kwargs):
    """
    Repeat elements of an array.

    The extra positional and keyword arguments are only validated with
    ``nv.validate_repeat``; they are not forwarded to the repeat call.

    Parameters
    ----------
    repeats
        Passed through to ``self._data.repeat``.

    Returns
    -------
    type(self)
        New array built from the repeated data with the dtype of `self`.

    See Also
    --------
    numpy.ndarray.repeat
    """
    nv.validate_repeat(args, kwargs)
    repeated = self._data.repeat(repeats)
    klass = type(self)
    return klass(repeated, dtype=self.dtype)

# ------------------------------------------------------------------
# Null Handling

Expand Down
21 changes: 21 additions & 0 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin,
_data
"""
_typ = "datetimearray"
_scalar_type = Timestamp

# define my properties & methods for delegation
_bool_ops = ['is_month_start', 'is_month_end',
Expand Down Expand Up @@ -347,6 +348,26 @@ def _generate_range(cls, start, end, periods, freq, tz=None,

return cls._simple_new(index.asi8, freq=freq, tz=tz)

# -----------------------------------------------------------------
# DatetimeLike Interface

def _unbox_scalar(self, value):
    """
    Return ``value.value`` for a Timestamp or NaT.

    Non-null values are first passed through
    ``self._check_compatible_with``.

    Raises
    ------
    ValueError
        If `value` is neither NaT nor an instance of ``self._scalar_type``.
    """
    acceptable = value is NaT or isinstance(value, self._scalar_type)
    if not acceptable:
        raise ValueError("'value' should be a Timestamp.")

    # Compatibility is only checked for non-null scalars.
    is_null = isna(value)
    if not is_null:
        self._check_compatible_with(value)
    return value.value

def _scalar_from_string(self, value):
    """Build a Timestamp from `value`, passing ``self.tz`` as its tz."""
    own_tz = self.tz
    return Timestamp(value, tz=own_tz)

def _check_compatible_with(self, other):
    """
    Ensure `other` has the same timezone as this array.

    NaT is always accepted.

    Raises
    ------
    ValueError
        If ``timezones.tz_compare`` reports that the timezones differ.
    """
    if other is NaT:
        return

    same_tz = timezones.tz_compare(self.tz, other.tz)
    if same_tz:
        return

    template = "Timezones don't match. '{own} != {other}'"
    raise ValueError(template.format(own=self.tz, other=other.tz))

# -----------------------------------------------------------------
# Descriptive Properties

Expand Down
22 changes: 22 additions & 0 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ class PeriodArray(dtl.DatetimeLikeArrayMixin,
__array_priority__ = 1000
_attributes = ["freq"]
_typ = "periodarray" # ABCPeriodArray
_scalar_type = Period

# Names others delegate to us
_other_ops = []
Expand Down Expand Up @@ -240,7 +241,28 @@ def _generate_range(cls, start, end, periods, freq, fields):

return subarr, freq

# -----------------------------------------------------------------
# DatetimeLike Interface

def _unbox_scalar(self, value):
    # type: (Union[Period, NaTType]) -> int
    """
    Return the integer behind a Period (its ``ordinal``) or NaT (its ``value``).

    A non-null Period is first passed through
    ``self._check_compatible_with``.

    Raises
    ------
    ValueError
        If `value` is neither NaT nor an instance of ``self._scalar_type``.
    """
    if value is NaT:
        return value.value

    if not isinstance(value, self._scalar_type):
        raise ValueError("'value' should be a Period. Got '{val}' instead."
                         .format(val=value))

    if not isna(value):
        self._check_compatible_with(value)
    return value.ordinal

def _scalar_from_string(self, value):
    # type: (str) -> Period
    """Build a Period from `value`, passing ``self.freq`` as its freq."""
    own_freq = self.freq
    return Period(value, freq=own_freq)

def _check_compatible_with(self, other):
    """
    Ensure `other` has the same ``freqstr`` as this array.

    NaT is always accepted. On a mismatch the error is produced by
    ``_raise_on_incompatible``.
    """
    if other is not NaT and self.freqstr != other.freqstr:
        _raise_on_incompatible(self, other)

Expand Down
17 changes: 17 additions & 0 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ def wrapper(self, other):

class TimedeltaArrayMixin(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps):
_typ = "timedeltaarray"
_scalar_type = Timedelta
__array_priority__ = 1000
# define my properties & methods for delegation
_other_ops = []
Expand Down Expand Up @@ -221,6 +222,22 @@ def _generate_range(cls, start, end, periods, freq, closed=None):

return cls._simple_new(index, freq=freq)

# ----------------------------------------------------------------
# DatetimeLike Interface

def _unbox_scalar(self, value):
    """
    Return ``value.value`` for a Timedelta or NaT.

    The value is passed through ``self._check_compatible_with`` first.

    Raises
    ------
    ValueError
        If `value` is neither NaT nor an instance of ``self._scalar_type``.
    """
    acceptable = value is NaT or isinstance(value, self._scalar_type)
    if not acceptable:
        raise ValueError("'value' should be a Timedelta.")
    self._check_compatible_with(value)
    return value.value

def _scalar_from_string(self, value):
    """Build a Timedelta from the string `value`."""
    parsed = Timedelta(value)
    return parsed

def _check_compatible_with(self, other):
    """Accept any `other`: nothing is validated for timedeltas."""
    return None

# ----------------------------------------------------------------
# Array-Like / EA-Interface Methods

Expand Down
1 change: 1 addition & 0 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,7 @@ def repeat(self, repeats, axis=None):
nv.validate_repeat(tuple(), dict(axis=axis))
freq = self.freq if is_period_dtype(self) else None
return self._shallow_copy(self.asi8.repeat(repeats), freq=freq)
# TODO: dispatch to _eadata

@Appender(_index_shared_docs['where'] % _index_doc_kwargs)
def where(self, cond, other=None):
Expand Down
53 changes: 53 additions & 0 deletions pandas/tests/arrays/test_datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import numpy as np
import pytest

import pandas.compat as compat

import pandas as pd
from pandas.core.arrays import (
DatetimeArrayMixin as DatetimeArray, PeriodArray,
Expand Down Expand Up @@ -129,6 +131,57 @@ def test_concat_same_type(self):

tm.assert_index_equal(self.index_cls(result), expected)

def test_unbox_scalar(self):
    # _unbox_scalar must give an integer for an element and for NaT, and
    # must reject an arbitrary string.
    step = 24 * 3600 * 10**9
    values = np.arange(10, dtype='i8') * step
    arr = self.array_cls(values, freq='D')

    integer_types = (int, compat.long)
    for scalar in (arr[0], pd.NaT):
        unboxed = arr._unbox_scalar(scalar)
        assert isinstance(unboxed, integer_types)

    with pytest.raises(ValueError):
        arr._unbox_scalar('foo')

def test_check_compatible_with(self):
    # None of these calls should raise: an element of the array, a slice
    # of the array, and NaT are all expected to be compatible.
    step = 24 * 3600 * 10**9
    values = np.arange(10, dtype='i8') * step
    arr = self.array_cls(values, freq='D')

    for candidate in (arr[0], arr[:1], pd.NaT):
        arr._check_compatible_with(candidate)

def test_scalar_from_string(self):
    # Round trip: str(element) parsed back must equal the element.
    step = 24 * 3600 * 10**9
    values = np.arange(10, dtype='i8') * step
    arr = self.array_cls(values, freq='D')

    first = arr[0]
    parsed = arr._scalar_from_string(str(first))
    assert parsed == first

def test_searchsorted(self):
    # Covers scalar input, same-type array input, and NaT, for the
    # default side and for side="right".
    step = 24 * 3600 * 10**9
    values = np.arange(10, dtype='i8') * step
    arr = self.array_cls(values, freq='D')

    # scalar
    position = arr.searchsorted(arr[1])
    assert position == 1

    position = arr.searchsorted(arr[2], side="right")
    assert position == 3

    # own-type
    window = arr[1:3]
    positions = arr.searchsorted(window)
    tm.assert_numpy_array_equal(positions,
                                np.array([1, 2], dtype=np.int64))

    positions = arr.searchsorted(arr[1:3], side="right")
    tm.assert_numpy_array_equal(positions,
                                np.array([2, 3], dtype=np.int64))

    # Following numpy convention, NaT goes at the beginning
    # (unlike NaN which goes at the end)
    position = arr.searchsorted(pd.NaT)
    assert position == 0


class TestDatetimeArray(SharedTests):
index_cls = pd.DatetimeIndex
Expand Down