Skip to content

Commit e239ab3

Browse files
jbrockmendel authored and Pingviinituutti committed
searchsorted, repeat broken off from pandas-dev#24024 (pandas-dev#24461)
1 parent 89cd33c commit e239ab3

File tree

6 files changed

+249
-0
lines changed

6 files changed

+249
-0
lines changed

pandas/core/arrays/datetimelike.py

+135
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from pandas._libs.tslibs.timestamps import (
1313
RoundTo, maybe_integer_op_deprecated, round_nsint64)
1414
import pandas.compat as compat
15+
from pandas.compat.numpy import function as nv
1516
from pandas.errors import (
1617
AbstractMethodError, NullFrequencyError, PerformanceWarning)
1718
from pandas.util._decorators import Appender, Substitution, deprecate_kwarg
@@ -82,6 +83,79 @@ def _get_attributes_dict(self):
8283
"""
8384
return {k: getattr(self, k, None) for k in self._attributes}
8485

86+
@property
def _scalar_type(self):
    # type: () -> Union[type, Tuple[type]]
    """
    The scalar class that boxes a single element of this array.

    Concrete datetime-like arrays are expected to provide:

    * PeriodArray : Period
    * DatetimeArray : Timestamp
    * TimedeltaArray : Timedelta

    Raises
    ------
    AbstractMethodError
        Always, in this base implementation.
    """
    raise AbstractMethodError(self)
96+
97+
def _scalar_from_string(self, value):
    # type: (str) -> Union[Period, Timestamp, Timedelta, NaTType]
    """
    Parse a string into the scalar type of this array.

    Parameters
    ----------
    value : str
        Text to be parsed.

    Returns
    -------
    Period, Timestamp, or Timedelta, or NaT
        Whatever the type of ``self._scalar_type`` is.

    Raises
    ------
    AbstractMethodError
        Always, in this base implementation.

    Notes
    -----
    This should call ``self._check_compatible_with`` before
    unboxing the result.
    """
    raise AbstractMethodError(self)
117+
118+
def _unbox_scalar(self, value):
    # type: (Union[Period, Timestamp, Timedelta, NaTType]) -> int
    """
    Unbox the integer value of a scalar `value`.

    Parameters
    ----------
    value : Union[Period, Timestamp, Timedelta, NaTType]
        A scalar of ``self._scalar_type``, or NaT.

    Returns
    -------
    int

    Raises
    ------
    AbstractMethodError
        Always, in this base implementation.

    Examples
    --------
    >>> self._unbox_scalar(Timedelta('10s'))  # doctest: +SKIP
    10000000000
    """
    # NOTE: doctest directives are case-sensitive; the lowercase
    # ``doctest:`` spelling above is what makes the skip take effect.
    raise AbstractMethodError(self)
137+
138+
def _check_compatible_with(self, other):
    # type: (Union[Period, Timestamp, Timedelta, NaTType]) -> None
    """
    Verify that `self` and `other` are compatible.

    What "compatible" means depends on the concrete array:

    * DatetimeArray verifies that the timezones (if any) match
    * PeriodArray verifies that the freq matches
    * TimedeltaArray has no verification

    In each case, NaT is considered compatible.

    Parameters
    ----------
    other
        The object to validate against `self`.  ``searchsorted`` passes
        either a scalar or an array of the same type as `self`.

    Raises
    ------
    Exception
        When `other` is not compatible; the concrete exception type is
        chosen by the implementing class.
    """
    raise AbstractMethodError(self)
158+
85159

86160
class DatelikeOps(object):
87161
"""
@@ -515,6 +589,67 @@ def _values_for_factorize(self):
515589
def _from_factorized(cls, values, original):
516590
return cls(values, dtype=original.dtype)
517591

592+
def _values_for_argsort(self):
    """
    Return the backing ``_data`` of this array unchanged.

    Judging by the name, these are the values that argsort operates on.
    """
    return self._data
594+
595+
# ------------------------------------------------------------------
596+
# Additional array methods
597+
# These are not part of the EA API, but we implement them because
598+
# pandas assumes they're there.
599+
600+
def searchsorted(self, value, side='left', sorter=None):
    """
    Find indices where elements should be inserted to maintain order.

    Find the indices into a sorted array `self` such that, if the
    corresponding elements in `value` were inserted before the indices,
    the order of `self` would be preserved.

    Parameters
    ----------
    value : str, scalar, null, or array of the same type as `self`
        Values to insert into `self`.  A string is first converted with
        ``self._scalar_from_string``; a scalar must be an instance of
        ``self._scalar_type`` or be null according to ``isna``.
    side : {'left', 'right'}, optional
        If 'left', the index of the first suitable location found is given.
        If 'right', return the last such index.  If there is no suitable
        index, return either 0 or N (where N is the length of `self`).
    sorter : 1-D array_like, optional
        Optional array of integer indices that sort `self` into ascending
        order. They are typically the result of ``np.argsort``.

    Returns
    -------
    indices : array of ints
        Array of insertion points with the same shape as `value`.

    Raises
    ------
    ValueError
        If `value` is none of the accepted types.  Whatever
        ``self._check_compatible_with`` raises is propagated as well.
    """
    # Strings are promoted to our scalar type before any validation.
    if isinstance(value, compat.string_types):
        value = self._scalar_from_string(value)

    if (not isinstance(value, (self._scalar_type, type(self)))
            and not isna(value)):
        raise ValueError("Unexpected type for 'value': {valtype}"
                         .format(valtype=type(value)))

    self._check_compatible_with(value)

    # The actual search runs on the i8 representation.
    if isinstance(value, type(self)):
        key = value.asi8
    else:
        key = self._unbox_scalar(value)

    return self.asi8.searchsorted(key, side=side, sorter=sorter)
640+
641+
def repeat(self, repeats, *args, **kwargs):
    """
    Repeat elements of an array.

    Parameters
    ----------
    repeats
        Forwarded unchanged to ``self._data.repeat``.
    *args, **kwargs
        Only checked by ``nv.validate_repeat``; they are not used
        otherwise.

    Returns
    -------
    A new array of the same type and dtype as `self`.

    See Also
    --------
    numpy.ndarray.repeat
    """
    nv.validate_repeat(args, kwargs)
    constructor = type(self)
    return constructor(self._data.repeat(repeats), dtype=self.dtype)
652+
518653
# ------------------------------------------------------------------
519654
# Null Handling
520655

pandas/core/arrays/datetimes.py

+21
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ class DatetimeArrayMixin(dtl.DatetimeLikeArrayMixin,
171171
_data
172172
"""
173173
_typ = "datetimearray"
174+
_scalar_type = Timestamp
174175

175176
# define my properties & methods for delegation
176177
_bool_ops = ['is_month_start', 'is_month_end',
@@ -347,6 +348,26 @@ def _generate_range(cls, start, end, periods, freq, tz=None,
347348

348349
return cls._simple_new(index.asi8, freq=freq, tz=tz)
349350

351+
# -----------------------------------------------------------------
352+
# DatetimeLike Interface
353+
354+
def _unbox_scalar(self, value):
    """
    Return the ``.value`` attribute of a Timestamp or NaT.

    Raises
    ------
    ValueError
        If `value` is neither NaT nor an instance of
        ``self._scalar_type``.  Errors from
        ``self._check_compatible_with`` are propagated for non-null
        values.
    """
    acceptable = isinstance(value, self._scalar_type) or value is NaT
    if not acceptable:
        raise ValueError("'value' should be a Timestamp.")

    # Nulls skip the compatibility check.
    if not isna(value):
        self._check_compatible_with(value)
    return value.value
360+
361+
def _scalar_from_string(self, value):
    """Build a Timestamp from `value`, passing ``self.tz`` as its tz."""
    own_tz = self.tz
    return Timestamp(value, tz=own_tz)
363+
364+
def _check_compatible_with(self, other):
    """
    Check that `other` has the same timezone as `self`.

    NaT is always accepted.

    Raises
    ------
    ValueError
        If ``timezones.tz_compare`` reports that ``self.tz`` and
        ``other.tz`` differ.
    """
    if other is not NaT and not timezones.tz_compare(self.tz, other.tz):
        raise ValueError("Timezones don't match. '{own} != {other}'"
                         .format(own=self.tz, other=other.tz))
370+
350371
# -----------------------------------------------------------------
351372
# Descriptive Properties
352373

pandas/core/arrays/period.py

+22
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@ class PeriodArray(dtl.DatetimeLikeArrayMixin,
137137
__array_priority__ = 1000
138138
_attributes = ["freq"]
139139
_typ = "periodarray" # ABCPeriodArray
140+
_scalar_type = Period
140141

141142
# Names others delegate to us
142143
_other_ops = []
@@ -240,7 +241,28 @@ def _generate_range(cls, start, end, periods, freq, fields):
240241

241242
return subarr, freq
242243

244+
# -----------------------------------------------------------------
245+
# DatetimeLike Interface
246+
247+
def _unbox_scalar(self, value):
    # type: (Union[Period, NaTType]) -> int
    """
    Return the integer behind a Period (its ordinal) or NaT (its value).

    Raises
    ------
    ValueError
        If `value` is neither NaT nor an instance of
        ``self._scalar_type``.  Errors from
        ``self._check_compatible_with`` are propagated.
    """
    if value is NaT:
        return value.value

    if not isinstance(value, self._scalar_type):
        raise ValueError("'value' should be a Period. Got '{val}' instead."
                         .format(val=value))

    if not isna(value):
        self._check_compatible_with(value)
    return value.ordinal
258+
259+
def _scalar_from_string(self, value):
    # type: (str) -> Period
    """Build a Period from `value`, passing ``self.freq`` as its freq."""
    own_freq = self.freq
    return Period(value, freq=own_freq)
262+
243263
def _check_compatible_with(self, other):
264+
if other is NaT:
265+
return
244266
if self.freqstr != other.freqstr:
245267
_raise_on_incompatible(self, other)
246268

pandas/core/arrays/timedeltas.py

+17
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ def wrapper(self, other):
116116

117117
class TimedeltaArrayMixin(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps):
118118
_typ = "timedeltaarray"
119+
_scalar_type = Timedelta
119120
__array_priority__ = 1000
120121
# define my properties & methods for delegation
121122
_other_ops = []
@@ -221,6 +222,22 @@ def _generate_range(cls, start, end, periods, freq, closed=None):
221222

222223
return cls._simple_new(index, freq=freq)
223224

225+
# ----------------------------------------------------------------
226+
# DatetimeLike Interface
227+
228+
def _unbox_scalar(self, value):
    """
    Return the ``.value`` attribute of a Timedelta or NaT.

    Raises
    ------
    ValueError
        If `value` is neither NaT nor an instance of
        ``self._scalar_type``.
    """
    acceptable = isinstance(value, self._scalar_type) or value is NaT
    if not acceptable:
        raise ValueError("'value' should be a Timedelta.")
    self._check_compatible_with(value)
    return value.value
233+
234+
def _scalar_from_string(self, value):
    """Build a Timedelta from `value`."""
    parsed = Timedelta(value)
    return parsed
236+
237+
def _check_compatible_with(self, other):
    """Accept any `other`; there is nothing to validate for timedeltas."""
    return None
240+
224241
# ----------------------------------------------------------------
225242
# Array-Like / EA-Interface Methods
226243

pandas/core/indexes/datetimelike.py

+1
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,7 @@ def repeat(self, repeats, axis=None):
477477
nv.validate_repeat(tuple(), dict(axis=axis))
478478
freq = self.freq if is_period_dtype(self) else None
479479
return self._shallow_copy(self.asi8.repeat(repeats), freq=freq)
480+
# TODO: dispatch to _eadata
480481

481482
@Appender(_index_shared_docs['where'] % _index_doc_kwargs)
482483
def where(self, cond, other=None):

pandas/tests/arrays/test_datetimelike.py

+53
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
import numpy as np
33
import pytest
44

5+
import pandas.compat as compat
6+
57
import pandas as pd
68
from pandas.core.arrays import (
79
DatetimeArrayMixin as DatetimeArray, PeriodArray,
@@ -129,6 +131,57 @@ def test_concat_same_type(self):
129131

130132
tm.assert_index_equal(self.index_cls(result), expected)
131133

134+
def test_unbox_scalar(self):
    """_unbox_scalar gives an integer for scalars and NaT, rejects str."""
    day_in_ns = 24 * 3600 * 10**9
    arr = self.array_cls(np.arange(10, dtype='i8') * day_in_ns, freq='D')
    int_types = (int, compat.long)

    unboxed = arr._unbox_scalar(arr[0])
    assert isinstance(unboxed, int_types)

    unboxed = arr._unbox_scalar(pd.NaT)
    assert isinstance(unboxed, int_types)

    # A plain string is not a valid scalar for any of the arrays.
    with pytest.raises(ValueError):
        arr._unbox_scalar('foo')
145+
146+
def test_check_compatible_with(self):
    """An own scalar, an own slice and NaT must all pass the check."""
    day_in_ns = 24 * 3600 * 10**9
    arr = self.array_cls(np.arange(10, dtype='i8') * day_in_ns, freq='D')

    # None of these calls may raise.
    arr._check_compatible_with(arr[0])
    arr._check_compatible_with(arr[:1])
    arr._check_compatible_with(pd.NaT)
153+
154+
def test_scalar_from_string(self):
    """Round trip: str(element) parses back to an equal element."""
    day_in_ns = 24 * 3600 * 10**9
    arr = self.array_cls(np.arange(10, dtype='i8') * day_in_ns, freq='D')
    first = arr[0]
    parsed = arr._scalar_from_string(str(first))
    assert parsed == first
159+
160+
def test_searchsorted(self):
    """searchsorted with scalar, own-type array and NaT arguments."""
    day_in_ns = 24 * 3600 * 10**9
    arr = self.array_cls(np.arange(10, dtype='i8') * day_in_ns, freq='D')

    # scalar
    position = arr.searchsorted(arr[1])
    assert position == 1

    position = arr.searchsorted(arr[2], side="right")
    assert position == 3

    # own-type
    chunk = arr[1:3]
    positions = arr.searchsorted(chunk)
    tm.assert_numpy_array_equal(positions,
                                np.array([1, 2], dtype=np.int64))

    positions = arr.searchsorted(chunk, side="right")
    tm.assert_numpy_array_equal(positions,
                                np.array([2, 3], dtype=np.int64))

    # Following numpy convention, NaT goes at the beginning
    # (unlike NaN which goes at the end)
    position = arr.searchsorted(pd.NaT)
    assert position == 0
184+
132185

133186
class TestDatetimeArray(SharedTests):
134187
index_cls = pd.DatetimeIndex

0 commit comments

Comments
 (0)