Skip to content

REF: Back IntervalArray by array instead of Index #36310

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
Oct 2, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
8987a0e
POC: back IntervalArray by array instead of Index
jbrockmendel Sep 12, 2020
153c87a
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Sep 13, 2020
8164099
Fix failing copy/view tests
jbrockmendel Sep 13, 2020
d545dac
mypy fixup
jbrockmendel Sep 13, 2020
6050ec8
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Sep 16, 2020
bd6231c
Avoid having left and right view the same data
jbrockmendel Sep 16, 2020
548efe6
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Sep 16, 2020
124938e
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Sep 17, 2020
c479e0a
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Sep 17, 2020
c4a2229
Restore left and right as Indexes
jbrockmendel Sep 18, 2020
97a0bed
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Sep 18, 2020
bfa13bb
TST: test_left_right_dont_share_data
jbrockmendel Sep 18, 2020
b45ed46
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Sep 20, 2020
e6d4bd9
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Sep 21, 2020
266512f
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Sep 22, 2020
f16be73
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Sep 22, 2020
4efdc08
pass copy=False
jbrockmendel Sep 22, 2020
1ed9623
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Sep 30, 2020
ed6a932
perf note
jbrockmendel Sep 30, 2020
fee70d8
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Oct 1, 2020
1a22095
revert whatsnew
jbrockmendel Oct 1, 2020
490a8f5
Merge branch 'master' of https://github.com/pandas-dev/pandas into re…
jbrockmendel Oct 1, 2020
865b3fc
update per comments
jbrockmendel Oct 1, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 16 additions & 6 deletions pandas/_testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -976,8 +976,16 @@ def assert_interval_array_equal(left, right, exact="equiv", obj="IntervalArray")
"""
_check_isinstance(left, right, IntervalArray)

assert_index_equal(left.left, right.left, exact=exact, obj=f"{obj}.left")
assert_index_equal(left.right, right.right, exact=exact, obj=f"{obj}.left")
if left.left.dtype.kind in ["m", "M"]:
# We have a DatetimeArray or Timed
# TODO: `exact` keyword?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe better to

kwargs = {}
if left._left.dtype.kind in ["m", "M"]:
    kwargs['check_freq'] = False
....

assert_equal(left.left, right.left, obj=f"{obj}.left", check_freq=False)
assert_equal(left.right, right.right, obj=f"{obj}.left", check_freq=False)
else:
# doesnt take check_freq
assert_equal(left.left, right.left, obj=f"{obj}.left") # TODO: exact?
assert_equal(left.right, right.right, obj=f"{obj}.left")

assert_attr_equal("closed", left, right, obj=obj)


Expand All @@ -988,20 +996,22 @@ def assert_period_array_equal(left, right, obj="PeriodArray"):
assert_attr_equal("freq", left, right, obj=obj)


def assert_datetime_array_equal(left, right, obj="DatetimeArray"):
def assert_datetime_array_equal(left, right, obj="DatetimeArray", check_freq=True):
__tracebackhide__ = True
_check_isinstance(left, right, DatetimeArray)

assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data")
assert_attr_equal("freq", left, right, obj=obj)
if check_freq:
assert_attr_equal("freq", left, right, obj=obj)
assert_attr_equal("tz", left, right, obj=obj)


def assert_timedelta_array_equal(left, right, obj="TimedeltaArray"):
def assert_timedelta_array_equal(left, right, obj="TimedeltaArray", check_freq=True):
__tracebackhide__ = True
_check_isinstance(left, right, TimedeltaArray)
assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data")
assert_attr_equal("freq", left, right, obj=obj)
if check_freq:
assert_attr_equal("freq", left, right, obj=obj)


def raise_assert_detail(obj, message, left, right, diff=None, index_values=None):
Expand Down
47 changes: 27 additions & 20 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
from pandas.core.dtypes.dtypes import IntervalDtype
from pandas.core.dtypes.generic import (
ABCDatetimeIndex,
ABCIndexClass,
ABCIntervalIndex,
ABCPeriodIndex,
ABCSeries,
Expand All @@ -43,7 +42,7 @@
from pandas.core.arrays.base import ExtensionArray, _extension_array_shared_docs
from pandas.core.arrays.categorical import Categorical
import pandas.core.common as com
from pandas.core.construction import array
from pandas.core.construction import array, extract_array
from pandas.core.indexers import check_array_indexer
from pandas.core.indexes.base import ensure_index

Expand Down Expand Up @@ -162,7 +161,7 @@ def __new__(
):

if isinstance(data, ABCSeries) and is_interval_dtype(data.dtype):
data = data._values
data = data._values # TODO: extract_array?

if isinstance(data, (cls, ABCIntervalIndex)):
left = data.left
Expand Down Expand Up @@ -243,8 +242,8 @@ def _simple_new(
)
raise ValueError(msg)

result._left = left
result._right = right
result._left = extract_array(array(left), extract_numpy=True)
result._right = extract_array(array(right), extract_numpy=True)
result._closed = closed
if verify_integrity:
result._validate()
Expand Down Expand Up @@ -512,14 +511,13 @@ def __getitem__(self, value):
right = self.right[value]

# scalar
if not isinstance(left, ABCIndexClass):
if not isinstance(left, (np.ndarray, ExtensionArray)):
if is_scalar(left) and isna(left):
return self._fill_value
if np.ndim(left) > 1:
# GH#30588 multi-dimensional indexer disallowed
raise ValueError("multi-dimensional indexing not allowed")
return Interval(left, right, self.closed)

return self._shallow_copy(left, right)

def __setitem__(self, key, value):
Expand Down Expand Up @@ -559,12 +557,12 @@ def __setitem__(self, key, value):

# Need to ensure that left and right are updated atomically, so we're
# forced to copy, update the copy, and swap in the new values.
left = self.left.copy(deep=True)
left._values[key] = value_left
left = self.left.copy()
left[key] = value_left
self._left = left

right = self.right.copy(deep=True)
right._values[key] = value_right
right = self.right.copy()
right[key] = value_right
self._right = right

def __eq__(self, other):
Expand Down Expand Up @@ -657,8 +655,10 @@ def fillna(self, value=None, method=None, limit=None):

self._check_closed_matches(value, name="value")

left = self.left.fillna(value=value.left)
right = self.right.fillna(value=value.right)
from pandas import Index

left = Index(self.left).fillna(value=value.left)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

umm why do you need to coerce to .fillna?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ATM self.left is an ndarray which doesnt have fillna

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ahh

right = Index(self.right).fillna(value=value.right)
return self._shallow_copy(left, right)

@property
Expand All @@ -684,6 +684,7 @@ def astype(self, dtype, copy=True):
array : ExtensionArray or ndarray
ExtensionArray or NumPy ndarray with 'dtype' for its dtype.
"""
from pandas import Index
from pandas.core.arrays.string_ import StringDtype

if dtype is not None:
Expand All @@ -695,8 +696,10 @@ def astype(self, dtype, copy=True):

# need to cast to different subtype
try:
new_left = self.left.astype(dtype.subtype)
new_right = self.right.astype(dtype.subtype)
# We need to use Index rules for astype to prevent casting
# np.nan entries to int subtypes
new_left = Index(self.left).astype(dtype.subtype)
new_right = Index(self.right).astype(dtype.subtype)
except TypeError as err:
msg = (
f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible"
Expand Down Expand Up @@ -758,13 +761,13 @@ def copy(self):
-------
IntervalArray
"""
left = self.left.copy(deep=True)
right = self.right.copy(deep=True)
left = self.left.copy()
right = self.right.copy()
closed = self.closed
# TODO: Could skip verify_integrity here.
return type(self).from_arrays(left, right, closed=closed)

def isna(self):
def isna(self) -> np.ndarray:
return isna(self.left)

@property
Expand All @@ -790,7 +793,9 @@ def shift(self, periods: int = 1, fill_value: object = None) -> "IntervalArray":

empty_len = min(abs(periods), len(self))
if isna(fill_value):
fill_value = self.left._na_value
from pandas import Index

fill_value = Index(self.left)._na_value
empty = IntervalArray.from_breaks([fill_value] * (empty_len + 1))
else:
empty = self._from_sequence([fill_value] * empty_len)
Expand Down Expand Up @@ -854,7 +859,9 @@ def take(self, indices, allow_fill=False, fill_value=None, axis=None, **kwargs):
fill_left = fill_right = fill_value
if allow_fill:
if fill_value is None:
fill_left = fill_right = self.left._na_value
from pandas import Index

fill_left = fill_right = Index(self.left)._na_value
elif is_interval(fill_value):
self._check_closed_matches(fill_value, name="fill_value")
fill_left, fill_right = fill_value.left, fill_value.right
Expand Down
28 changes: 20 additions & 8 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,12 +183,8 @@ def func(intvidx_self, other, sort=False):
)
)
@inherit_names(["set_closed", "to_tuples"], IntervalArray, wrap=True)
@inherit_names(
["__array__", "overlaps", "contains", "left", "right", "length"], IntervalArray
)
@inherit_names(
["is_non_overlapping_monotonic", "mid", "closed"], IntervalArray, cache=True
)
@inherit_names(["__array__", "overlaps", "contains"], IntervalArray)
@inherit_names(["is_non_overlapping_monotonic", "closed"], IntervalArray, cache=True)
class IntervalIndex(IntervalMixin, ExtensionIndex):
_typ = "intervalindex"
_comparables = ["name"]
Expand Down Expand Up @@ -407,7 +403,7 @@ def __reduce__(self):
return _new_IntervalIndex, (type(self), d), None

@Appender(Index.astype.__doc__)
def astype(self, dtype, copy=True):
def astype(self, dtype, copy: bool = True):
with rewrite_exception("IntervalArray", type(self).__name__):
new_values = self._values.astype(dtype, copy=copy)
if is_interval_dtype(new_values.dtype):
Expand Down Expand Up @@ -436,7 +432,7 @@ def is_monotonic_decreasing(self) -> bool:
return self[::-1].is_monotonic_increasing

@cache_readonly
def is_unique(self):
def is_unique(self) -> bool:
"""
Return True if the IntervalIndex contains unique elements, else False.
"""
Expand Down Expand Up @@ -891,6 +887,22 @@ def _convert_list_indexer(self, keyarr):

# --------------------------------------------------------------------

@cache_readonly
def left(self) -> Index:
return Index(self._values.left)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

copy=False on these?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add this?


@cache_readonly
def right(self) -> Index:
return Index(self._values.right)

@cache_readonly
def mid(self):
return Index(self._data.mid)

@property
def length(self):
return Index(self._data.length)

@Appender(Index.where.__doc__)
def where(self, cond, other=None):
if other is None:
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/series/indexing/test_getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def test_getitem_intlist_intindex_periodvalues(self):
@pytest.mark.parametrize("box", [list, np.array, pd.Index])
def test_getitem_intlist_intervalindex_non_int(self, box):
# GH#33404 fall back to positional since ints are unambiguous
dti = date_range("2000-01-03", periods=3)
dti = date_range("2000-01-03", periods=3)._with_freq(None)
ii = pd.IntervalIndex.from_breaks(dti)
ser = Series(range(len(ii)), index=ii)

Expand Down
14 changes: 7 additions & 7 deletions pandas/tests/util/test_assert_interval_array_equal.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ def test_interval_array_equal_periods_mismatch():
msg = """\
IntervalArray.left are different

IntervalArray.left length are different
\\[left\\]: 5, Int64Index\\(\\[0, 1, 2, 3, 4\\], dtype='int64'\\)
\\[right\\]: 6, Int64Index\\(\\[0, 1, 2, 3, 4, 5\\], dtype='int64'\\)"""
IntervalArray.left shapes are different
\\[left\\]: \\(5,\\)
\\[right\\]: \\(6,\\)"""

with pytest.raises(AssertionError, match=msg):
tm.assert_interval_array_equal(arr1, arr2)
Expand All @@ -58,8 +58,8 @@ def test_interval_array_equal_end_mismatch():
IntervalArray.left are different

IntervalArray.left values are different \\(80.0 %\\)
\\[left\\]: Int64Index\\(\\[0, 2, 4, 6, 8\\], dtype='int64'\\)
\\[right\\]: Int64Index\\(\\[0, 4, 8, 12, 16\\], dtype='int64'\\)"""
\\[left\\]: \\[0, 2, 4, 6, 8\\]
\\[right\\]: \\[0, 4, 8, 12, 16\\]"""

with pytest.raises(AssertionError, match=msg):
tm.assert_interval_array_equal(arr1, arr2)
Expand All @@ -74,8 +74,8 @@ def test_interval_array_equal_start_mismatch():
IntervalArray.left are different

IntervalArray.left values are different \\(100.0 %\\)
\\[left\\]: Int64Index\\(\\[0, 1, 2, 3\\], dtype='int64'\\)
\\[right\\]: Int64Index\\(\\[1, 2, 3, 4\\], dtype='int64'\\)"""
\\[left\\]: \\[0, 1, 2, 3\\]
\\[right\\]: \\[1, 2, 3, 4\\]"""

with pytest.raises(AssertionError, match=msg):
tm.assert_interval_array_equal(arr1, arr2)