Skip to content

Commit 0227e69

Browse files
jbrockmendel authored and jorisvandenbossche committed
EA: implement+test EA.view (#27633)
1 parent d7dcdf3 commit 0227e69

File tree

13 files changed

+73
-50
lines changed

13 files changed

+73
-50
lines changed

doc/source/reference/extensions.rst

+1
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ objects.
4444
api.extensions.ExtensionArray.argsort
4545
api.extensions.ExtensionArray.astype
4646
api.extensions.ExtensionArray.copy
47+
api.extensions.ExtensionArray.view
4748
api.extensions.ExtensionArray.dropna
4849
api.extensions.ExtensionArray.factorize
4950
api.extensions.ExtensionArray.fillna

pandas/core/arrays/base.py

+23-1
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ class ExtensionArray:
6464
shift
6565
take
6666
unique
67+
view
6768
_concat_same_type
6869
_formatter
6970
_from_factorized
@@ -146,7 +147,7 @@ class ExtensionArray:
146147
If implementing NumPy's ``__array_ufunc__`` interface, pandas expects
147148
that
148149
149-
1. You defer by raising ``NotImplemented`` when any Series are present
150+
1. You defer by returning ``NotImplemented`` when any Series are present
150151
in `inputs`. Pandas will extract the arrays and call the ufunc again.
151152
2. You define a ``_HANDLED_TYPES`` tuple as an attribute on the class.
152153
Pandas inspects this to determine whether the ufunc is valid for the
@@ -861,6 +862,27 @@ def copy(self) -> ABCExtensionArray:
861862
"""
862863
raise AbstractMethodError(self)
863864

865+
def view(self, dtype=None) -> Union[ABCExtensionArray, np.ndarray]:
866+
"""
867+
Return a view on the array.
868+
869+
Parameters
870+
----------
871+
dtype : str, np.dtype, or ExtensionDtype, optional
872+
Default None
873+
874+
Returns
875+
-------
876+
ExtensionArray or np.ndarray
877+
"""
878+
# NB:
879+
# - This must return a *new* object referencing the same data, not self.
880+
# - The only case that *must* be implemented is with dtype=None,
881+
# giving a view with the same dtype as self.
882+
if dtype is not None:
883+
raise NotImplementedError(dtype)
884+
return self[:]
885+
864886
# ------------------------------------------------------------------------
865887
# Printing
866888
# ------------------------------------------------------------------------

pandas/core/arrays/categorical.py

+5-20
Original file line numberDiff line numberDiff line change
@@ -516,19 +516,12 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
516516
return self._set_dtype(dtype)
517517
return np.array(self, dtype=dtype, copy=copy)
518518

519-
@cache_readonly
520-
def ndim(self) -> int:
521-
"""
522-
Number of dimensions of the Categorical
523-
"""
524-
return self._codes.ndim
525-
526519
@cache_readonly
527520
def size(self) -> int:
528521
"""
529522
Return the number of elements in the Categorical.
530523
"""
531-
return len(self)
524+
return self._codes.size
532525

533526
@cache_readonly
534527
def itemsize(self) -> int:
@@ -1763,18 +1756,10 @@ def ravel(self, order="C"):
17631756
)
17641757
return np.array(self)
17651758

1766-
def view(self):
1767-
"""
1768-
Return a view of myself.
1769-
1770-
For internal compatibility with numpy arrays.
1771-
1772-
Returns
1773-
-------
1774-
view : Categorical
1775-
Returns `self`!
1776-
"""
1777-
return self
1759+
def view(self, dtype=None):
1760+
if dtype is not None:
1761+
raise NotImplementedError(dtype)
1762+
return self._constructor(values=self._codes, dtype=self.dtype, fastpath=True)
17781763

17791764
def to_dense(self):
17801765
"""

pandas/core/arrays/datetimelike.py

+2-12
Original file line numberDiff line numberDiff line change
@@ -544,18 +544,8 @@ def astype(self, dtype, copy=True):
544544
return np.asarray(self, dtype=dtype)
545545

546546
def view(self, dtype=None):
547-
"""
548-
New view on this array with the same data.
549-
550-
Parameters
551-
----------
552-
dtype : numpy dtype, optional
553-
554-
Returns
555-
-------
556-
ndarray
557-
With the specified `dtype`.
558-
"""
547+
if dtype is None or dtype is self.dtype:
548+
return type(self)(self._data, dtype=self.dtype)
559549
return self._data.view(dtype=dtype)
560550

561551
# ------------------------------------------------------------------

pandas/core/arrays/interval.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -695,18 +695,14 @@ def isna(self):
695695
return isna(self.left)
696696

697697
@property
698-
def nbytes(self):
698+
def nbytes(self) -> int:
699699
return self.left.nbytes + self.right.nbytes
700700

701701
@property
702-
def size(self):
702+
def size(self) -> int:
703703
# Avoid materializing self.values
704704
return self.left.size
705705

706-
@property
707-
def shape(self):
708-
return self.left.shape
709-
710706
def take(self, indices, allow_fill=False, fill_value=None, axis=None, **kwargs):
711707
"""
712708
Take elements from the IntervalArray.

pandas/core/arrays/numpy_.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -245,11 +245,11 @@ def __setitem__(self, key, value):
245245
else:
246246
self._ndarray[key] = value
247247

248-
def __len__(self):
248+
def __len__(self) -> int:
249249
return len(self._ndarray)
250250

251251
@property
252-
def nbytes(self):
252+
def nbytes(self) -> int:
253253
return self._ndarray.nbytes
254254

255255
def isna(self):

pandas/core/arrays/sparse.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -839,7 +839,7 @@ def fill_value(self, value):
839839
self._dtype = SparseDtype(self.dtype.subtype, value)
840840

841841
@property
842-
def kind(self):
842+
def kind(self) -> str:
843843
"""
844844
The kind of sparse index for this array. One of {'integer', 'block'}.
845845
"""
@@ -854,7 +854,7 @@ def _valid_sp_values(self):
854854
mask = notna(sp_vals)
855855
return sp_vals[mask]
856856

857-
def __len__(self):
857+
def __len__(self) -> int:
858858
return self.sp_index.length
859859

860860
@property
@@ -868,7 +868,7 @@ def _fill_value_matches(self, fill_value):
868868
return self.fill_value == fill_value
869869

870870
@property
871-
def nbytes(self):
871+
def nbytes(self) -> int:
872872
return self.sp_values.nbytes + self.sp_index.nbytes
873873

874874
@property
@@ -886,7 +886,7 @@ def density(self):
886886
return r
887887

888888
@property
889-
def npoints(self):
889+
def npoints(self) -> int:
890890
"""
891891
The number of non- ``fill_value`` points.
892892

pandas/tests/extension/arrow/test_bool.py

+4
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,10 @@ def test_copy(self, data):
4141
# __setitem__ does not work, so we only have a smoke-test
4242
data.copy()
4343

44+
def test_view(self, data):
45+
# __setitem__ does not work, so we only have a smoke-test
46+
data.view()
47+
4448

4549
class TestConstructors(BaseArrowTests, base.BaseConstructorsTests):
4650
def test_from_dtype(self, data):

pandas/tests/extension/base/interface.py

+15
Original file line numberDiff line numberDiff line change
@@ -75,3 +75,18 @@ def test_copy(self, data):
7575

7676
data[1] = data[0]
7777
assert result[1] != result[0]
78+
79+
def test_view(self, data):
80+
# view with no dtype should return a shallow copy, *not* the same
81+
# object
82+
assert data[1] != data[0]
83+
84+
result = data.view()
85+
assert result is not data
86+
assert type(result) == type(data)
87+
88+
result[1] = result[0]
89+
assert data[1] == data[0]
90+
91+
# check specifically that the `dtype` kwarg is accepted
92+
data.view(dtype=None)

pandas/tests/extension/decimal/array.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -137,11 +137,11 @@ def __setitem__(self, key, value):
137137
value = decimal.Decimal(value)
138138
self._data[key] = value
139139

140-
def __len__(self):
140+
def __len__(self) -> int:
141141
return len(self._data)
142142

143143
@property
144-
def nbytes(self):
144+
def nbytes(self) -> int:
145145
n = len(self)
146146
if n:
147147
return n * sys.getsizeof(self[0])

pandas/tests/extension/json/array.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@ def __getitem__(self, item):
8080
elif isinstance(item, abc.Iterable):
8181
# fancy indexing
8282
return type(self)([self.data[i] for i in item])
83+
elif isinstance(item, slice) and item == slice(None):
84+
# Make sure we get a view
85+
return type(self)(self.data)
8386
else:
8487
# slice
8588
return type(self)(self.data[item])
@@ -103,11 +106,11 @@ def __setitem__(self, key, value):
103106
assert isinstance(v, self.dtype.type)
104107
self.data[k] = v
105108

106-
def __len__(self):
109+
def __len__(self) -> int:
107110
return len(self.data)
108111

109112
@property
110-
def nbytes(self):
113+
def nbytes(self) -> int:
111114
return sys.getsizeof(self.data)
112115

113116
def isna(self):

pandas/tests/extension/test_interval.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,10 @@ class TestGrouping(BaseInterval, base.BaseGroupbyTests):
9595

9696

9797
class TestInterface(BaseInterval, base.BaseInterfaceTests):
98-
pass
98+
def test_view(self, data):
99+
# __setitem__ incorrectly makes a copy (GH#27147), so we only
100+
# have a smoke-test
101+
data.view()
99102

100103

101104
class TestReduce(base.BaseNoReduceTests):

pandas/tests/extension/test_sparse.py

+4
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,10 @@ def test_copy(self, data):
103103
# __setitem__ does not work, so we only have a smoke-test
104104
data.copy()
105105

106+
def test_view(self, data):
107+
# __setitem__ does not work, so we only have a smoke-test
108+
data.view()
109+
106110

107111
class TestConstructors(BaseSparseTests, base.BaseConstructorsTests):
108112
pass

0 commit comments

Comments
 (0)