-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
WIP/PERF: block-wise ops for frame-with-series axis=1 #32997
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
912c2d0
c33ba80
0a42227
4e401cb
679e5d3
84f287e
5b92d03
0f0ec28
02ac976
0ad9c82
964cedb
23ac98a
8e5ba59
3aa4226
bc24a8c
628513e
92cf475
ebeb6bc
6473fcd
192d736
10c7b04
662aef3
b295c02
804dfc6
1db3b09
563da98
78e0ccd
e8acc26
6d74398
21d3859
c8651ed
5edf4e1
5717ae5
88806dd
579a31a
0617a17
968ba87
7a9c3f6
577ebf4
9738893
2a1cb23
37938db
742c962
30d6c2e
f9d3895
5abec0b
ae38398
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
""" | ||
Implementations of high-level numpy functions that are ExtensionArray-compatible. | ||
""" | ||
import numpy as np | ||
|
||
from pandas._typing import ArrayLike | ||
|
||
|
||
def tile(arr: ArrayLike, shape) -> ArrayLike:
    """
    ExtensionArray-compatible counterpart of ``np.tile`` (not implemented).

    Parameters
    ----------
    arr : ArrayLike
    shape : object
        Currently unused.

    Raises
    ------
    NotImplementedError
        Always; this is a placeholder.
    """
    raise NotImplementedError()
|
||
|
||
def broadcast_to(arr: ArrayLike, shape, orient=None) -> ArrayLike:
    """
    Broadcast an ndarray or ExtensionArray to the given shape.

    Parameters
    ----------
    arr : np.ndarray or ExtensionArray
        For anything that is not an ndarray, the first element of
        ``arr._values_for_factorize()`` is broadcast and the result is
        rebuilt with ``type(arr)._from_factorized``.
    shape : tuple
        Target shape, passed through to ``np.broadcast_to``.
    orient : {"rowlike", "columnlike"}, optional
        Only consulted when the underlying values are 1-dimensional.
        "rowlike" reshapes them to ``(1, -1)`` and "columnlike" to
        ``(-1, 1)`` before broadcasting.  With ``None`` the values are
        handed to ``np.broadcast_to`` unchanged.

    Returns
    -------
    np.ndarray or ExtensionArray
        ndarray input gives the ``np.broadcast_to`` result directly;
        otherwise an object built by ``type(arr)._from_factorized``.

    Raises
    ------
    ValueError
        If the values are 1-dimensional and ``orient`` is neither None,
        "rowlike" nor "columnlike".
    """
    is_ndarray = isinstance(arr, np.ndarray)
    if is_ndarray:
        values = arr
    else:
        # ExtensionArray
        # NOTE(review): reliance on _values_for_factorize/_from_factorized was
        # questioned in review (e.g. fletcher does not use
        # _values_for_factorize); a _values_for_argsort-based round-trip was
        # floated as an alternative.
        values = arr._values_for_factorize()[0]

    # TODO: if we are ndim==size==1 it shouldn't matter whether
    #  rowlike/columnlike?
    if values.ndim == 1 and orient is not None:
        # Support treating a 1-dimensional array as either a row or column.
        # Validate explicitly rather than with ``assert`` so the check still
        # runs under ``python -O``.
        if orient == "rowlike":
            values = values.reshape(1, -1)
        elif orient == "columnlike":
            values = values.reshape(-1, 1)
        else:
            raise ValueError(
                f"orient must be 'rowlike' or 'columnlike', got {orient!r}"
            )

    # NOTE(review): per the review thread, the broadcast result is not returned
    # to users; it is used as an intermediate object in the arithmetic op.
    btvalues = np.broadcast_to(values, shape)

    if is_ndarray:
        return btvalues
    return type(arr)._from_factorized(btvalues, arr)
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -455,6 +455,12 @@ def ravel(self, *args, **kwargs): | |
data = self._data.ravel(*args, **kwargs) | ||
return type(self)(data, dtype=self.dtype) | ||
|
||
@property
def T(self):
    """
    Transposed version of this array.

    Transposes ``self._data`` and wraps it in a new instance of the same
    type, passing only ``dtype``; any freq is therefore dropped.

    NOTE(review): per the review thread this is only used in a
    datetimelike context.
    """
    # Note: we drop any freq
    return type(self)(self._data.T, dtype=self.dtype)
|
||
@property | ||
def _box_func(self): | ||
""" | ||
|
@@ -561,7 +567,7 @@ def __getitem__(self, key): | |
else: | ||
key = check_array_indexer(self, key) | ||
|
||
is_period = is_period_dtype(self) | ||
is_period = is_period_dtype(self.dtype) | ||
if is_period: | ||
freq = self.freq | ||
else: | ||
|
@@ -577,7 +583,7 @@ def __getitem__(self, key): | |
freq = self.freq | ||
|
||
result = getitem(key) | ||
if result.ndim > 1: | ||
if result.ndim > 1 and not is_period and not is_datetime64tz_dtype(self.dtype): | ||
# To support MPL which performs slicing with 2 dim | ||
# even though it only has 1 dim by definition | ||
return result | ||
|
@@ -1208,9 +1214,13 @@ def _add_timedelta_arraylike(self, other): | |
|
||
self_i8 = self.asi8 | ||
other_i8 = other.asi8 | ||
# TODO: do we need to worry about these having the same row/column order? | ||
new_values = checked_add_with_arr( | ||
self_i8, other_i8, arr_mask=self._isnan, b_mask=other._isnan | ||
) | ||
self_i8.ravel(), | ||
other_i8.ravel(), | ||
arr_mask=self._isnan.ravel(), | ||
b_mask=other._isnan.ravel(), | ||
).reshape(self.shape) | ||
if self._hasnans or other._hasnans: | ||
mask = (self._isnan) | (other._isnan) | ||
new_values[mask] = iNaT | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,6 +32,7 @@ | |
validate_dtype_freq, | ||
) | ||
import pandas.core.common as com | ||
from pandas.core.indexers import deprecate_ndim_indexing | ||
import pandas.core.indexes.base as ibase | ||
from pandas.core.indexes.base import ( | ||
InvalidIndexError, | ||
|
@@ -350,6 +351,17 @@ def _int64index(self) -> Int64Index: | |
# ------------------------------------------------------------------------ | ||
# Index Methods | ||
|
||
def __getitem__(self, key):
    """
    Index into the PeriodIndex, unwrapping 2-dimensional results.

    Delegates to the parent ``__getitem__``.  When that returns a
    PeriodIndex whose ``_data`` is 2-dimensional, a deprecation warning is
    issued through ``deprecate_ndim_indexing`` and ``result._data._data``
    is returned instead of the index.
    """
    # PeriodArray.__getitem__ returns PeriodArray for 2D lookups,
    # so we need to issue deprecation warning and cast here
    result = super().__getitem__(key)

    is_2d_index = isinstance(result, PeriodIndex) and result._data.ndim == 2
    if not is_2d_index:
        return result

    # this is not actually a valid Index object
    deprecate_ndim_indexing(result._data)
    return result._data._data
|
||
def __array_wrap__(self, result, context=None): | ||
""" | ||
Gets called after a ufunc. Needs additional handling as | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this leftover debug stuff?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
more or less, will remove