-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
REF: call _block_shape from EABlock.make_block #33308
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f880785
6a5dbd1
09e4fa2
dc21c28
31dcd84
252025e
f46abaa
c71cf57
7935546
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -244,6 +244,8 @@ def make_block(self, values, placement=None) -> "Block": | |
""" | ||
if placement is None: | ||
placement = self.mgr_locs | ||
if self.is_extension: | ||
values = _block_shape(values, ndim=self.ndim) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jbrockmendel is this not a conditional check? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is a conditional check, but There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. if we are calling .make_block from an EA block, don't we by definition know that we have extension block values? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no. e.g. if we call
which will be |
||
|
||
return make_block(values, placement=placement, ndim=self.ndim) | ||
|
||
|
@@ -355,13 +357,12 @@ def _split_op_result(self, result) -> List["Block"]: | |
nbs = [] | ||
for i, loc in enumerate(self.mgr_locs): | ||
vals = result[i] | ||
nv = _block_shape(vals, ndim=self.ndim) | ||
block = self.make_block(values=nv, placement=[loc]) | ||
block = self.make_block(values=vals, placement=[loc]) | ||
nbs.append(block) | ||
return nbs | ||
|
||
if not isinstance(result, Block): | ||
result = self.make_block(values=_block_shape(result, ndim=self.ndim)) | ||
result = self.make_block(result) | ||
|
||
return [result] | ||
|
||
|
@@ -1277,9 +1278,6 @@ def take_nd(self, indexer, axis: int, new_mgr_locs=None, fill_tuple=None): | |
def diff(self, n: int, axis: int = 1) -> List["Block"]: | ||
""" return block for the diff of the values """ | ||
new_values = algos.diff(self.values, n, axis=axis, stacklevel=7) | ||
# We use block_shape for ExtensionBlock subclasses, which may call here | ||
# via a super. | ||
new_values = _block_shape(new_values, ndim=self.ndim) | ||
return [self.make_block(values=new_values)] | ||
|
||
def shift(self, periods: int, axis: int = 0, fill_value=None): | ||
|
@@ -2267,7 +2265,7 @@ def concat_same_type(self, to_concat): | |
values = values.astype(object, copy=False) | ||
placement = self.mgr_locs if self.ndim == 2 else slice(len(values)) | ||
|
||
return self.make_block(_block_shape(values, self.ndim), placement=placement) | ||
return self.make_block(values, placement=placement) | ||
return super().concat_same_type(to_concat) | ||
|
||
def fillna(self, value, limit=None, inplace=False, downcast=None): | ||
|
@@ -2456,7 +2454,6 @@ def f(mask, val, idx): | |
# TODO: allow EA once reshape is supported | ||
values = values.reshape(shape) | ||
|
||
values = _block_shape(values, ndim=self.ndim) | ||
return values | ||
|
||
if self.ndim == 2: | ||
|
@@ -2738,9 +2735,7 @@ def concat_same_type(self, to_concat): | |
) | ||
placement = self.mgr_locs if self.ndim == 2 else slice(len(values)) | ||
# not using self.make_block_same_class as values can be object dtype | ||
return self.make_block( | ||
_block_shape(values, ndim=self.ndim), placement=placement | ||
) | ||
return self.make_block(values, placement=placement) | ||
|
||
def replace( | ||
self, | ||
|
@@ -2859,16 +2854,15 @@ def _extend_blocks(result, blocks=None): | |
return blocks | ||
|
||
|
||
def _block_shape(values, ndim=1, shape=None): | ||
def _block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike: | ||
""" guarantee the shape of the values to be at least 1 d """ | ||
if values.ndim < ndim: | ||
if shape is None: | ||
shape = values.shape | ||
if not is_extension_array_dtype(values): | ||
# TODO: https://github.com/pandas-dev/pandas/issues/23023 | ||
shape = values.shape | ||
if not is_extension_array_dtype(values.dtype): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this needed? (as you are calling the check in the block constructor). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes, we get a bunch of test failures without this check There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmm, i think we DO want to remove this from inside block_shape though, no? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is the whole point of block_shape, im not clear on what you're suggesting There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you have a check IN the Block constructor AND in the _block_shape function itself. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you should have 1 or the other There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. but we dont have a reshape call in the constructor There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. obviously, i think we should have 2D EAs and not have to hassle with any of this |
||
# TODO(EA2D): https://github.com/pandas-dev/pandas/issues/23023 | ||
# block.shape is incorrect for "2D" ExtensionArrays | ||
# We can't, and don't need to, reshape. | ||
values = values.reshape(tuple((1,) + shape)) | ||
values = values.reshape(tuple((1,) + shape)) # type: ignore | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think OK to use cast after using is_* to avoid the #type: ignore There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. are we now using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. https://pandas.pydata.org/docs/development/contributing.html#style-guidelines specifically mentions the is_* case and suggests a refactor is the preferred route.
for me, this is OK. but I believe Jeff and Will oppose type:ignore as a reason to pyre accepts this is a reasonable need for gradual typing, see https://pyre-check.org/docs/error-suppression.html#suppression-comment-types. it has There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jbrockmendel #32785 was opened for this discussion. see #32730 (comment) cc @jorisvandenbossche |
||
return values | ||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
my point is that is this necessary this (if), or are you saying that this is a perf issue? (e.g. we don't want to always call _block_shape
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ATM we are calling _block_shape in a bunch of places before calling
ExtensionBlock.make_block
. This is just moving all of those calls to one placeThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
With some corner-case-caveats, we only want to call it if self.is_extension, and we pretty much always have to call it if self.is_extension (otherwise we could use make_block_same_class