-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
Preserve Extension type on cross section #22785
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
e8b37da
0197e0c
62326ae
f008c38
88c6126
78798cf
b051424
d6a2479
f796138
78dd81e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,9 +12,6 @@ | |
from pandas.util._validators import validate_bool_kwarg | ||
from pandas.compat import range, map, zip | ||
|
||
from pandas.core.dtypes.dtypes import ( | ||
ExtensionDtype, | ||
PandasExtensionDtype) | ||
from pandas.core.dtypes.common import ( | ||
_NS_DTYPE, | ||
is_datetimelike_v_numeric, | ||
|
@@ -791,6 +788,11 @@ def _interleave(self): | |
""" | ||
dtype = _interleaved_dtype(self.blocks) | ||
|
||
if is_extension_array_dtype(dtype): | ||
# TODO: https://github.com/pandas-dev/pandas/issues/22791 | ||
# Give EAs some input on what happens here. Sparse needs this. | ||
dtype = 'object' | ||
|
||
result = np.empty(self.shape, dtype=dtype) | ||
|
||
if result.shape[0] == 0: | ||
|
@@ -906,14 +908,25 @@ def fast_xs(self, loc): | |
|
||
# unique | ||
dtype = _interleaved_dtype(self.blocks) | ||
|
||
n = len(items) | ||
result = np.empty(n, dtype=dtype) | ||
if is_extension_array_dtype(dtype): | ||
# we'll eventually construct an ExtensionArray. | ||
result = np.empty(n, dtype=object) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do people find this confusing? I can either
I chose this implementation because I assume it's slightly faster for wide dataframes with a numpy type, compared to building a list and then There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This implementation looks good to me |
||
else: | ||
result = np.empty(n, dtype=dtype) | ||
|
||
for blk in self.blocks: | ||
# Such assignment may incorrectly coerce NaT to None | ||
# result[blk.mgr_locs] = blk._slice((slice(None), loc)) | ||
for i, rl in enumerate(blk.mgr_locs): | ||
result[rl] = blk._try_coerce_result(blk.iget((i, loc))) | ||
|
||
if is_extension_array_dtype(dtype): | ||
result = dtype.construct_array_type()._from_sequence( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this guaranteed to be 1d at this point? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
result, dtype=dtype | ||
) | ||
|
||
return result | ||
|
||
def consolidate(self): | ||
|
@@ -1855,16 +1868,22 @@ def _shape_compat(x): | |
|
||
|
||
def _interleaved_dtype(blocks): | ||
if not len(blocks): | ||
return None | ||
# type: (List[Block]) -> Optional[Union[np.dtype, ExtensionDtype]] | ||
"""Find the common dtype for `blocks`. | ||
|
||
dtype = find_common_type([b.dtype for b in blocks]) | ||
Parameters | ||
---------- | ||
blocks : List[Block] | ||
|
||
# only numpy compat | ||
if isinstance(dtype, (PandasExtensionDtype, ExtensionDtype)): | ||
dtype = np.object | ||
Returns | ||
------- | ||
dtype : Optional[Union[np.dtype, ExtensionDtype]] | ||
None is returned when `blocks` is empty. | ||
""" | ||
if not len(blocks): | ||
return None | ||
|
||
return dtype | ||
return find_common_type([b.dtype for b in blocks]) | ||
|
||
|
||
def _consolidate(blocks): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
double backticks on DataFrame
shouldn't this be in the EA section?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed.