-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
ENH: Add na_value argument to DataFrame.to_numpy #33857
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 20 commits
054d74e
cafbf5f
34b9b9f
9a2bbd6
f7dc246
5eb8bb2
ec2f729
b48b6c9
d1a60e8
09fdf51
f5db15a
89e8930
d24b976
bec3889
a20f116
02405a1
055413f
ae088e4
d78ba29
9c87e00
df5b683
ae2b34a
bcb69c5
c3a7a55
b5ec43f
f3e45d7
e54cc28
491a5ae
4ecccff
142c808
8d42fd4
c2228bf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -781,14 +781,20 @@ def copy_func(ax): | |
res.axes = new_axes | ||
return res | ||
|
||
def as_array(self, transpose: bool = False) -> np.ndarray: | ||
def as_array( | ||
self, transpose: bool = False, dtype=None, na_value=lib.no_default | ||
) -> np.ndarray: | ||
""" | ||
Convert the blockmanager data into a numpy array. | ||
|
||
Parameters | ||
---------- | ||
transpose : bool, default False | ||
If True, transpose the return array, | ||
If True, transpose the return array. | ||
dtype : object, default None | ||
Data type of the return array. | ||
na_value : object, default lib.no_default | ||
Value to be used as the missing value sentinel. | ||
|
||
Returns | ||
------- | ||
|
@@ -803,19 +809,34 @@ def as_array(self, transpose: bool = False) -> np.ndarray: | |
# always be object dtype. Some callers seem to want the | ||
# DatetimeArray (previously DTI) | ||
arr = self.blocks[0].get_values(dtype=object) | ||
elif self._is_single_block and self.blocks[0].is_extension: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You might be able to remove the block above (is_datetimetz), as that is an extension block already. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Indeed, the There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nice 👍 |
||
# Avoid implicit conversion of extension blocks to object | ||
arr = ( | ||
self.blocks[0] | ||
.values.to_numpy(dtype=dtype, na_value=na_value) | ||
.reshape(self.blocks[0].shape) | ||
) | ||
elif self._is_single_block or not self.is_mixed_type: | ||
arr = np.asarray(self.blocks[0].get_values()) | ||
if dtype: | ||
arr = arr.astype(dtype) | ||
dsaxton marked this conversation as resolved.
Show resolved
Hide resolved
|
||
else: | ||
arr = self._interleave() | ||
arr = self._interleave(dtype=dtype, na_value=na_value) | ||
|
||
if na_value is not lib.no_default: | ||
na_mask = isna(arr) | ||
if na_mask.any(): | ||
arr[na_mask] = na_value | ||
dsaxton marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
return arr.transpose() if transpose else arr | ||
|
||
def _interleave(self) -> np.ndarray: | ||
def _interleave(self, dtype=None, na_value=lib.no_default) -> np.ndarray: | ||
""" | ||
Return ndarray from blocks with specified item order | ||
Items must be contained in the blocks | ||
""" | ||
dtype = _interleaved_dtype(self.blocks) | ||
if not dtype: | ||
dtype = _interleaved_dtype(self.blocks) | ||
|
||
# TODO: https://github.com/pandas-dev/pandas/issues/22791 | ||
# Give EAs some input on what happens here. Sparse needs this. | ||
|
@@ -830,7 +851,12 @@ def _interleave(self) -> np.ndarray: | |
|
||
for blk in self.blocks: | ||
rl = blk.mgr_locs | ||
result[rl.indexer] = blk.get_values(dtype) | ||
if blk.is_extension: | ||
# Avoid implicit conversion of extension blocks to object | ||
arr = blk.values.to_numpy(dtype=dtype, na_value=na_value) | ||
else: | ||
arr = blk.get_values(dtype) | ||
result[rl.indexer] = arr | ||
itemmask[rl.indexer] = 1 | ||
|
||
if not itemmask.all(): | ||
|
Uh oh!
There was an error while loading. Please reload this page.