-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ENH: Add na_value argument to DataFrame.to_numpy #33857
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 28 commits
054d74e
cafbf5f
34b9b9f
9a2bbd6
f7dc246
5eb8bb2
ec2f729
b48b6c9
d1a60e8
09fdf51
f5db15a
89e8930
d24b976
bec3889
a20f116
02405a1
055413f
ae088e4
d78ba29
9c87e00
df5b683
ae2b34a
bcb69c5
c3a7a55
b5ec43f
f3e45d7
e54cc28
491a5ae
4ecccff
142c808
8d42fd4
c2228bf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -781,14 +781,28 @@ def copy_func(ax): | |
res.axes = new_axes | ||
return res | ||
|
||
def as_array(self, transpose: bool = False) -> np.ndarray: | ||
def as_array( | ||
self, | ||
transpose: bool = False, | ||
dtype=None, | ||
copy: bool = False, | ||
na_value=lib.no_default, | ||
) -> np.ndarray: | ||
""" | ||
Convert the blockmanager data into an numpy array. | ||
|
||
Parameters | ||
---------- | ||
transpose : bool, default False | ||
If True, transpose the return array, | ||
If True, transpose the return array. | ||
dtype : object, default None | ||
Data type of the return array. | ||
copy : bool, default False | ||
If True then guarantee that a copy is returned. A value of | ||
False does not guarantee that the underlying data is not | ||
copied. | ||
na_value : object, default lib.no_default | ||
Value to be used as the missing value sentinel. | ||
|
||
Returns | ||
------- | ||
|
@@ -798,24 +812,43 @@ def as_array(self, transpose: bool = False) -> np.ndarray: | |
arr = np.empty(self.shape, dtype=float) | ||
return arr.transpose() if transpose else arr | ||
|
||
should_copy = copy or na_value is not lib.no_default | ||
|
||
if self._is_single_block and self.blocks[0].is_datetimetz: | ||
# TODO(Block.get_values): Make DatetimeTZBlock.get_values | ||
# always be object dtype. Some callers seem to want the | ||
# DatetimeArray (previously DTI) | ||
arr = self.blocks[0].get_values(dtype=object) | ||
elif self._is_single_block and self.blocks[0].is_extension: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. you might be able to remove the block above (is_datetimetz), as that is an extension block already. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Indeed, the There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nice 👍 |
||
# Avoid implicit conversion of extension blocks to object | ||
arr = ( | ||
self.blocks[0] | ||
.values.to_numpy(dtype=dtype, na_value=na_value) | ||
.reshape(self.blocks[0].shape) | ||
) | ||
elif self._is_single_block or not self.is_mixed_type: | ||
arr = np.asarray(self.blocks[0].get_values()) | ||
if dtype: | ||
arr = arr.astype(dtype, copy=False) | ||
jorisvandenbossche marked this conversation as resolved.
Show resolved
Hide resolved
|
||
else: | ||
arr = self._interleave() | ||
arr = self._interleave(dtype=dtype, na_value=na_value) | ||
should_copy = False | ||
|
||
if should_copy: | ||
arr = arr.copy() | ||
jorisvandenbossche marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
if na_value is not lib.no_default: | ||
arr[isna(arr)] = na_value | ||
dsaxton marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
return arr.transpose() if transpose else arr | ||
|
||
def _interleave(self) -> np.ndarray: | ||
def _interleave(self, dtype=None, na_value=lib.no_default) -> np.ndarray: | ||
""" | ||
Return ndarray from blocks with specified item order | ||
Items must be contained in the blocks | ||
""" | ||
dtype = _interleaved_dtype(self.blocks) | ||
if not dtype: | ||
dtype = _interleaved_dtype(self.blocks) | ||
|
||
# TODO: https://github.com/pandas-dev/pandas/issues/22791 | ||
# Give EAs some input on what happens here. Sparse needs this. | ||
|
@@ -830,7 +863,12 @@ def _interleave(self) -> np.ndarray: | |
|
||
for blk in self.blocks: | ||
rl = blk.mgr_locs | ||
result[rl.indexer] = blk.get_values(dtype) | ||
if blk.is_extension: | ||
# Avoid implicit conversion of extension blocks to object | ||
arr = blk.values.to_numpy(dtype=dtype, na_value=na_value) | ||
else: | ||
arr = blk.get_values(dtype) | ||
result[rl.indexer] = arr | ||
itemmask[rl.indexer] = 1 | ||
|
||
if not itemmask.all(): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -365,7 +365,7 @@ def test_to_numpy_copy(self): | |
df = pd.DataFrame(arr) | ||
assert df.values.base is arr | ||
assert df.to_numpy(copy=False).base is arr | ||
assert df.to_numpy(copy=True).base is None | ||
assert df.to_numpy(copy=True).base is not arr | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This had to change because now the output array is a view, but only on the transpose of itself (which is a copy of the original data) |
||
|
||
def test_swapaxes(self): | ||
df = DataFrame(np.random.randn(10, 5)) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why is this needed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's to avoid making a second copy in certain cases where we can detect this (e.g. with multiple dtypes, going through `_interleave`, the constructed array is never the original values, so an additional copy is not needed).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
then i would just make this
and add an explanation line