-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: astype fill_value for SparseArray.astype #23547
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
7b2da4c
232921b
7454e31
9c3856d
49c90b0
1cc43d6
57d32ae
d93d98f
4f4b3a3
3dfc07e
173a28a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -614,7 +614,7 @@ def __array__(self, dtype=None, copy=True): | |
# Can't put pd.NaT in a datetime64[ns] | ||
fill_value = np.datetime64('NaT') | ||
try: | ||
dtype = np.result_type(self.sp_values.dtype, fill_value) | ||
dtype = np.result_type(self.sp_values.dtype, type(fill_value)) | ||
except TypeError: | ||
dtype = object | ||
|
||
|
@@ -996,7 +996,7 @@ def _take_with_fill(self, indices, fill_value=None): | |
if len(self) == 0: | ||
# Empty... Allow taking only if all empty | ||
if (indices == -1).all(): | ||
dtype = np.result_type(self.sp_values, fill_value) | ||
dtype = np.result_type(self.sp_values, type(fill_value)) | ||
taken = np.empty_like(indices, dtype=dtype) | ||
taken.fill(fill_value) | ||
return taken | ||
|
@@ -1009,7 +1009,7 @@ def _take_with_fill(self, indices, fill_value=None): | |
if self.sp_index.npoints == 0: | ||
# Avoid taking from the empty self.sp_values | ||
taken = np.full(sp_indexer.shape, fill_value=fill_value, | ||
dtype=np.result_type(fill_value)) | ||
dtype=np.result_type(type(fill_value))) | ||
else: | ||
taken = self.sp_values.take(sp_indexer) | ||
|
||
|
@@ -1030,12 +1030,12 @@ def _take_with_fill(self, indices, fill_value=None): | |
result_type = taken.dtype | ||
|
||
if m0.any(): | ||
result_type = np.result_type(result_type, self.fill_value) | ||
result_type = np.result_type(result_type, type(self.fill_value)) | ||
taken = taken.astype(result_type) | ||
taken[old_fill_indices] = self.fill_value | ||
|
||
if m1.any(): | ||
result_type = np.result_type(result_type, fill_value) | ||
result_type = np.result_type(result_type, type(fill_value)) | ||
taken = taken.astype(result_type) | ||
taken[new_fill_indices] = fill_value | ||
|
||
|
@@ -1061,7 +1061,7 @@ def _take_without_fill(self, indices): | |
# edge case in take... | ||
# I think just return | ||
out = np.full(indices.shape, self.fill_value, | ||
dtype=np.result_type(self.fill_value)) | ||
dtype=np.result_type(type(self.fill_value))) | ||
arr, sp_index, fill_value = make_sparse(out, | ||
fill_value=self.fill_value) | ||
return type(self)(arr, sparse_index=sp_index, | ||
|
@@ -1073,7 +1073,7 @@ def _take_without_fill(self, indices): | |
|
||
if fillable.any(): | ||
# TODO: may need to coerce array to fill value | ||
result_type = np.result_type(taken, self.fill_value) | ||
result_type = np.result_type(taken, type(self.fill_value)) | ||
taken = taken.astype(result_type) | ||
taken[fillable] = self.fill_value | ||
|
||
|
@@ -1215,10 +1215,26 @@ def astype(self, dtype=None, copy=True): | |
dtype = pandas_dtype(dtype) | ||
|
||
if not isinstance(dtype, SparseDtype): | ||
dtype = SparseDtype(dtype, fill_value=self.fill_value) | ||
fill_value = astype_nansafe(np.array(self.fill_value), | ||
dtype).item() | ||
dtype = SparseDtype(dtype, fill_value=fill_value) | ||
|
||
# Typically we'll just astype the sp_values to dtype.subtype, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is kind ugly, but it's backwards compatible, consistent with the rest of pandas, and does what we need. Basically, unless we want to support actual numpy string dtypes (which we probably don't), then we need a way of differentiating between There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you make this a method on the Dtype itself to avoid cluttering this up here? maybe There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was playing around with that earlier (didn't push it though). I called it There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually, can you clarify what you had in mind for I'll have a followup PR soon (hopefully today) for ensuring that the dtype of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. right I think adding a method that returns the dtype of the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated to add two methods
|
||
# but SparseDtype follows the pandas convention of storing strings | ||
# as object dtype. So SparseDtype(str) immediately becomes | ||
# SparseDtype(object), and at this point we don't know whether object | ||
# means string or something else. We *cannot* just pass object to | ||
# astype_nansafe below, since that won't convert to string. So | ||
# we rely on the assumption that "string fill_value" means strings | ||
# which is close enough to being true. | ||
if (is_object_dtype(dtype.subtype) and | ||
isinstance(dtype.fill_value, compat.text_type)): | ||
subtype = str | ||
else: | ||
subtype = dtype.subtype | ||
|
||
sp_values = astype_nansafe(self.sp_values, | ||
dtype.subtype, | ||
subtype, | ||
copy=copy) | ||
if sp_values is self.sp_values and copy: | ||
sp_values = sp_values.copy() | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was having trouble with string fill values.