Skip to content

Fix issue #31708 Series.astype(str, skipna=True) vanished in the 1.0 release #35060

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 32 additions & 4 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5383,7 +5383,11 @@ def _to_dict_of_blocks(self, copy: bool_t = True):
}

def astype(
self: FrameOrSeries, dtype, copy: bool_t = True, errors: str = "raise"
self: FrameOrSeries,
dtype,
copy: bool_t = True,
errors: str = "raise",
skipna: bool_t = False,
) -> FrameOrSeries:
"""
Cast a pandas object to a specified dtype ``dtype``.
Expand All @@ -5404,6 +5408,10 @@ def astype(

- ``raise`` : allow exceptions to be raised
- ``ignore`` : suppress exceptions. On error return original object.
skipna : bool, default False
When ``skipna=False`` (default), NaN values are cast to the target
dtype.
When ``skipna=True``, NaN values are preserved instead of being cast.

Returns
-------
Expand Down Expand Up @@ -5499,6 +5507,19 @@ def astype(
1 2020-01-01 19:00:00-05:00
2 2020-01-02 19:00:00-05:00
dtype: datetime64[ns, US/Eastern]

By default, NaN values are cast to the target dtype:

>>> pd.Series([None, 1]).astype(str)
0 nan
1 1.0
dtype: object

Skip casting NaN values:

>>> pd.Series([None, 1]).astype(str, skipna=True)
0 NaN
1 1.0
dtype: object
"""
if is_dict_like(dtype):
if self.ndim == 1: # i.e. Series
Expand All @@ -5520,7 +5541,12 @@ def astype(
for col_name, col in self.items():
if col_name in dtype:
results.append(
col.astype(dtype=dtype[col_name], copy=copy, errors=errors)
col.astype(
dtype=dtype[col_name],
copy=copy,
errors=errors,
skipna=skipna,
)
)
else:
results.append(col.copy() if copy else col)
Expand All @@ -5529,13 +5555,15 @@ def astype(
# GH 18099/22869: columnwise conversion to extension dtype
# GH 24704: use iloc to handle duplicate column names
results = [
self.iloc[:, i].astype(dtype, copy=copy)
self.iloc[:, i].astype(dtype, copy=copy, skipna=skipna)
for i in range(len(self.columns))
]

else:
# else, only a single dtype is given
new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors,)
new_data = self._mgr.astype(
dtype=dtype, copy=copy, errors=errors, skipna=skipna
)
return self._constructor(new_data).__finalize__(self, method="astype")

# GH 33113: handle empty frame or series
Expand Down
16 changes: 12 additions & 4 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,9 @@ def f(mask, val, idx):

return self.split_and_operate(None, f, False)

def astype(self, dtype, copy: bool = False, errors: str = "raise"):
def astype(
self, dtype, copy: bool = False, errors: str = "raise", skipna: bool = False
):
"""
Coerce to the new dtype.

Expand All @@ -528,6 +530,10 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"):
errors : str, {'raise', 'ignore'}, default 'raise'
- ``raise`` : allow exceptions to be raised
- ``ignore`` : suppress exceptions. On error return original object
skipna : bool, default False
When ``skipna=False`` (default), NaN values are cast to the target
dtype.
When ``skipna=True``, NaN values are left as-is instead of being cast.

Returns
-------
Expand Down Expand Up @@ -592,7 +598,7 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"):
# _astype_nansafe works fine with 1-d only
vals1d = values.ravel()
try:
values = astype_nansafe(vals1d, dtype, copy=True)
values = astype_nansafe(vals1d, dtype, copy=True, skipna=skipna)
except (ValueError, TypeError):
# e.g. astype_nansafe can fail on object-dtype of strings
# trying to convert to float
Expand Down Expand Up @@ -2094,7 +2100,9 @@ def _maybe_coerce_values(self, values):
assert isinstance(values, np.ndarray), type(values)
return values

def astype(self, dtype, copy: bool = False, errors: str = "raise"):
def astype(
self, dtype, copy: bool = False, errors: str = "raise", skipna: bool = False
):
"""
These automatically copy, so ``copy=True`` has no effect.
Raise on an exception if ``errors == "raise"``.
Expand All @@ -2113,7 +2121,7 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"):
return self.make_block(values)

# delegate
return super().astype(dtype=dtype, copy=copy, errors=errors)
return super().astype(dtype=dtype, copy=copy, errors=errors, skipna=skipna)

def _can_hold_element(self, element: Any) -> bool:
tipo = maybe_infer_dtype_type(element)
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,9 +560,11 @@ def downcast(self) -> "BlockManager":
return self.apply("downcast")

def astype(
self, dtype, copy: bool = False, errors: str = "raise"
self, dtype, copy: bool = False, errors: str = "raise", skipna: bool = False
) -> "BlockManager":
return self.apply("astype", dtype=dtype, copy=copy, errors=errors)
return self.apply(
"astype", dtype=dtype, copy=copy, errors=errors, skipna=skipna
)

def convert(
self,
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/series/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -495,3 +495,14 @@ def test_reindex_astype_order_consistency(self):
s1 = s.reindex(new_index).astype(temp_dtype).astype(new_dtype)
s2 = s.astype(temp_dtype).reindex(new_index).astype(new_dtype)
tm.assert_series_equal(s1, s2)

def test_astype_skipna_default(self):
# Without passing ``skipna``, casting a float Series containing NaN to
# ``str`` is expected to stringify the missing value as well, yielding the
# literal "nan" alongside the other stringified floats.
arr = Series([1.0, np.nan, 3.0, 4.0])
result = arr.astype(str)
tm.assert_series_equal(result, Series(["1.0", "nan", "3.0", "4.0"]))

def test_astype_skipna_true(self):
# GH 31708
# With ``skipna=True``, the NaN entry is expected to survive the cast to
# ``str`` as a real ``np.nan`` while the remaining values become strings.
arr = Series([1.0, np.nan, 3.0, 4.0])
result = arr.astype(str, skipna=True)
tm.assert_series_equal(result, Series(["1.0", np.nan, "3.0", "4.0"]))