-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: Fix (22477) dtype=str converts NaN to 'n' #22564
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 11 commits
f069fc2
062786f
a522d7f
c8667dd
4717e36
bdad724
7691c82
00a7ed8
e9a290d
aa6b4a9
ee854d7
64f6e1c
fdad0c5
31021b6
9711d35
086d2b5
27701e0
265f92d
0692db0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,7 +6,7 @@ | |
|
||
from pandas._libs import lib, tslib, tslibs | ||
from pandas._libs.tslibs import OutOfBoundsDatetime, Period, iNaT | ||
from pandas.compat import PY3, string_types, text_type | ||
from pandas.compat import PY3, string_types, text_type, to_str | ||
|
||
from .common import ( | ||
_INT64_DTYPE, _NS_DTYPE, _POSSIBLY_CAST_DTYPES, _TD_DTYPE, _string_dtypes, | ||
|
@@ -1217,11 +1217,14 @@ def construct_1d_arraylike_from_scalar(value, length, dtype): | |
dtype = dtype.dtype | ||
|
||
# coerce if we have nan for an integer dtype | ||
# GH 22858: only cast to float if an index | ||
# (passed here as length) is specified | ||
if length and is_integer_dtype(dtype) and isna(value): | ||
dtype = np.float64 | ||
subarr = np.empty(length, dtype=dtype) | ||
if is_integer_dtype(dtype) and isna(value): | ||
dtype = np.dtype('float64') | ||
if isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"): | ||
subarr = np.empty(length, dtype=object) | ||
if not isna(value): | ||
value = to_str(value) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. prefer to have all of the dtype checking in the if/elif/else and then construct the subarr after There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In that case, |
||
else: | ||
subarr = np.empty(length, dtype=dtype) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @jreback by putting this here in the There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. And do you have a case that doesn’t work? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. And do you have a case that doesn’t work? |
||
subarr.fill(value) | ||
|
||
return subarr | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -134,6 +134,23 @@ def test_constructor_no_data_index_order(self): | |
result = pd.Series(index=['b', 'a', 'c']) | ||
assert result.index.tolist() == ['b', 'a', 'c'] | ||
|
||
def test_constructor_no_data_string_type(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. pls parametrize these tests |
||
# GH 22477 | ||
result = pd.Series(index=[1], dtype=str) | ||
assert np.isnan(result.iloc[0]) | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. check the value using iloc instead here which returns a scalar |
||
@pytest.mark.parametrize('item', ['13']) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. you don't need to parameterize this test (only 1 case), and you need to change the name as the next test overwrites it. |
||
def test_constructor_single_element_string_type(self, item): | ||
# GH 22477 | ||
result = pd.Series(int(item), index=[1], dtype=str) | ||
assert result.iloc[0] == item | ||
|
||
@pytest.mark.parametrize('item', ['entry', 'ѐ']) | ||
def test_constructor_string_element_string_type(self, item): | ||
# GH 22477 | ||
result = pd.Series(item, index=[1], dtype=str) | ||
assert result.iloc[0] == item | ||
|
||
def test_constructor_dtype_str_na_values(self, string_dtype): | ||
# https://github.com/pandas-dev/pandas/issues/21083 | ||
ser = Series(['x', None], dtype=string_dtype) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think `dtype.kind in ("U", "S")` can be replaced with `is_string_dtype(dtype)`. `is_string_dtype` can be found in `pandas/core/dtypes/common`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure, will replace ASAP.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this can also be an elif