Skip to content

Commit f0b2ff3

Browse files
Nikoleta-v3 authored and jorisvandenbossche committed
BUG: Fix dtype=str converts NaN to 'n' (#22564)
More specifically, the cases that seem to have an issue are when: - the series is empty - it's a single-element series * Closes #22477
1 parent 1520047 commit f0b2ff3

File tree

4 files changed

+23
-6
lines changed

4 files changed

+23
-6
lines changed

doc/source/whatsnew/v0.24.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1443,6 +1443,7 @@ Reshaping
14431443
- Bug in :func:`merge_asof` where confusing error message raised when attempting to merge with missing values (:issue:`23189`)
14441444
- Bug in :meth:`DataFrame.nsmallest` and :meth:`DataFrame.nlargest` for dataframes that have a :class:`MultiIndex` for columns (:issue:`23033`).
14451445
- Bug in :meth:`DataFrame.append` with a :class:`Series` with a dateutil timezone would raise a ``TypeError`` (:issue:`23682`)
1446+
- Bug in ``Series`` construction when passing no data and ``dtype=str`` (:issue:`22477`)
14461447

14471448
.. _whatsnew_0240.bug_fixes.sparse:
14481449

pandas/core/dtypes/cast.py

+10-5
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from pandas._libs import lib, tslib, tslibs
88
from pandas._libs.tslibs import OutOfBoundsDatetime, Period, iNaT
9-
from pandas.compat import PY3, string_types, text_type
9+
from pandas.compat import PY3, string_types, text_type, to_str
1010

1111
from .common import (
1212
_INT64_DTYPE, _NS_DTYPE, _POSSIBLY_CAST_DTYPES, _TD_DTYPE, _string_dtypes,
@@ -1216,11 +1216,16 @@ def construct_1d_arraylike_from_scalar(value, length, dtype):
12161216
if not isinstance(dtype, (np.dtype, type(np.dtype))):
12171217
dtype = dtype.dtype
12181218

1219-
# coerce if we have nan for an integer dtype
1220-
# GH 22858: only cast to float if an index
1221-
# (passed here as length) is specified
12221219
if length and is_integer_dtype(dtype) and isna(value):
1223-
dtype = np.float64
1220+
# coerce if we have nan for an integer dtype
1221+
dtype = np.dtype('float64')
1222+
elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"):
1223+
# we need to coerce to object dtype to
1224+
# allow numpy to take our string as a scalar value
1225+
dtype = object
1226+
if not isna(value):
1227+
value = to_str(value)
1228+
12241229
subarr = np.empty(length, dtype=dtype)
12251230
subarr.fill(value)
12261231

pandas/core/dtypes/common.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,7 @@ def is_datetime64_dtype(arr_or_dtype):
419419
return False
420420
try:
421421
tipo = _get_dtype_type(arr_or_dtype)
422-
except TypeError:
422+
except (TypeError, UnicodeEncodeError):
423423
return False
424424
return issubclass(tipo, np.datetime64)
425425

pandas/tests/series/test_constructors.py

+11
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,17 @@ def test_constructor_no_data_index_order(self):
134134
result = pd.Series(index=['b', 'a', 'c'])
135135
assert result.index.tolist() == ['b', 'a', 'c']
136136

137+
def test_constructor_no_data_string_type(self):
138+
# GH 22477
139+
result = pd.Series(index=[1], dtype=str)
140+
assert np.isnan(result.iloc[0])
141+
142+
@pytest.mark.parametrize('item', ['entry', 'ѐ', 13])
143+
def test_constructor_string_element_string_type(self, item):
144+
# GH 22477
145+
result = pd.Series(item, index=[1], dtype=str)
146+
assert result.iloc[0] == str(item)
147+
137148
def test_constructor_dtype_str_na_values(self, string_dtype):
138149
# https://github.com/pandas-dev/pandas/issues/21083
139150
ser = Series(['x', None], dtype=string_dtype)

0 commit comments

Comments
 (0)