Skip to content

Commit d07b238

Browse files
committed
REGR: NA-values in ctors with string dtype
```python
In [1]: import pandas as pd
In [2]: pd.Series([1, 2, None], dtype='str')[2]  # None
```
Closes pandas-dev#21083
1 parent ab6aaf7 commit d07b238

File tree

4 files changed

+57
-7
lines changed

4 files changed

+57
-7
lines changed

doc/source/whatsnew/v0.23.1.txt

+8
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,14 @@ and bug fixes. We recommend that all users upgrade to this version.
1010
:local:
1111
:backlinks: none
1212

13+
.. _whatsnew_0231.fixed_regressions:
14+
15+
Fixed Regressions
16+
~~~~~~~~~~~~~~~~~
17+
18+
- Fixed regression in constructors coercing NA values like ``None`` to strings when passing ``dtype=str`` (:issue:`21083`)
19+
20+
1321
.. _whatsnew_0231.enhancements:
1422

1523
New features

pandas/core/series.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -4054,7 +4054,20 @@ def _try_cast(arr, take_fast_path):
40544054
isinstance(subarr, np.ndarray))):
40554055
subarr = construct_1d_object_array_from_listlike(subarr)
40564056
elif not is_extension_type(subarr):
4057-
subarr = np.array(subarr, dtype=dtype, copy=copy)
4057+
subarr2 = np.array(subarr, dtype=dtype, copy=copy)
4058+
4059+
if dtype and dtype.kind in ("U", "S"):
4060+
# GH-21083
4061+
# We can't just return np.array(subarr, dtype='str') since
4062+
# NumPy will convert the non-string objects into strings
4063+
# including NA values. So we have to go
4064+
# string -> object -> update NA, which requires an
4065+
# additional pass over the data.
4066+
na_values = isna(subarr)
4067+
subarr2 = subarr2.astype(object)
4068+
subarr2[na_values] = np.asarray(subarr)[na_values]
4069+
4070+
subarr = subarr2
40584071
except (ValueError, TypeError):
40594072
if is_categorical_dtype(dtype):
40604073
# We *do* allow casting to categorical, since we know

pandas/tests/frame/test_constructors.py

+8
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,14 @@ def test_constructor_complex_dtypes(self):
151151
assert a.dtype == df.a.dtype
152152
assert b.dtype == df.b.dtype
153153

154+
def test_constructor_dtype_str_na_values(self):
155+
# https://github.com/pandas-dev/pandas/issues/21083
156+
df = DataFrame({'A': ['x', None]}, dtype=str)
157+
result = df.isna()
158+
expected = DataFrame({"A": [False, True]})
159+
tm.assert_frame_equal(result, expected)
160+
assert df.iloc[1, 0] is None
161+
154162
def test_constructor_rec(self):
155163
rec = self.frame.to_records(index=False)
156164

pandas/tests/series/test_constructors.py

+27-6
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,17 @@
2929
from .common import TestData
3030

3131

32+
@pytest.fixture(params=[str, 'str', 'U'])
33+
def string_dtype(request):
34+
"""Parametrized fixture for string dtypes.
35+
36+
* str
37+
* 'str'
38+
* 'U'
39+
"""
40+
return request.param
41+
42+
3243
class TestSeriesConstructors(TestData):
3344

3445
def test_invalid_dtype(self):
@@ -137,6 +148,14 @@ def test_constructor_no_data_index_order(self):
137148
result = pd.Series(index=['b', 'a', 'c'])
138149
assert result.index.tolist() == ['b', 'a', 'c']
139150

151+
def test_constructor_dtype_str_na_values(self):
152+
# https://github.com/pandas-dev/pandas/issues/21083
153+
ser = Series(['x', None], dtype=str)
154+
result = ser.isna()
155+
expected = Series([False, True])
156+
tm.assert_series_equal(result, expected)
157+
assert ser.iloc[1] is None
158+
140159
def test_constructor_series(self):
141160
index1 = ['d', 'b', 'a', 'c']
142161
index2 = sorted(index1)
@@ -164,22 +183,24 @@ def test_constructor_list_like(self):
164183

165184
@pytest.mark.parametrize('input_vals', [
166185
([1, 2]),
167-
([1.0, 2.0, np.nan]),
168186
(['1', '2']),
169187
(list(pd.date_range('1/1/2011', periods=2, freq='H'))),
170188
(list(pd.date_range('1/1/2011', periods=2, freq='H',
171189
tz='US/Eastern'))),
172190
([pd.Interval(left=0, right=5)]),
173191
])
174-
def test_constructor_list_str(self, input_vals):
192+
def test_constructor_list_str(self, input_vals, string_dtype):
175193
# GH 16605
176194
# Ensure that data elements from a list are converted to strings
177195
# when dtype is str, 'str', or 'U'
196+
result = Series(input_vals, dtype=string_dtype)
197+
expected = Series(input_vals).astype(string_dtype)
198+
assert_series_equal(result, expected)
178199

179-
for dtype in ['str', str, 'U']:
180-
result = Series(input_vals, dtype=dtype)
181-
expected = Series(input_vals).astype(dtype)
182-
assert_series_equal(result, expected)
200+
def test_constructor_list_str_na(self, string_dtype):
201+
result = Series([1.0, 2.0, np.nan], dtype=string_dtype)
202+
expected = Series(['1.0', '2.0', None], dtype=object)
203+
assert_series_equal(result, expected)
183204

184205
def test_constructor_generator(self):
185206
gen = (i for i in range(10))

0 commit comments

Comments
 (0)