Skip to content

Commit b162d1e

Browse files
committed
Disallow NaN in StringArray constructor
Closes pandas-dev#30966
1 parent bd63ece commit b162d1e

File tree

3 files changed

+44
-14
lines changed

3 files changed

+44
-14
lines changed

pandas/_libs/lib.pyx

+26-4
Original file line numberDiff line numberDiff line change
@@ -1472,12 +1472,30 @@ cdef class Validator:
14721472
Py_ssize_t n
14731473
dtype dtype
14741474
bint skipna
1475+
bint na_only
14751476

14761477
def __cinit__(self, Py_ssize_t n, dtype dtype=np.dtype(np.object_),
1477-
bint skipna=False):
1478+
bint skipna=False,
1479+
bint na_only=False):
1480+
"""
1481+
1482+
Parameters
1483+
----------
1484+
n
1485+
dtype
1486+
skipna
1487+
na_only : bool, default False
1488+
Whether to only treat pandas.NA as NA. Values like None
1489+
and NaN won't be treated as NA.
1490+
1491+
Returns
1492+
-------
1493+
1494+
"""
14781495
self.n = n
14791496
self.dtype = dtype
14801497
self.skipna = skipna
1498+
self.na_only = na_only
14811499

14821500
cdef bint validate(self, ndarray values) except -1:
14831501
if not self.n:
@@ -1530,7 +1548,10 @@ cdef class Validator:
15301548
"must define is_value_typed")
15311549

15321550
cdef bint is_valid_null(self, object value) except -1:
1533-
return value is None or value is C_NA or util.is_nan(value)
1551+
if self.na_only:
1552+
return value is C_NA
1553+
else:
1554+
return value is None or value is C_NA or util.is_nan(value)
15341555

15351556
cdef bint is_array_typed(self) except -1:
15361557
return False
@@ -1625,11 +1646,12 @@ cdef class StringValidator(Validator):
16251646
return issubclass(self.dtype.type, np.str_)
16261647

16271648

1628-
cpdef bint is_string_array(ndarray values, bint skipna=False):
1649+
cpdef bint is_string_array(ndarray values, bint skipna=False, na_only=False):
16291650
cdef:
16301651
StringValidator validator = StringValidator(len(values),
16311652
values.dtype,
1632-
skipna=skipna)
1653+
skipna=skipna,
1654+
na_only=na_only)
16331655
return validator.validate(values)
16341656

16351657

pandas/core/arrays/string_.py

+12-10
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,6 @@ class StringArray(PandasArray):
9393
StringArray is considered experimental. The implementation and
9494
parts of the API may change without warning.
9595
96-
In particular, the NA value used may change to no longer be
97-
``numpy.nan``.
98-
9996
Parameters
10097
----------
10198
values : array-like
@@ -104,8 +101,11 @@ class StringArray(PandasArray):
104101
.. warning::
105102
106103
Currently, this expects an object-dtype ndarray
107-
where the elements are Python strings. This may
108-
change without warning in the future.
104+
where the elements are Python strings or :attr:`pandas.NA`.
105+
This may change without warning in the future. Use
106+
:func:`pandas.array` with ``dtype="string"`` for a stable way of
107+
creating a ``StringArray`` from any sequence.
108+
109109
copy : bool, default False
110110
Whether to copy the array of data.
111111
@@ -119,6 +119,8 @@ class StringArray(PandasArray):
119119
120120
See Also
121121
--------
122+
pandas.array
123+
The recommended function for creating a StringArray.
122124
Series.str
123125
The string methods are available on Series backed by
124126
a StringArray.
@@ -164,13 +166,13 @@ def __init__(self, values, copy=False):
164166

165167
def _validate(self):
166168
"""Validate that we only store NA or strings."""
167-
if len(self._ndarray) and not lib.is_string_array(self._ndarray, skipna=True):
168-
raise ValueError(
169-
"StringArray requires a sequence of strings or missing values."
170-
)
169+
if len(self._ndarray) and not lib.is_string_array(
170+
self._ndarray, skipna=True, na_only=True
171+
):
172+
raise ValueError("StringArray requires a sequence of strings or pandas.NA")
171173
if self._ndarray.dtype != "object":
172174
raise ValueError(
173-
"StringArray requires a sequence of strings. Got "
175+
"StringArray requires a sequence of strings or pandas.NA. Got "
174176
f"'{self._ndarray.dtype}' dtype instead."
175177
)
176178

pandas/tests/arrays/string_/test_string.py

+6
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,12 @@ def test_constructor_raises():
194194
with pytest.raises(ValueError, match="sequence of strings"):
195195
pd.arrays.StringArray(np.array([]))
196196

197+
with pytest.raises(ValueError, match="strings or pandas.NA"):
198+
pd.arrays.StringArray(np.array(["a", np.nan], dtype=object))
199+
200+
with pytest.raises(ValueError, match="strings or pandas.NA"):
201+
pd.arrays.StringArray(np.array(["a", None], dtype=object))
202+
197203

198204
@pytest.mark.parametrize("skipna", [True, False])
199205
@pytest.mark.xfail(reason="Not implemented StringArray.sum")

0 commit comments

Comments
 (0)