Skip to content

Commit a11766f

Browse files
hack-cjreback
hack-c
authored and committed
ENH BUG DOC TST fixed behavior of str_cat w.r.t. nan handling and added friendly error message
DOC corrected docstring for str_cat closes pandas-dev#12297 closes pandas-dev#11435 closes pandas-dev#11334
1 parent da523e0 commit a11766f

File tree

3 files changed

+56
-9
lines changed

3 files changed

+56
-9
lines changed

doc/source/whatsnew/v0.18.0.txt

+18
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,24 @@ the ``extractall`` method returns all matches.
249249

250250
s.str.extractall("(?P<letter>[ab])(?P<digit>\d)")
251251

252+
Changes to str.cat
253+
^^^^^^^^^^^^^^^^^^
254+
255+
The :ref:`.str.cat <text.cat>` method concatenates the members of a ``Series``. Previously, if ``NaN`` values were present in the Series, calling ``.str.cat()`` on it would return ``NaN``, unlike the rest of the ``Series.str.*`` API. This behavior has been amended to ignore ``NaN`` values by default (:issue:`11435`).
256+
257+
A new, friendlier ``ValueError`` is raised to protect against the mistake of supplying ``sep`` as a positional argument rather than as a keyword argument (:issue:`11334`).
258+
259+
.. ipython:: python
260+
261+
Series(['a','b',np.nan,'c']).str.cat(sep=' ')
262+
Series(['a','b',np.nan,'c']).str.cat(sep=' ', na_rep='?')
263+
264+
.. code-block:: python
265+
266+
In [2]: Series(['a','b',np.nan,'c']).str.cat(' ')
267+
ValueError: Did you mean to supply a `sep` keyword?
268+
269+
252270
.. _whatsnew_0180.enhancements.rounding:
253271

254272
Datetimelike rounding

pandas/core/strings.py

+23-8
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import numpy as np
22

33
from pandas.compat import zip
4-
from pandas.core.common import (isnull, _values_from_object, is_bool_dtype,
4+
from pandas.core.common import (isnull, notnull, _values_from_object,
5+
is_bool_dtype,
56
is_list_like, is_categorical_dtype,
67
is_object_dtype, take_1d)
78
import pandas.compat as compat
@@ -37,14 +38,23 @@ def str_cat(arr, others=None, sep=None, na_rep=None):
3738
If None, returns str concatenating strings of the Series
3839
sep : string or None, default None
3940
na_rep : string or None, default None
40-
If None, an NA in any array will propagate
41+
If None, NA in the series are ignored.
4142
4243
Returns
4344
-------
4445
concat : Series/Index of objects or str
4546
4647
Examples
4748
--------
49+
When ``na_rep`` is `None` (default behavior), NaN value(s)
50+
in the Series are ignored.
51+
52+
>>> Series(['a','b',np.nan,'c']).str.cat(sep=' ')
53+
'a b c'
54+
55+
>>> Series(['a','b',np.nan,'c']).str.cat(sep=' ', na_rep='?')
56+
'a b ? c'
57+
4858
If ``others`` is specified, corresponding values are
4959
concatenated with the separator. Result will be a Series of strings.
5060
@@ -103,18 +113,23 @@ def str_cat(arr, others=None, sep=None, na_rep=None):
103113
arr = np.asarray(arr, dtype=object)
104114
mask = isnull(arr)
105115
if na_rep is None and mask.any():
106-
return np.nan
116+
if sep == '':
117+
na_rep = ''
118+
else:
119+
return sep.join(arr[notnull(arr)])
107120
return sep.join(np.where(mask, na_rep, arr))
108121

109122

110123
def _length_check(others):
111124
n = None
112125
for x in others:
113-
if n is None:
114-
n = len(x)
115-
elif len(x) != n:
116-
raise ValueError('All arrays must be same length')
117-
126+
try:
127+
if n is None:
128+
n = len(x)
129+
elif len(x) != n:
130+
raise ValueError('All arrays must be same length')
131+
except TypeError:
132+
raise ValueError("Did you mean to supply a `sep` keyword?")
118133
return n
119134

120135

pandas/tests/test_strings.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,8 @@ def test_cat(self):
100100

101101
# single array
102102
result = strings.str_cat(one)
103-
self.assertTrue(isnull(result))
103+
exp = 'aabbc'
104+
self.assertEqual(result, exp)
104105

105106
result = strings.str_cat(one, na_rep='NA')
106107
exp = 'aabbcNA'
@@ -114,6 +115,10 @@ def test_cat(self):
114115
exp = 'a_a_b_b_c_NA'
115116
self.assertEqual(result, exp)
116117

118+
result = strings.str_cat(two, sep='-')
119+
exp = 'a-b-d-foo'
120+
self.assertEqual(result, exp)
121+
117122
# Multiple arrays
118123
result = strings.str_cat(one, [two], na_rep='NA')
119124
exp = ['aa', 'aNA', 'bb', 'bd', 'cfoo', 'NANA']
@@ -2453,6 +2458,15 @@ def test_cat_on_filtered_index(self):
24532458

24542459
self.assertEqual(str_multiple.loc[1], '2011 2 2')
24552460

2461+
def test_str_cat_raises_intuitive_error(self):
2462+
# https://github.com/pydata/pandas/issues/11334
2463+
s = Series(['a', 'b', 'c', 'd'])
2464+
message = "Did you mean to supply a `sep` keyword?"
2465+
with tm.assertRaisesRegexp(ValueError, message):
2466+
s.str.cat('|')
2467+
with tm.assertRaisesRegexp(ValueError, message):
2468+
s.str.cat(' ')
2469+
24562470
def test_index_str_accessor_visibility(self):
24572471
from pandas.core.strings import StringMethods
24582472

0 commit comments

Comments
 (0)