diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index 740da62d4f11c..a904e45dd47c9 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -251,6 +251,38 @@ the ``extractall`` method returns all matches. .. _whatsnew_0180.enhancements.rounding: + +Changes to str.cat +^^^^^^^^^^^^^^^^^^ + +The ``.str.cat()`` method concatenates the members of a Series. Before, if NaN values +were present in the Series, calling ``cat()`` on it would return NaN, unlike the rest of the +``Series.str.*`` API. This behavior has been amended to ignore NaN values by default +(:issue:`11435`). + +A new, friendlier ValueError was also added to protect against the mistake of supplying +``sep`` as a positional argument rather than as a keyword argument +(:issue:`11334`). + +.. code-block:: python + + >>> Series(['a','b',np.nan,'c']).str.cat(sep=' ') + 'a b c' + + >>> Series(['a','b',np.nan,'c']).str.cat(sep=' ', na_rep='?') + 'a b ? c' + + >>> Series(['a','b',np.nan,'c']).str.cat(' ') + --------------------------------------------------------------------------- + ValueError Traceback (most recent call last) + in () + ----> 1 Series(['a','b',np.nan,'c']).str.cat(' ') + + [...] + + ValueError: Did you mean to supply a `sep` keyword? 
+ + Datetimelike rounding ^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 727e3fcb377bd..41fdb5e4a8e03 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1,7 +1,7 @@ import numpy as np from pandas.compat import zip -from pandas.core.common import (isnull, _values_from_object, is_bool_dtype, +from pandas.core.common import (isnull, notnull, _values_from_object, is_bool_dtype, is_list_like, is_categorical_dtype, is_object_dtype, take_1d) import pandas.compat as compat @@ -37,7 +37,7 @@ def str_cat(arr, others=None, sep=None, na_rep=None): If None, returns str concatenating strings of the Series sep : string or None, default None na_rep : string or None, default None - If None, an NA in any array will propagate + If None, NA values in the Series are ignored. Returns ------- @@ -45,6 +45,15 @@ def str_cat(arr, others=None, sep=None, na_rep=None): Examples -------- + When ``na_rep`` is ``None`` (default behavior), NaN value(s) + in the Series are ignored. + + >>> Series(['a','b',np.nan,'c']).str.cat(sep=' ') + 'a b c' + + >>> Series(['a','b',np.nan,'c']).str.cat(sep=' ', na_rep='?') + 'a b ? c' + If ``others`` is specified, corresponding values are concatenated with the separator. Result will be a Series of strings. 
@@ -103,18 +112,23 @@ def str_cat(arr, others=None, sep=None, na_rep=None): arr = np.asarray(arr, dtype=object) mask = isnull(arr) if na_rep is None and mask.any(): - return np.nan + if sep == '': + na_rep = '' + else: + return sep.join(arr[notnull(arr)]) return sep.join(np.where(mask, na_rep, arr)) def _length_check(others): n = None for x in others: - if n is None: - n = len(x) - elif len(x) != n: - raise ValueError('All arrays must be same length') - + try: + if n is None: + n = len(x) + elif len(x) != n: + raise ValueError('All arrays must be same length') + except TypeError: + raise ValueError("Did you mean to supply a `sep` keyword?") return n diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index f0bb002a1c96d..2e439cdd3842e 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -100,7 +100,8 @@ def test_cat(self): # single array result = strings.str_cat(one) - self.assertTrue(isnull(result)) + exp = 'aabbc' + self.assertEqual(result, exp) result = strings.str_cat(one, na_rep='NA') exp = 'aabbcNA' @@ -114,6 +115,10 @@ def test_cat(self): exp = 'a_a_b_b_c_NA' self.assertEqual(result, exp) + result = strings.str_cat(two, sep='-') + exp = 'a-b-d-foo' + self.assertEqual(result, exp) + # Multiple arrays result = strings.str_cat(one, [two], na_rep='NA') exp = ['aa', 'aNA', 'bb', 'bd', 'cfoo', 'NANA'] @@ -2453,6 +2458,16 @@ def test_cat_on_filtered_index(self): self.assertEqual(str_multiple.loc[1], '2011 2 2') + def test_str_cat_raises_intuitive_error(self): + # https://github.com/pydata/pandas/issues/11334 + s = Series(['a','b','c','d']) + message = "Did you mean to supply a `sep` keyword?" + with tm.assertRaisesRegexp(ValueError, message): + s.str.cat('|') + with tm.assertRaisesRegexp(ValueError, message): + s.str.cat(' ') + + def test_index_str_accessor_visibility(self): from pandas.core.strings import StringMethods