Skip to content

Commit 5d0ff69

Browse files
h-vetinari authored and jreback committed
Better error for str.cat with listlike of wrong dtype. (#26607)
1 parent a6f11ac commit 5d0ff69

File tree

3 files changed

+58
-6
lines changed

3 files changed

+58
-6
lines changed

doc/source/whatsnew/v0.25.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -613,7 +613,7 @@ Strings
613613
^^^^^^^
614614

615615
- Bug in the ``__name__`` attribute of several methods of :class:`Series.str`, which were set incorrectly (:issue:`23551`)
616-
-
616+
- Improved error message when passing :class:`Series` of wrong dtype to :meth:`Series.str.cat` (:issue:`22722`)
617617
-
618618

619619

pandas/core/strings.py

+40-5
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from functools import wraps
33
import re
44
import textwrap
5-
from typing import Dict
5+
from typing import Dict, List
66
import warnings
77

88
import numpy as np
@@ -31,7 +31,7 @@
3131
_shared_docs = dict() # type: Dict[str, str]
3232

3333

34-
def cat_core(list_of_columns, sep):
34+
def cat_core(list_of_columns: List, sep: str):
3535
"""
3636
Auxiliary function for :meth:`str.cat`
3737
@@ -53,6 +53,41 @@ def cat_core(list_of_columns, sep):
5353
return np.sum(list_with_sep, axis=0)
5454

5555

56+
def cat_safe(list_of_columns: List, sep: str):
    """
    Auxiliary function for :meth:`str.cat`.

    Same signature as cat_core, but handles TypeErrors in concatenation, which
    happen if the arrays in list_of_columns have the wrong dtypes or content.

    Parameters
    ----------
    list_of_columns : list of numpy arrays
        List of arrays to be concatenated with sep;
        these arrays may not contain NaNs!
    sep : string
        The separator string for concatenating the columns

    Returns
    -------
    np.ndarray
        The concatenation of list_of_columns with sep

    Raises
    ------
    TypeError
        If cat_core raises a TypeError. When a column whose inferred dtype is
        neither 'string' nor 'empty' is found, the message names that inferred
        dtype; otherwise the original TypeError is re-raised unchanged.
    """
    try:
        result = cat_core(list_of_columns, sep)
    except TypeError:
        # if there are any non-string values (wrong dtype or hidden behind
        # object dtype), np.sum will fail; catch and return with better message
        for column in list_of_columns:
            dtype = lib.infer_dtype(column, skipna=True)
            if dtype not in ('string', 'empty'):
                raise TypeError(
                    'Concatenation requires list-likes containing only '
                    'strings (or missing values). Offending values found in '
                    'column {}'.format(dtype)) from None
        # No offending column was identified, so the failure has another cause
        # (e.g. a non-string sep). Propagate the original TypeError instead of
        # falling through to ``return result`` with ``result`` unbound, which
        # would surface as a confusing UnboundLocalError.
        raise
    return result
89+
90+
5691
def _na_map(f, arr, na_result=np.nan, dtype=object):
5792
# should really _check_ for NA
5893
return _map(f, arr, na_mask=True, na_value=na_result, dtype=dtype)
@@ -2314,16 +2349,16 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
23142349
np.putmask(result, union_mask, np.nan)
23152350

23162351
not_masked = ~union_mask
2317-
result[not_masked] = cat_core([x[not_masked] for x in all_cols],
2352+
result[not_masked] = cat_safe([x[not_masked] for x in all_cols],
23182353
sep)
23192354
elif na_rep is not None and union_mask.any():
23202355
# fill NaNs with na_rep in case there are actually any NaNs
23212356
all_cols = [np.where(nm, na_rep, col)
23222357
for nm, col in zip(na_masks, all_cols)]
2323-
result = cat_core(all_cols, sep)
2358+
result = cat_safe(all_cols, sep)
23242359
else:
23252360
# no NaNs - can just concatenate
2326-
result = cat_core(all_cols, sep)
2361+
result = cat_safe(all_cols, sep)
23272362

23282363
if isinstance(self._orig, Index):
23292364
# add dtype for case that result is all-NA

pandas/tests/test_strings.py

+17
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,23 @@ def test_str_cat_categorical(self, box, dtype_caller, dtype_target, sep):
428428
result = s.str.cat(t, sep=sep)
429429
assert_series_or_index_equal(result, expected)
430430

431+
# test integer/float dtypes (inferred by constructor) and mixed
432+
@pytest.mark.parametrize('data', [[1, 2, 3], [.1, .2, .3], [1, 2, 'b']],
433+
ids=['integers', 'floats', 'mixed'])
434+
# without dtype=object, np.array would cast [1, 2, 'b'] to ['1', '2', 'b']
435+
@pytest.mark.parametrize('box', [Series, Index, list,
436+
lambda x: np.array(x, dtype=object)],
437+
ids=['Series', 'Index', 'list', 'np.array'])
438+
def test_str_cat_wrong_dtype_raises(self, box, data):
439+
# GH 22722
440+
s = Series(['a', 'b', 'c'])
441+
t = box(data)
442+
443+
msg = 'Concatenation requires list-likes containing only strings.*'
444+
with pytest.raises(TypeError, match=msg):
445+
# need to use outer and na_rep, as otherwise Index would not raise
446+
s.str.cat(t, join='outer', na_rep='-')
447+
431448
@pytest.mark.parametrize('box', [Series, Index])
432449
def test_str_cat_mixed_inputs(self, box):
433450
s = Index(['a', 'b', 'c', 'd'])

0 commit comments

Comments
 (0)