Skip to content

Commit cd9aa24

Browse files
committed
Better TypeError for wrong dtype in str.cat
1 parent addc5fc commit cd9aa24

File tree

3 files changed

+56
-18
lines changed

3 files changed

+56
-18
lines changed

doc/source/whatsnew/v0.25.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -575,7 +575,7 @@ Strings
575575
^^^^^^^
576576

577577
- Bug in the ``__name__`` attribute of several methods of :class:`Series.str`, which were set incorrectly (:issue:`23551`)
578-
-
578+
- Improved error message when passing ``Series`` of wrong dtype to :meth:`Series.str.cat` (:issue:`22722`)
579579
-
580580

581581

pandas/core/strings.py

+39-17
Original file line numberDiff line numberDiff line change
@@ -2280,6 +2280,23 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
22802280
'must all be of the same length as the '
22812281
'calling Series/Index.')
22822282

2283+
# data has already been checked by _validate to be of correct dtype,
2284+
# but others may still have Series of other dtypes (e.g. integers) which
2285+
# will necessarily fail in concatenation. To avoid deep and confusing
2286+
# traces, we raise here for anything that's not object or all-NA float.
2287+
def _legal_dtype(series):
2288+
# unify dtype handling between categorical/non-categorical
2289+
dtype = (series.dtype if not is_categorical_dtype(series)
2290+
else series.cat.categories.dtype)
2291+
legal = dtype == 'O' or (dtype == 'float' and series.isna().all())
2292+
return legal
2293+
err_wrong_dtype = ('Can only concatenate list-likes containing only '
2294+
'strings (or missing values).')
2295+
if any(not _legal_dtype(x) for x in others):
2296+
raise TypeError(err_wrong_dtype + ' Received list-like of dtype: '
2297+
'{}'.format([x.dtype for x in others
2298+
if not _legal_dtype(x)][0]))
2299+
22832300
if join is None and warn:
22842301
warnings.warn("A future version of pandas will perform index "
22852302
"alignment when `others` is a Series/Index/"
@@ -2307,23 +2324,28 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
23072324
na_masks = np.array([isna(x) for x in all_cols])
23082325
union_mask = np.logical_or.reduce(na_masks, axis=0)
23092326

2310-
if na_rep is None and union_mask.any():
2311-
# no na_rep means NaNs for all rows where any column has a NaN
2312-
# only necessary if there are actually any NaNs
2313-
result = np.empty(len(data), dtype=object)
2314-
np.putmask(result, union_mask, np.nan)
2315-
2316-
not_masked = ~union_mask
2317-
result[not_masked] = cat_core([x[not_masked] for x in all_cols],
2318-
sep)
2319-
elif na_rep is not None and union_mask.any():
2320-
# fill NaNs with na_rep in case there are actually any NaNs
2321-
all_cols = [np.where(nm, na_rep, col)
2322-
for nm, col in zip(na_masks, all_cols)]
2323-
result = cat_core(all_cols, sep)
2324-
else:
2325-
# no NaNs - can just concatenate
2326-
result = cat_core(all_cols, sep)
2327+
# if there are any non-string, non-null values hidden within an object
2328+
# dtype, cat_core will fail; catch error and re-raise with better message
2329+
try:
2330+
if na_rep is None and union_mask.any():
2331+
# no na_rep means NaNs for all rows where any column has a NaN
2332+
# only necessary if there are actually any NaNs
2333+
result = np.empty(len(data), dtype=object)
2334+
np.putmask(result, union_mask, np.nan)
2335+
2336+
not_masked = ~union_mask
2337+
result[not_masked] = cat_core([x[not_masked]
2338+
for x in all_cols], sep)
2339+
elif na_rep is not None and union_mask.any():
2340+
# fill NaNs with na_rep in case there are actually any NaNs
2341+
all_cols = [np.where(nm, na_rep, col)
2342+
for nm, col in zip(na_masks, all_cols)]
2343+
result = cat_core(all_cols, sep)
2344+
else:
2345+
# no NaNs - can just concatenate
2346+
result = cat_core(all_cols, sep)
2347+
except TypeError:
2348+
raise TypeError(err_wrong_dtype)
23272349

23282350
if isinstance(self._orig, Index):
23292351
# add dtype for case that result is all-NA

pandas/tests/test_strings.py

+16
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,22 @@ def test_str_cat_categorical(self, box, dtype_caller, dtype_target, sep):
420420
result = s.str.cat(t, sep=sep)
421421
assert_series_or_index_equal(result, expected)
422422

423+
# test integer/float dtypes (inferred by constructor) and mixed
424+
@pytest.mark.parametrize('data', [[1, 2, 3], [.1, .2, .3], [1, 2, 'b']],
425+
ids=['integers', 'floats', 'mixed'])
426+
# without dtype=object, np.array would cast [1, 2, 'b'] to ['1', '2', 'b']
427+
@pytest.mark.parametrize('box', [Series, Index, list,
428+
lambda x: np.array(x, dtype=object)],
429+
ids=['Series', 'Index', 'list', 'np.array'])
430+
def test_str_cat_wrong_dtype_raises(self, box, data):
431+
# GH 22722
432+
s = Series(['a', 'b', 'c'])
433+
t = box(data)
434+
435+
msg = 'Can only concatenate list-likes containing only strings.*'
436+
with pytest.raises(TypeError, match=msg):
437+
s.str.cat(t, join='left')
438+
423439
@pytest.mark.parametrize('box', [Series, Index])
424440
def test_str_cat_mixed_inputs(self, box):
425441
s = Index(['a', 'b', 'c', 'd'])

0 commit comments

Comments
 (0)