Skip to content

Commit 8e5196c

Browse files
committed
unify error message by catching errors in cat_core; more test parametrization
1 parent fd5f878 commit 8e5196c

File tree

2 files changed

+39
-24
lines changed

2 files changed

+39
-24
lines changed

pandas/core/strings.py

+28-19
Original file line numberDiff line numberDiff line change
@@ -2290,9 +2290,10 @@ def _legal_dtype(series):
22902290
else series.cat.categories.dtype)
22912291
legal = dtype == 'O' or (dtype == 'float' and series.isna().all())
22922292
return legal
2293+
err_wrong_dtype = ('Can only concatenate list-likes containing only '
2294+
'strings (or missing values)')
22932295
if any(not _legal_dtype(x) for x in others):
2294-
raise TypeError('Can only concatenate list-likes containing only '
2295-
'strings (or missing values)')
2296+
raise TypeError(err_wrong_dtype)
22962297

22972298
if join is None and warn:
22982299
warnings.warn("A future version of pandas will perform index "
@@ -2321,23 +2322,31 @@ def _legal_dtype(series):
23212322
na_masks = np.array([isna(x) for x in all_cols])
23222323
union_mask = np.logical_or.reduce(na_masks, axis=0)
23232324

2324-
if na_rep is None and union_mask.any():
2325-
# no na_rep means NaNs for all rows where any column has a NaN
2326-
# only necessary if there are actually any NaNs
2327-
result = np.empty(len(data), dtype=object)
2328-
np.putmask(result, union_mask, np.nan)
2329-
2330-
not_masked = ~union_mask
2331-
result[not_masked] = cat_core([x[not_masked] for x in all_cols],
2332-
sep)
2333-
elif na_rep is not None and union_mask.any():
2334-
# fill NaNs with na_rep in case there are actually any NaNs
2335-
all_cols = [np.where(nm, na_rep, col)
2336-
for nm, col in zip(na_masks, all_cols)]
2337-
result = cat_core(all_cols, sep)
2338-
else:
2339-
# no NaNs - can just concatenate
2340-
result = cat_core(all_cols, sep)
2325+
# if there are any non-string, non-null values hidden within an object
2326+
# dtype, cat_core will fail; catch the error and raise a clearer one.
2327+
try:
2328+
if na_rep is None and union_mask.any():
2329+
# no na_rep means NaNs for all rows where any column has a NaN
2330+
# only necessary if there are actually any NaNs
2331+
result = np.empty(len(data), dtype=object)
2332+
np.putmask(result, union_mask, np.nan)
2333+
2334+
not_masked = ~union_mask
2335+
result[not_masked] = cat_core([x[not_masked]
2336+
for x in all_cols], sep)
2337+
elif na_rep is not None and union_mask.any():
2338+
# fill NaNs with na_rep in case there are actually any NaNs
2339+
all_cols = [np.where(nm, na_rep, col)
2340+
for nm, col in zip(na_masks, all_cols)]
2341+
result = cat_core(all_cols, sep)
2342+
else:
2343+
# no NaNs - can just concatenate
2344+
result = cat_core(all_cols, sep)
2345+
except TypeError as exc:
2346+
if re.match(r'can only concatenate str \(not \"\w+\"\) to str',
2347+
str(exc)):
2348+
raise TypeError(err_wrong_dtype)
2349+
raise exc
23412350

23422351
if isinstance(self._orig, Index):
23432352
# add dtype for case that result is all-NA

pandas/tests/test_strings.py

+11-5
Original file line numberDiff line numberDiff line change
@@ -420,15 +420,21 @@ def test_str_cat_categorical(self, box, dtype_caller, dtype_target, sep):
420420
result = s.str.cat(t, sep=sep)
421421
assert_series_or_index_equal(result, expected)
422422

423-
@pytest.mark.parametrize('box', [Series, Index, np.array, list])
424-
def test_str_cat_wrong_dtype_raises(self, box):
423+
# test integer/float dtypes (inferred by constructor) and mixed
424+
@pytest.mark.parametrize('data', [[1, 2, 3], [.1, .2, .3], [1, 2, 'b']],
425+
ids=['integers', 'floats', 'mixed'])
426+
# without dtype=object, np.array would cast [1, 2, 'b'] to ['1', '2', 'b']
427+
@pytest.mark.parametrize('box', [Series, Index, list,
428+
lambda x: np.array(x, dtype=object)],
429+
ids=['Series', 'Index', 'list', 'np.array'])
430+
def test_str_cat_wrong_dtype_raises(self, box, data):
425431
# GH 22722
426-
s = Series(['a', 'b', 'c', 'd'])
427-
t = box([1, 2, 3, 4])
432+
s = Series(['a', 'b', 'c'])
433+
t = box(data)
428434

429435
msg = 'Can only concatenate list-likes containing only strings.*'
430436
with pytest.raises(TypeError, match=msg):
431-
s.str.cat(t)
437+
s.str.cat(t, join='left')
432438

433439
@pytest.mark.parametrize('box', [Series, Index])
434440
def test_str_cat_mixed_inputs(self, box):

0 commit comments

Comments
 (0)