2
2
from functools import wraps
3
3
import re
4
4
import textwrap
5
- from typing import Dict
5
+ from typing import Dict , List
6
6
import warnings
7
7
8
8
import numpy as np
31
31
_shared_docs = dict () # type: Dict[str, str]
32
32
33
33
34
- def cat_core (list_of_columns , sep ):
34
+ def cat_core (list_of_columns : List , sep : str ):
35
35
"""
36
36
Auxiliary function for :meth:`str.cat`
37
37
@@ -53,6 +53,41 @@ def cat_core(list_of_columns, sep):
53
53
return np .sum (list_with_sep , axis = 0 )
54
54
55
55
56
def cat_safe(list_of_columns: List, sep: str):
    """
    Auxiliary function for :meth:`str.cat`.

    Same signature as cat_core, but handles TypeErrors in concatenation, which
    happen if the arrays in list_of_columns have the wrong dtypes or content.

    Parameters
    ----------
    list_of_columns : list of numpy arrays
        List of arrays to be concatenated with sep;
        these arrays may not contain NaNs!
    sep : string
        The separator string for concatenating the columns.

    Returns
    -------
    numpy.ndarray
        The concatenation of list_of_columns with sep, as produced by
        cat_core.

    Raises
    ------
    TypeError
        If cat_core fails with a TypeError. When a column whose inferred
        dtype is neither 'string' nor 'empty' can be identified, the error
        carries a descriptive message naming that inferred dtype; otherwise
        the original TypeError from cat_core is re-raised unchanged.
    """
    try:
        return cat_core(list_of_columns, sep)
    except TypeError:
        # if there are any non-string values (wrong dtype or hidden behind
        # object dtype), np.sum will fail; catch and return with better message
        for column in list_of_columns:
            dtype = lib.infer_dtype(column, skipna=True)
            if dtype not in ['string', 'empty']:
                # ``from None`` hides the low-level numpy traceback, which
                # would only distract from the actual cause
                raise TypeError(
                    'Concatenation requires list-likes containing only '
                    'strings (or missing values). Offending values found in '
                    'column {}'.format(dtype)) from None
        # No offending column could be pinpointed: propagate the original
        # TypeError instead of falling through to an unbound result (which
        # would surface as a confusing UnboundLocalError).
        raise
89
+
90
+
56
91
def _na_map(f, arr, na_result=np.nan, dtype=object):
    """
    Apply ``f`` to ``arr`` through ``_map`` with NA masking switched on.

    Parameters
    ----------
    f : callable
        Function handed on to ``_map``.
    arr : array-like
        Values handed on to ``_map``.
    na_result : object, default np.nan
        Forwarded to ``_map`` as ``na_value``.
    dtype : default object
        Forwarded to ``_map`` unchanged.

    Returns
    -------
    Whatever ``_map`` returns for these arguments.
    """
    # should really _check_ for NA
    mapped = _map(f, arr, na_mask=True, na_value=na_result, dtype=dtype)
    return mapped
@@ -2314,16 +2349,16 @@ def cat(self, others=None, sep=None, na_rep=None, join=None):
2314
2349
np .putmask (result , union_mask , np .nan )
2315
2350
2316
2351
not_masked = ~ union_mask
2317
- result [not_masked ] = cat_core ([x [not_masked ] for x in all_cols ],
2352
+ result [not_masked ] = cat_safe ([x [not_masked ] for x in all_cols ],
2318
2353
sep )
2319
2354
elif na_rep is not None and union_mask .any ():
2320
2355
# fill NaNs with na_rep in case there are actually any NaNs
2321
2356
all_cols = [np .where (nm , na_rep , col )
2322
2357
for nm , col in zip (na_masks , all_cols )]
2323
- result = cat_core (all_cols , sep )
2358
+ result = cat_safe (all_cols , sep )
2324
2359
else :
2325
2360
# no NaNs - can just concatenate
2326
- result = cat_core (all_cols , sep )
2361
+ result = cat_safe (all_cols , sep )
2327
2362
2328
2363
if isinstance (self ._orig , Index ):
2329
2364
# add dtype for case that result is all-NA
0 commit comments