Skip to content

Commit a5c362e

Browse files
committed
REGR: ensure .unique of mixed strings does not stringize
closes pandas-dev#16107
1 parent 844013b commit a5c362e

File tree

2 files changed

+27
-5
lines changed

2 files changed

+27
-5
lines changed

pandas/core/algorithms.py

+19-3
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,16 @@ def _ensure_data(values, dtype=None):
7878
elif is_float_dtype(values) or is_float_dtype(dtype):
7979
return _ensure_float64(values), 'float64', 'float64'
8080
elif is_object_dtype(values) and dtype is None:
81+
82+
# if the values can be inferred as integer, floating or boolean, coerce them accordingly
83+
inferred = lib.infer_dtype(values)
84+
if inferred in ['integer']:
85+
return _ensure_int64(values), 'int64', 'int64'
86+
elif inferred in ['floating']:
87+
return _ensure_float64(values), 'float64', 'float64'
88+
elif inferred in ['boolean']:
89+
return np.asarray(values).astype('uint64'), 'bool', 'uint64'
90+
8191
return _ensure_object(np.asarray(values)), 'object', 'object'
8292
elif is_complex_dtype(values) or is_complex_dtype(dtype):
8393

@@ -87,7 +97,7 @@ def _ensure_data(values, dtype=None):
8797
values = _ensure_float64(values)
8898
return values, 'float64', 'float64'
8999

90-
except (TypeError, ValueError):
100+
except (TypeError, ValueError, OverflowError):
91101
# if we are trying to coerce to a dtype
92102
# and it is incompatible, this will fall through to here
93103
return _ensure_object(values), 'object', 'object'
@@ -150,7 +160,13 @@ def _reconstruct_data(values, dtype, original):
150160
elif is_datetime64tz_dtype(dtype) or is_period_dtype(dtype):
151161
values = Index(original)._shallow_copy(values, name=None)
152162
elif dtype is not None:
153-
values = values.astype(dtype)
163+
164+
# don't cast to object if the values are numeric
165+
if is_object_dtype(dtype):
166+
if not is_numeric_dtype(values):
167+
values = values.astype(dtype)
168+
else:
169+
values = values.astype(dtype)
154170

155171
return values
156172

@@ -161,7 +177,7 @@ def _ensure_arraylike(values):
161177
"""
162178
if not isinstance(values, (np.ndarray, ABCCategorical,
163179
ABCIndexClass, ABCSeries)):
164-
values = np.array(values)
180+
values = np.array(values, dtype=object)
165181
return values
166182

167183

pandas/tests/test_algos.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,12 @@ def test_uint64_overflow(self):
387387
exp = np.array([1, 2, 2**63], dtype=np.uint64)
388388
tm.assert_numpy_array_equal(algos.unique(s), exp)
389389

390+
def test_nan_in_object_array(self):
391+
l = ['a', np.nan, 'c', 'c']
392+
result = pd.unique(l)
393+
expected = np.array(['a', np.nan, 'c'], dtype=object)
394+
tm.assert_numpy_array_equal(result, expected)
395+
390396
def test_categorical(self):
391397

392398
# we are expecting to return in the order
@@ -1375,11 +1381,11 @@ def test_int64_add_overflow():
13751381
class TestMode(tm.TestCase):
13761382

13771383
def test_no_mode(self):
1378-
exp = Series([], dtype=np.float64)
1384+
exp = Series([], dtype=object)
13791385
tm.assert_series_equal(algos.mode([]), exp)
13801386

1381-
# GH 15714
13821387
def test_mode_single(self):
1388+
# GH 15714
13831389
exp_single = [1]
13841390
data_single = [1]
13851391

0 commit comments

Comments
 (0)