Skip to content

Commit 58ae9db

Browse files
committed
Merge pull request pandas-dev#10724 from ajcr/GH9431
BUG: pd.unique should respect datetime64 and timedelta64 dtypes (GH9431)
2 parents 304a5f4 + cf1f181 commit 58ae9db

File tree

3 files changed

+53
-2
lines changed

3 files changed

+53
-2
lines changed

doc/source/whatsnew/v0.17.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -606,3 +606,4 @@ Bug Fixes
606606
- Bug in vectorised setting of timestamp columns with python ``datetime.date`` and numpy ``datetime64`` (:issue:`10408`, :issue:`10412`)
607607

608608
- Bug in ``pd.DataFrame`` when constructing an empty DataFrame with a string dtype (:issue:`9428`)
609+
- Bug in ``pd.unique`` for arrays with the ``datetime64`` or ``timedelta64`` dtype that meant an array with object dtype was returned instead of the original dtype (:issue:`9431`)

pandas/core/algorithms.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def match(to_match, values, na_sentinel=-1):
3636
values = np.array(values, dtype='O')
3737

3838
f = lambda htype, caster: _match_generic(to_match, values, htype, caster)
39-
result = _hashtable_algo(f, values.dtype)
39+
result = _hashtable_algo(f, values.dtype, np.int64)
4040

4141
if na_sentinel != -1:
4242

@@ -66,14 +66,20 @@ def unique(values):
6666
return _hashtable_algo(f, values.dtype)
6767

6868

69-
def _hashtable_algo(f, dtype):
69+
def _hashtable_algo(f, dtype, return_dtype=None):
7070
"""
7171
f(HashTable, type_caster) -> result
7272
"""
7373
if com.is_float_dtype(dtype):
7474
return f(htable.Float64HashTable, com._ensure_float64)
7575
elif com.is_integer_dtype(dtype):
7676
return f(htable.Int64HashTable, com._ensure_int64)
77+
elif com.is_datetime64_dtype(dtype):
78+
return_dtype = return_dtype or 'M8[ns]'
79+
return f(htable.Int64HashTable, com._ensure_int64).view(return_dtype)
80+
elif com.is_timedelta64_dtype(dtype):
81+
return_dtype = return_dtype or 'm8[ns]'
82+
return f(htable.Int64HashTable, com._ensure_int64).view(return_dtype)
7783
else:
7884
return f(htable.PyObjectHashTable, com._ensure_object)
7985

pandas/tests/test_algos.py

+44
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,50 @@ def test_on_index_object(self):
235235

236236
tm.assert_almost_equal(result, expected)
237237

238+
def test_datetime64_dtype_array_returned(self):
239+
# GH 9431
240+
expected = np.array(['2015-01-03T00:00:00.000000000+0000',
241+
'2015-01-01T00:00:00.000000000+0000'], dtype='M8[ns]')
242+
243+
dt_index = pd.to_datetime(['2015-01-03T00:00:00.000000000+0000',
244+
'2015-01-01T00:00:00.000000000+0000',
245+
'2015-01-01T00:00:00.000000000+0000'])
246+
result = algos.unique(dt_index)
247+
tm.assert_numpy_array_equal(result, expected)
248+
self.assertEqual(result.dtype, expected.dtype)
249+
250+
s = pd.Series(dt_index)
251+
result = algos.unique(s)
252+
tm.assert_numpy_array_equal(result, expected)
253+
self.assertEqual(result.dtype, expected.dtype)
254+
255+
arr = s.values
256+
result = algos.unique(arr)
257+
tm.assert_numpy_array_equal(result, expected)
258+
self.assertEqual(result.dtype, expected.dtype)
259+
260+
261+
def test_timedelta64_dtype_array_returned(self):
262+
# GH 9431
263+
expected = np.array([31200, 45678, 10000], dtype='m8[ns]')
264+
265+
td_index = pd.to_timedelta([31200, 45678, 31200, 10000, 45678])
266+
result = algos.unique(td_index)
267+
tm.assert_numpy_array_equal(result, expected)
268+
self.assertEqual(result.dtype, expected.dtype)
269+
270+
s = pd.Series(td_index)
271+
result = algos.unique(s)
272+
tm.assert_numpy_array_equal(result, expected)
273+
self.assertEqual(result.dtype, expected.dtype)
274+
275+
arr = s.values
276+
result = algos.unique(arr)
277+
tm.assert_numpy_array_equal(result, expected)
278+
self.assertEqual(result.dtype, expected.dtype)
279+
280+
281+
238282
class TestValueCounts(tm.TestCase):
239283
_multiprocess_can_split_ = True
240284

0 commit comments

Comments
 (0)