Skip to content

Commit 443dcc5

Browse files
committed
BUG: fix Series.unique with 'different' NA values in an object array #714
1 parent 5eb72ea commit 443dcc5

File tree

3 files changed

+32
-8
lines changed

3 files changed

+32
-8
lines changed

pandas/core/series.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,7 @@
66
# pylint: disable=W0703,W0622,W0613,W0201
77

88
from itertools import izip
9-
import csv
109
import operator
11-
import types
1210
from distutils.version import LooseVersion
1311

1412
from numpy import nan, ndarray
@@ -815,11 +813,13 @@ def unique(self):
815813
"""
816814
values = self.values
817815
if issubclass(values.dtype.type, np.floating):
816+
if values.dtype != np.float64:
817+
values = values.astype(np.float64)
818818
table = lib.Float64HashTable(len(values))
819-
uniques = np.array(table.unique(values), dtype='f8')
819+
uniques = np.array(table.unique(values), dtype=np.float64)
820820
else:
821821
if not values.dtype == np.object_:
822-
values = values.astype('O')
822+
values = values.astype(np.object_)
823823
table = lib.PyObjectHashTable(len(values))
824824
uniques = lib.list_to_object_array(table.unique(values))
825825
uniques = lib.maybe_convert_objects(uniques)

pandas/src/hashtable.pyx

+10-4
Original file line numberDiff line numberDiff line change
@@ -728,13 +728,19 @@ cdef class PyObjectHashTable:
728728
object val
729729
khiter_t k
730730
list uniques = []
731+
bint seen_na = 0
731732

732733
for i in range(n):
733734
val = values[i]
734-
k = kh_get_pymap(self.table, <PyObject*>val)
735-
if k == self.table.n_buckets:
736-
k = kh_put_pymap(self.table, <PyObject*>val, &ret)
737-
uniques.append(val)
735+
736+
if not _checknull(val):
737+
k = kh_get_pymap(self.table, <PyObject*>val)
738+
if k == self.table.n_buckets:
739+
k = kh_put_pymap(self.table, <PyObject*>val, &ret)
740+
uniques.append(val)
741+
elif not seen_na:
742+
seen_na = 1
743+
uniques.append(ONAN)
738744

739745
return uniques
740746

pandas/tests/test_series.py

+18
Original file line numberDiff line numberDiff line change
@@ -1412,6 +1412,24 @@ def test_value_counts_nunique(self):
14121412
expected = Series([])
14131413
assert_series_equal(hist, expected)
14141414

1415+
def test_unique(self):
1416+
# GH #714 also applies to float dtype: one repeated value plus NaN must give exactly 2 uniques
1417+
s = Series([1.2345] * 100)
1418+
s[::2] = np.nan
1419+
result = s.unique()
1420+
self.assert_(len(result) == 2)
1421+
1422+
s = Series([1.2345] * 100, dtype='f4')
1423+
s[::2] = np.nan
1424+
result = s.unique()
1425+
self.assert_(len(result) == 2)
1426+
1427+
# GH #714: NaN values stored in an object-dtype Series must count as a single unique value
1428+
s = Series(['foo'] * 100, dtype='O')
1429+
s[::2] = np.nan
1430+
result = s.unique()
1431+
self.assert_(len(result) == 2)
1432+
14151433
def test_sort(self):
14161434
ts = self.ts.copy()
14171435
ts.sort()

0 commit comments

Comments
 (0)