File tree: 3 files changed, +32 -8 lines changed
Original file line number | Diff line number | Diff line change
6
6
# pylint: disable=W0703,W0622,W0613,W0201
7
7
8
8
from itertools import izip
9
- import csv
10
9
import operator
11
- import types
12
10
from distutils .version import LooseVersion
13
11
14
12
from numpy import nan , ndarray
@@ -815,11 +813,13 @@ def unique(self):
815
813
"""
816
814
values = self .values
817
815
if issubclass (values .dtype .type , np .floating ):
816
+ if values .dtype != np .float64 :
817
+ values = values .astype (np .float64 )
818
818
table = lib .Float64HashTable (len (values ))
819
- uniques = np .array (table .unique (values ), dtype = 'f8' )
819
+ uniques = np .array (table .unique (values ), dtype = np . float64 )
820
820
else :
821
821
if not values .dtype == np .object_ :
822
- values = values .astype ('O' )
822
+ values = values .astype (np . object_ )
823
823
table = lib .PyObjectHashTable (len (values ))
824
824
uniques = lib .list_to_object_array (table .unique (values ))
825
825
uniques = lib .maybe_convert_objects (uniques )
Original file line number | Diff line number | Diff line change
@@ -728,13 +728,19 @@ cdef class PyObjectHashTable:
728
728
object val
729
729
khiter_t k
730
730
list uniques = []
731
+ bint seen_na = 0
731
732
732
733
for i in range (n):
733
734
val = values[i]
734
- k = kh_get_pymap(self .table, < PyObject* > val)
735
- if k == self .table.n_buckets:
736
- k = kh_put_pymap(self .table, < PyObject* > val, & ret)
737
- uniques.append(val)
735
+
736
+ if not _checknull(val):
737
+ k = kh_get_pymap(self .table, < PyObject* > val)
738
+ if k == self .table.n_buckets:
739
+ k = kh_put_pymap(self .table, < PyObject* > val, & ret)
740
+ uniques.append(val)
741
+ elif not seen_na:
742
+ seen_na = 1
743
+ uniques.append(ONAN)
738
744
739
745
return uniques
740
746
Original file line number | Diff line number | Diff line change
@@ -1412,6 +1412,24 @@ def test_value_counts_nunique(self):
1412
1412
expected = Series ([])
1413
1413
assert_series_equal (hist , expected )
1414
1414
1415
+ def test_unique (self ):
1416
+ # issue #714: for float dtypes too, repeated NaNs must count as one unique value
1417
+ s = Series ([1.2345 ] * 100 )
1418
+ s [::2 ] = np .nan
1419
+ result = s .unique ()
1420
+ self .assert_ (len (result ) == 2 )
1421
+
1422
+ s = Series ([1.2345 ] * 100 , dtype = 'f4' )
1423
+ s [::2 ] = np .nan
1424
+ result = s .unique ()
1425
+ self .assert_ (len (result ) == 2 )
1426
+
1427
+ # NAs in object arrays #714
1428
+ s = Series (['foo' ] * 100 , dtype = 'O' )
1429
+ s [::2 ] = np .nan
1430
+ result = s .unique ()
1431
+ self .assert_ (len (result ) == 2 )
1432
+
1415
1433
def test_sort (self ):
1416
1434
ts = self .ts .copy ()
1417
1435
ts .sort ()
You can’t perform that action at this time.
0 commit comments