@@ -44,6 +44,78 @@ def test_strings(self):
44
44
expected = Series (np .array ([1 , 0 , np .nan , 0 , 1 , 2 , np .nan ]))
45
45
tm .assert_series_equal (result ,expected )
46
46
47
class TestFactorize(tm.TestCase):
    """Checks for ``algos.factorize`` on plain lists and on Series.

    Every test compares the returned ``(labels, uniques)`` pair against
    hand-written expected arrays, both with the default ordering and with
    ``sort=True``.

    NOTE: ``np.array_equal`` compares shape and values only, not dtype.
    The dtypes spelled out on the expected arrays therefore document the
    intended result type but are not enforced by these assertions.
    """

    _multiprocess_can_split_ = True

    def test_basic(self):
        """Factorize homogeneous lists of strings, ints and floats."""
        # Strings: the input already appears in sorted order, so the
        # expected result is the same with and without sort=True.
        labels, uniques = algos.factorize(['a', 'b', 'b', 'a',
                                           'a', 'c', 'c', 'c'])
        self.assertTrue(np.array_equal(
            labels, np.array([0, 1, 1, 0, 0, 2, 2, 2], dtype=np.int64)))
        self.assertTrue(np.array_equal(
            uniques, np.array(['a', 'b', 'c'], dtype=object)))

        labels, uniques = algos.factorize(['a', 'b', 'b', 'a',
                                           'a', 'c', 'c', 'c'], sort=True)
        self.assertTrue(np.array_equal(
            labels, np.array([0, 1, 1, 0, 0, 2, 2, 2], dtype=np.int64)))
        self.assertTrue(np.array_equal(
            uniques, np.array(['a', 'b', 'c'], dtype=object)))

        # Integers in descending order: unsorted uniques keep the input
        # order, sorted uniques are ascending and the labels flip.
        labels, uniques = algos.factorize(list(reversed(range(5))))
        self.assertTrue(np.array_equal(
            labels, np.array([0, 1, 2, 3, 4], dtype=np.int64)))
        self.assertTrue(np.array_equal(
            uniques, np.array([4, 3, 2, 1, 0], dtype=np.int64)))

        labels, uniques = algos.factorize(list(reversed(range(5))),
                                          sort=True)
        self.assertTrue(np.array_equal(
            labels, np.array([4, 3, 2, 1, 0], dtype=np.int64)))
        self.assertTrue(np.array_equal(
            uniques, np.array([0, 1, 2, 3, 4], dtype=np.int64)))

        # Floats in descending order: labels are integer codes and the
        # uniques carry the float values (the expected dtypes were
        # previously swapped, which np.array_equal did not detect).
        labels, uniques = algos.factorize(list(reversed(np.arange(5.))))
        self.assertTrue(np.array_equal(
            labels, np.array([0, 1, 2, 3, 4], dtype=np.int64)))
        self.assertTrue(np.array_equal(
            uniques, np.array([4., 3., 2., 1., 0.], dtype=np.float64)))

        labels, uniques = algos.factorize(list(reversed(np.arange(5.))),
                                          sort=True)
        self.assertTrue(np.array_equal(
            labels, np.array([4, 3, 2, 1, 0], dtype=np.int64)))
        self.assertTrue(np.array_equal(
            uniques, np.array([0., 1., 2., 3., 4.], dtype=np.float64)))

    def test_mixed(self):
        """Factorize an object Series mixing strings, floats and NaN."""
        # doc example reshaping.rst
        x = Series(['A', 'A', np.nan, 'B', 3.14, np.inf])
        labels, uniques = algos.factorize(x)

        # The NaN entry is expected to get label -1 and to be absent
        # from the uniques.
        self.assertTrue(np.array_equal(
            labels, np.array([0, 0, -1, 1, 2, 3], dtype=np.int64)))
        self.assertTrue(np.array_equal(
            uniques, np.array(['A', 'B', 3.14, np.inf], dtype=object)))

        # With sort=True the expected uniques place the numbers ahead of
        # the strings.
        labels, uniques = algos.factorize(x, sort=True)
        self.assertTrue(np.array_equal(
            labels, np.array([2, 2, -1, 3, 0, 1], dtype=np.int64)))
        self.assertTrue(np.array_equal(
            uniques, np.array([3.14, np.inf, 'A', 'B'], dtype=object)))

    def test_datelike(self):
        """Factorize Series of Timestamps (M8[ns]) and of Periods."""
        # M8
        v1 = pd.Timestamp('20130101 09:00:00.00004')
        v2 = pd.Timestamp('20130101')
        x = Series([v1, v1, v1, v2, v2, v1])
        labels, uniques = algos.factorize(x)
        self.assertTrue(np.array_equal(
            labels, np.array([0, 0, 0, 1, 1, 0], dtype=np.int64)))
        self.assertTrue(np.array_equal(
            uniques, np.array([v1.value, v2.value], dtype='M8[ns]')))

        # v2 is earlier than v1, so sorting swaps the two uniques.
        labels, uniques = algos.factorize(x, sort=True)
        self.assertTrue(np.array_equal(
            labels, np.array([1, 1, 1, 0, 0, 1], dtype=np.int64)))
        self.assertTrue(np.array_equal(
            uniques, np.array([v2.value, v1.value], dtype='M8[ns]')))

        # period
        v1 = pd.Period('201302', freq='M')
        v2 = pd.Period('201303', freq='M')
        x = Series([v1, v1, v1, v2, v2, v1])

        # periods are not 'sorted' as they are converted back into an index
        labels, uniques = algos.factorize(x)
        self.assertTrue(np.array_equal(
            labels, np.array([0, 0, 0, 1, 1, 0], dtype=np.int64)))
        self.assertTrue(np.array_equal(
            uniques, np.array([v1, v2], dtype=object)))

        labels, uniques = algos.factorize(x, sort=True)
        self.assertTrue(np.array_equal(
            labels, np.array([0, 0, 0, 1, 1, 0], dtype=np.int64)))
        self.assertTrue(np.array_equal(
            uniques, np.array([v1, v2], dtype=object)))
118
+
47
119
class TestUnique (tm .TestCase ):
48
120
_multiprocess_can_split_ = True
49
121
0 commit comments