1
+ # pylint: disable=E1101,W0232
2
+
1
3
import numpy as np
2
4
import pandas .core .common as com
3
- import pandas .lib as lib
4
5
5
6
6
class Factor(object):
    """
    Represents a categorical variable in classic R / S-plus fashion

    Parameters
    ----------
    labels : ndarray of integers
        For each observation, the position of its value in ``levels``.
        A label of -1 is treated as a missing observation (the sentinel
        ``factorize`` uses for NA values) and is surfaced as NaN.
    levels : Index-like (unique)
        The distinct values. Converted with ``_ensure_index``.

    Raises
    ------
    ValueError
        If ``levels`` contains duplicates.

    Attributes
    ----------
      * labels : ndarray
      * levels : Index

    Notes
    -----
    Use ``Factor.from_array`` to build a Factor from raw values.
    """

    # Class-level default, kept so ``Factor.levels`` remains accessible.
    levels = None

    def __init__(self, labels, levels):
        # Imported locally, as in the original (presumably to avoid a
        # circular import at module load time).
        from pandas.core.index import _ensure_index

        levels = _ensure_index(levels)
        if not levels.is_unique:
            raise ValueError('Factor levels must be unique')

        # Labels are stored as given (no copy, no coercion).
        self.labels = labels
        self.levels = levels

    @classmethod
    def from_array(cls, data):
        """
        Build a Factor by factorizing raw values.

        Sorted levels are attempted first; if the values cannot be ordered
        (``TypeError``) the levels are kept in the order ``factorize``
        returns them.
        """
        from pandas.core.algorithms import factorize

        try:
            labels, levels, _ = factorize(data, sort=True)
        except TypeError:
            labels, levels, _ = factorize(data, sort=False)

        return Factor(labels, levels)

    def __array__(self, dtype=None, copy=None):
        """
        Materialize the observations as an ndarray of level values.

        ``dtype`` and ``copy`` are accepted because NumPy may pass them
        (e.g. ``np.asarray(factor, dtype=object)``). The result is always a
        freshly allocated array, so ``copy`` needs no special handling.
        """
        labels = np.asarray(self.labels)
        values = np.asarray(self.levels.values).take(labels)

        # take() would map -1 to the *last* level; overwrite with NaN instead.
        missing = labels == -1
        if missing.any():
            kind = values.dtype.kind
            if kind in 'iub':
                # Integer / boolean arrays cannot hold NaN.
                values = values.astype(float)
            elif kind not in 'fc':
                values = values.astype(object)
            values[missing] = np.nan

        if dtype is not None:
            values = np.asarray(values, dtype=dtype)
        return values

    def __len__(self):
        """Number of observations (not number of levels)."""
        return len(self.labels)

    def __repr__(self):
        temp = 'Factor:\n %s\n Levels (%d): %s'
        values = np.asarray(self)
        return temp % (repr(values), len(self.levels), self.levels)

    def __getitem__(self, key):
        """
        Integer key -> the level value at that position (NaN when the label
        is the -1 missing sentinel). Any other key -> a new Factor over the
        selected labels, sharing the same levels.
        """
        if isinstance(key, (int, np.integer)):
            i = self.labels[key]
            if i == -1:
                # Do not wrap around to the last level for missing labels.
                return np.nan
            return self.levels[i]
        else:
            return Factor(self.labels[key], self.levels)
64
+
59
65
0 commit comments