1
- import copy
2
1
import logging
3
2
import numpy as np
4
3
import scipy
5
4
5
+ logging .basicConfig (level = logging .INFO , format = '%(message)s' )
6
+
7
+
6
8
def column_reshape (input_array : np .ndarray ) -> np .ndarray :
7
9
"""Function to reshape a row Numpy array into a column Numpy array"""
8
10
@@ -12,17 +14,17 @@ def column_reshape(input_array: np.ndarray) -> np.ndarray:
12
14
def covariance_within_classes(features: np.ndarray, labels: np.ndarray, classes: int) -> np.ndarray:
    """Compute the pooled within-class covariance matrix.

    Parameters:
    * features: feature matrix, one column per object
    * labels: the class label of each column of `features`
    * classes: the number of classes in the dataset

    Returns the sum of the per-class scatter matrices, normalised by the
    total number of objects. Raises ValueError when `classes` is not positive.
    """
    # Use None as the "not started" sentinel instead of np.nan: NaN silently
    # poisons every later accumulation, while None fails loudly if misused.
    covariance_sum = None
    for i in range(classes):
        data = features[:, labels == i]
        # Centralize the data of class i (class mean as a column vector)
        centered_data = data - data.mean(1).reshape(-1, 1)
        scatter = np.dot(centered_data, centered_data.T)
        if covariance_sum is None:
            # First class seen: start the accumulator
            covariance_sum = scatter
        else:
            covariance_sum += scatter
    if covariance_sum is None:
        raise ValueError("classes must be a positive integer")
    # Normalise by the total number of objects in the dataset
    return covariance_sum / features.shape[1]
def covariance_between_classes(features: np.ndarray, labels: np.ndarray, classes: int) -> np.ndarray:
    """Compute the between-class covariance matrix.

    Parameters:
    * features: feature matrix, one column per object
    * labels: the class label of each column of `features`
    * classes: the number of classes in the dataset

    Returns the class-size-weighted scatter of the class means around the
    global mean, normalised by the total number of objects.
    Raises ValueError when `classes` is not positive.
    """
    # Global mean as a column vector, computed once outside the loop
    general_data_mean = features.mean(1).reshape(-1, 1)
    # None sentinel instead of np.nan: NaN would silently poison accumulation
    covariance_sum = None
    for i in range(classes):
        data = features[:, labels == i]
        # Number of objects belonging to class i (weight of this class)
        device_data = data.shape[1]
        # Deviation of the class mean from the global mean (column vector);
        # computed once instead of twice per iteration as before
        mean_diff = data.mean(1).reshape(-1, 1) - general_data_mean
        scatter = device_data * np.dot(mean_diff, mean_diff.T)
        if covariance_sum is None:
            # First class seen: start the accumulator
            covariance_sum = scatter
        else:
            covariance_sum += scatter
    if covariance_sum is None:
        raise ValueError("classes must be a positive integer")
    # Normalise by the total number of objects in the dataset
    return covariance_sum / features.shape[1]
50
52
51
53
52
- class DimensionalityReduction :
53
- """Class to apply PCA and LDA techniques for the dataset dimensionality reduction.\n
54
- The data structures used are: \n
55
- * self._features: contains the features for each object as a matrix \n
56
- * self._class_labels: contains the labels associated with each object \n
57
- * self._classes: the number of classes in the dataset \n
58
- * self._features_after_PCA: will contain the features mapped in a new space after PCA"""
59
-
60
- def __init__ (self , features : np .ndarray , class_labels : np .ndarray , classes : int ):
61
- logging .basicConfig (level = logging .INFO , format = '%(message)s' )
62
- self ._features = features
63
- self ._class_labels = class_labels
64
- self ._classes = classes
65
- self ._features_after_PCA = None
66
-
67
- def PCA (self , dimensions : int ) -> np .ndarray :
68
- """Principal Component Analysis with a certain filter parameter"""
69
-
70
- try :
71
- # Check if the features have been loaded
72
- assert any (self ._features ) is True
73
- data_mean = self ._features .mean (1 )
74
- # Center the dataset
75
- centered_data = self ._features - np .reshape (data_mean , (data_mean .size , 1 ))
76
- covariance_matrix = np .dot (centered_data , centered_data .T ) / self ._features .shape [1 ]
77
- _ , eigenvectors = np .linalg .eigh (covariance_matrix )
78
- # Take all the columns in the reverse order (-1), and then takes only the first columns
79
- filtered_eigenvectors = eigenvectors [:, ::- 1 ][:, 0 :dimensions ]
80
- # Project the database on the new space
81
- projected_data = np .dot (filtered_eigenvectors .T , self ._features )
82
- self ._features_after_PCA = copy .deepcopy (projected_data )
83
- logging .info ("Principal Component Analysis computed" )
84
- except AssertionError :
85
- logging .basicConfig (level = logging .ERROR , format = '%(message)s' , force = True )
86
- logging .error ("Feature array is empty" )
87
- raise AssertionError
54
def PCA(features: np.ndarray, dimensions: int) -> np.ndarray:
    """Principal Component Analysis \n
    Parameters: \n
    * features: the features extracted from the dataset, one column per object
    * dimensions: number of dimensions to keep in the projected data

    Returns the dataset projected on the top `dimensions` principal components.
    Raises AssertionError when the feature matrix is empty.
    """
    # Guard clause: an empty matrix means the features were never loaded.
    # NOTE: the previous `features.any()` check also rejected a valid
    # all-zeros dataset; `size == 0` tests emptiness, not the values.
    if features.size == 0:
        logging.basicConfig(level=logging.ERROR, format='%(message)s', force=True)
        logging.error("Dataset empty")
        raise AssertionError("Dataset empty")

    data_mean = features.mean(1)
    # Center the dataset (mean as a column vector, broadcast over columns)
    centered_data = features - np.reshape(data_mean, (data_mean.size, 1))
    covariance_matrix = np.dot(centered_data, centered_data.T) / features.shape[1]
    _, eigenvectors = np.linalg.eigh(covariance_matrix)
    # eigh returns eigenvalues in ascending order: reverse the columns (::-1)
    # so the largest come first, then keep only the first `dimensions`
    filtered_eigenvectors = eigenvectors[:, ::-1][:, 0:dimensions]
    # Project the dataset on the new space
    projected_data = np.dot(filtered_eigenvectors.T, features)
    logging.info("Principal Component Analysis computed")
    return projected_data
79
+
80
+
81
def LDA(features: np.ndarray, labels: np.ndarray, classes: int, dimensions: int) -> np.ndarray:
    """Linear Discriminant Analysis \n
    Parameters: \n
    * features: the features extracted from the dataset, one column per object
    * labels: the class labels of the features
    * classes: the number of classes present in the dataset
    * dimensions: to filter the projected data for the desired dimension

    Returns the dataset projected on the top `dimensions` discriminant
    directions. Raises AssertionError when `dimensions >= classes` or when
    the feature matrix is empty.
    """
    # LDA yields at most `classes - 1` meaningful directions. An explicit
    # raise replaces `assert`, which is stripped under `python -O`.
    if not classes > dimensions:
        raise AssertionError("dimensions must be lower than the number of classes")

    # Guard clause for an unloaded dataset. BUG FIX: the previous code
    # tested `features.any` — the bound method object, which is always
    # truthy — so the empty-dataset branch was unreachable.
    if features.size == 0:
        logging.basicConfig(level=logging.ERROR, format='%(message)s', force=True)
        logging.error("Dataset empty")
        raise AssertionError("Dataset empty")

    # Generalised eigenproblem: between-class scatter against within-class scatter
    _, eigenvectors = scipy.linalg.eigh(
        covariance_between_classes(features, labels, classes),
        covariance_within_classes(features, labels, classes))
    # Largest eigenvalues first, keep `dimensions` directions
    filtered_eigenvectors = eigenvectors[:, ::-1][:, :dimensions]
    # Orthonormalise the kept directions via SVD before projecting
    svd_matrix, _, _ = np.linalg.svd(filtered_eigenvectors)
    filtered_svd_matrix = svd_matrix[:, 0:dimensions]
    projected_data = np.dot(filtered_svd_matrix.T, features)
    logging.info("Linear Discriminant Analysis computed")
    return projected_data
0 commit comments