Skip to content

Commit eb50e28

Browse files
committed
PCA and LDA finished and tested
1 parent 24a68e9 commit eb50e28

File tree

1 file changed

+63
-79
lines changed

1 file changed

+63
-79
lines changed

Diff for: machine_learning/dimensionality_reduction.py

+63-79
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1-
import copy
21
import logging
32
import numpy as np
43
import scipy
54

5+
logging.basicConfig(level=logging.INFO, format='%(message)s')
6+
7+
68
def column_reshape(input_array: np.ndarray) -> np.ndarray:
79
"""Function to reshape a row Numpy array into a column Numpy array"""
810

@@ -12,17 +14,17 @@ def column_reshape(input_array: np.ndarray) -> np.ndarray:
1214
def covariance_within_classes(features: np.ndarray, labels: np.ndarray, classes: int) -> np.ndarray:
    """Compute the within-class covariance (scatter) matrix.

    Parameters: \n
    * features: dataset features, one column per object (n_features x n_objects)
    * labels: the class label of each object (length n_objects)
    * classes: the number of classes present in the dataset

    Returns the sum of the per-class scatter matrices divided by the total
    number of objects (n_features x n_features)."""

    # Accumulate into a zero matrix instead of juggling a NaN sentinel with a
    # first-iteration branch (the original `covariance_sum = np.nan` pattern).
    covariance_sum = np.zeros((features.shape[0], features.shape[0]))
    for i in range(classes):
        data = features[:, labels == i]
        data_mean = data.mean(1)
        # Centralize the data of class i
        centered_data = data - column_reshape(data_mean)
        covariance_sum += np.dot(centered_data, centered_data.T)

    return covariance_sum / features.shape[1]
def covariance_between_classes(features: np.ndarray, labels: np.ndarray, classes: int) -> np.ndarray:
    """Compute the between-class covariance (scatter) matrix.

    Parameters: \n
    * features: dataset features, one column per object (n_features x n_objects)
    * labels: the class label of each object (length n_objects)
    * classes: the number of classes present in the dataset

    Returns the class-size-weighted scatter of the class means around the
    overall mean, divided by the total number of objects."""

    general_data_mean = features.mean(1)
    # Accumulate into a zero matrix instead of juggling a NaN sentinel with a
    # first-iteration branch (the original `covariance_sum = np.nan` pattern).
    covariance_sum = np.zeros((features.shape[0], features.shape[0]))
    for i in range(classes):
        data = features[:, labels == i]
        # Number of objects belonging to class i (weights its contribution)
        class_size = data.shape[1]
        data_mean = data.mean(1)
        # Hoist the mean-difference vector: the original computed it twice
        mean_diff = column_reshape(data_mean) - column_reshape(general_data_mean)
        covariance_sum += class_size * np.dot(mean_diff, mean_diff.T)

    return covariance_sum / features.shape[1]
5052

5153

52-
class DimensionalityReduction:
53-
"""Class to apply PCA and LDA techniques for the dataset dimensionality reduction.\n
54-
The data structures used are: \n
55-
* self._features: contains the features for each object as a matrix \n
56-
* self._class_labels: contains the labels associated with each object \n
57-
* self._classes: the number of classes in the dataset \n
58-
* self._features_after_PCA: will contain the features mapped in a new space after PCA"""
59-
60-
def __init__(self, features: np.ndarray, class_labels: np.ndarray, classes: int):
61-
logging.basicConfig(level=logging.INFO, format='%(message)s')
62-
self._features = features
63-
self._class_labels = class_labels
64-
self._classes = classes
65-
self._features_after_PCA = None
66-
67-
def PCA(self, dimensions: int) -> np.ndarray:
68-
"""Principal Component Analysis with a certain filter parameter"""
69-
70-
try:
71-
# Check if the features have been loaded
72-
assert any(self._features) is True
73-
data_mean = self._features.mean(1)
74-
# Center the dataset
75-
centered_data = self._features - np.reshape(data_mean, (data_mean.size, 1))
76-
covariance_matrix = np.dot(centered_data, centered_data.T) / self._features.shape[1]
77-
_, eigenvectors = np.linalg.eigh(covariance_matrix)
78-
# Take all the columns in the reverse order (-1), and then takes only the first columns
79-
filtered_eigenvectors = eigenvectors[:, ::-1][:, 0:dimensions]
80-
# Project the database on the new space
81-
projected_data = np.dot(filtered_eigenvectors.T, self._features)
82-
self._features_after_PCA = copy.deepcopy(projected_data)
83-
logging.info("Principal Component Analysis computed")
84-
except AssertionError:
85-
logging.basicConfig(level=logging.ERROR, format='%(message)s', force=True)
86-
logging.error("Feature array is empty")
87-
raise AssertionError
def PCA(features: np.ndarray, dimensions: int) -> np.ndarray:
    """Principal Component Analysis \n
    Parameters: \n
    * features: the features extracted from the dataset, one column per object
    * dimensions: to filter the projected data for the desired dimension

    Returns the features projected on the top `dimensions` principal
    components (dimensions x n_objects). \n
    Raises AssertionError if the feature array is empty."""

    # Check if the features have been loaded.  BUG FIX: the original used
    # `features.any()`, which also rejects a legitimately all-zero dataset;
    # test for an actually empty array instead.
    if features.size == 0:
        logging.basicConfig(level=logging.ERROR, format='%(message)s', force=True)
        logging.error("Dataset empty")
        raise AssertionError

    data_mean = features.mean(1)
    # Center the dataset
    centered_data = features - np.reshape(data_mean, (data_mean.size, 1))
    covariance_matrix = np.dot(centered_data, centered_data.T) / features.shape[1]
    _, eigenvectors = np.linalg.eigh(covariance_matrix)
    # eigh returns eigenvalues in ascending order: reverse the columns (-1)
    # and keep only the first `dimensions` eigenvectors (largest eigenvalues)
    filtered_eigenvectors = eigenvectors[:, ::-1][:, 0:dimensions]
    # Project the dataset on the new space
    projected_data = np.dot(filtered_eigenvectors.T, features)
    logging.info("Principal Component Analysis computed")

    return projected_data
79+
80+
81+
def LDA(features: np.ndarray, labels: np.ndarray, classes: int, dimensions: int) -> np.ndarray:
    """Linear Discriminant Analysis \n
    Parameters: \n
    * features: the features extracted from the dataset, one column per object
    * labels: the class labels of the features
    * classes: the number of classes present in the dataset
    * dimensions: to filter the projected data for the desired dimension

    Returns the features projected on the top `dimensions` discriminant
    directions (dimensions x n_objects). \n
    Raises AssertionError if dimensions >= classes or the dataset is empty."""

    # LDA yields at most classes - 1 discriminant directions.  Raise
    # explicitly instead of `assert`, which is stripped under `python -O`.
    if not classes > dimensions:
        raise AssertionError("dimensions must be strictly smaller than the number of classes")

    # Check if features have been already loaded.  BUG FIX: the original
    # tested `features.any` (the bound method, always truthy), so the
    # emptiness check could never fire; test for an empty array instead.
    if features.size == 0:
        logging.basicConfig(level=logging.ERROR, format='%(message)s', force=True)
        logging.error("Dataset empty")
        raise AssertionError

    # Generalized eigenvalue problem  S_b v = lambda S_w v
    _, eigenvectors = scipy.linalg.eigh(
        covariance_between_classes(features, labels, classes),
        covariance_within_classes(features, labels, classes))
    # Keep the eigenvectors associated with the largest eigenvalues
    filtered_eigenvectors = eigenvectors[:, ::-1][:, :dimensions]
    # Orthonormalize the retained basis via SVD before projecting
    svd_matrix, _, _ = np.linalg.svd(filtered_eigenvectors)
    filtered_svd_matrix = svd_matrix[:, 0:dimensions]
    projected_data = np.dot(filtered_svd_matrix.T, features)
    logging.info("Linear Discriminant Analysis computed")

    return projected_data

0 commit comments

Comments
 (0)