From f89ff4c4b335a6715c91c41d197476739ca2de4d Mon Sep 17 00:00:00 2001 From: Megan Payne Date: Wed, 25 Oct 2023 01:36:09 +0200 Subject: [PATCH 1/6] added mean absolute error to loss_functions.py --- machine_learning/loss_functions.py | 41 ++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/machine_learning/loss_functions.py b/machine_learning/loss_functions.py index ef34296360e2..3f198d7d6299 100644 --- a/machine_learning/loss_functions.py +++ b/machine_learning/loss_functions.py @@ -260,6 +260,47 @@ def mean_squared_error(y_true: np.ndarray, y_pred: np.ndarray) -> float: squared_errors = (y_true - y_pred) ** 2 return np.mean(squared_errors) +def mean_absolute_error(y_true: np.ndarray, y_pred: np.ndarray) -> float: + """ + Calculate the mean absolute error (MAE) between ground truth and predicted values. + + MAE measures the absolute difference between true values and predicted values. + + MAE = (1/n) * Σ(abs(y_true - y_pred)) + + Reference: https://en.wikipedia.org/wiki/Mean_absolute_error + + Parameters: + - y_true: The true values (ground truth) + - y_pred: The predicted values + + >>> true_values = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) + >>> predicted_values = np.array([0.8, 2.1, 2.9, 4.2, 5.2]) + >>> np.isclose(mean_absolute_error(true_values, predicted_values), 0.16) + True + >>> true_labels = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) + >>> predicted_probs = np.array([0.3, 0.8, 0.9, 0.2]) + >>> mean_absolute_error(true_labels, predicted_probs) + Traceback (most recent call last): + ... + ValueError: Input arrays must have the same length. + >>> true_labels = '1.0, 2.0, 3.0, 4.0' + >>> predicted_probs = '0.3, 0.8, 0.9, 0.2' + >>> mean_absolute_error(true_labels, predicted_probs) + Traceback (most recent call last): + ... + ValueError: Could not convert input to NumPy array. + """ + if len(y_true) != len(y_pred): + raise ValueError("Input arrays must have the same length.") + + if isinstance(y_true, np.ndarray) and isinstance(y_pred, np.ndarray): + return np.mean(abs(y_true - y_pred)) + else: + try: + return np.mean(abs(np.asarray(y_true) - np.asarray(y_pred))) + except ValueError as error: + raise error("Could not convert input to NumPy array.") def mean_squared_logarithmic_error(y_true: np.ndarray, y_pred: np.ndarray) -> float: """ From b71af2596cfcfa4c874387f0c585d68ea4847a3a Mon Sep 17 00:00:00 2001 From: Megan Payne Date: Wed, 25 Oct 2023 01:47:07 +0200 Subject: [PATCH 2/6] added doctest to mean absolute error to loss_functions.py --- machine_learning/loss_functions.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/machine_learning/loss_functions.py b/machine_learning/loss_functions.py index 3f198d7d6299..deccae08fbf1 100644 --- a/machine_learning/loss_functions.py +++ b/machine_learning/loss_functions.py @@ -262,10 +262,11 @@ def mean_squared_error(y_true: np.ndarray, y_pred: np.ndarray) -> float: def mean_absolute_error(y_true: np.ndarray, y_pred: np.ndarray) -> float: """ - Calculate the mean absolute error (MAE) between ground truth and predicted values. + Calculates the Mean Absolute Error (MAE) between ground truth (observed) and predicted values. MAE measures the absolute difference between true values and predicted values. + Equation: MAE = (1/n) * Σ(abs(y_true - y_pred)) Reference: https://en.wikipedia.org/wiki/Mean_absolute_error @@ -278,8 +279,12 @@ def mean_absolute_error(y_true: np.ndarray, y_pred: np.ndarray) -> float: >>> predicted_values = np.array([0.8, 2.1, 2.9, 4.2, 5.2]) >>> np.isclose(mean_absolute_error(true_values, predicted_values), 0.16) True + >>> true_values = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) + >>> predicted_values = np.array([0.8, 2.1, 2.9, 4.2, 5.2]) + >>> np.isclose(mean_absolute_error(true_values, predicted_values), 2.16) + False >>> true_labels = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) - >>> predicted_probs = np.array([0.3, 0.8, 0.9, 0.2]) + >>> predicted_probs = np.array([0.3, 0.8, 0.9, 5.2]) >>> mean_absolute_error(true_labels, predicted_probs) Traceback (most recent call last): ... From 94b3e8e100bf49eb72008d5d8081f07a286422ed Mon Sep 17 00:00:00 2001 From: Megan Payne Date: Wed, 25 Oct 2023 01:52:58 +0200 Subject: [PATCH 3/6] fixed long line in loss_functions.py --- machine_learning/loss_functions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/machine_learning/loss_functions.py b/machine_learning/loss_functions.py index deccae08fbf1..c7ec9c666b90 100644 --- a/machine_learning/loss_functions.py +++ b/machine_learning/loss_functions.py @@ -262,7 +262,8 @@ def mean_squared_error(y_true: np.ndarray, y_pred: np.ndarray) -> float: def mean_absolute_error(y_true: np.ndarray, y_pred: np.ndarray) -> float: """ - Calculates the Mean Absolute Error (MAE) between ground truth (observed) and predicted values. + Calculates the Mean Absolute Error (MAE) between ground truth (observed) + and predicted values. MAE measures the absolute difference between true values and predicted values. From b082da3d74f3775f3ca02f1792a1304c278b3745 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 24 Oct 2023 23:52:59 +0000 Subject: [PATCH 4/6] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- machine_learning/loss_functions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/machine_learning/loss_functions.py b/machine_learning/loss_functions.py index deccae08fbf1..4202d0c0891c 100644 --- a/machine_learning/loss_functions.py +++ b/machine_learning/loss_functions.py @@ -260,6 +260,7 @@ def mean_squared_error(y_true: np.ndarray, y_pred: np.ndarray) -> float: squared_errors = (y_true - y_pred) ** 2 return np.mean(squared_errors) + def mean_absolute_error(y_true: np.ndarray, y_pred: np.ndarray) -> float: """ Calculates the Mean Absolute Error (MAE) between ground truth (observed) and predicted values. @@ -307,6 +308,7 @@ def mean_absolute_error(y_true: np.ndarray, y_pred: np.ndarray) -> float: except ValueError as error: raise error("Could not convert input to NumPy array.") + def mean_squared_logarithmic_error(y_true: np.ndarray, y_pred: np.ndarray) -> float: """ Calculate the mean squared logarithmic error (MSLE) between ground truth and From 405819158839b4d89f0bbdbf4e4d566e08e5a5cd Mon Sep 17 00:00:00 2001 From: Megan Payne Date: Wed, 25 Oct 2023 02:05:13 +0200 Subject: [PATCH 5/6] fixed error in MAE --- machine_learning/loss_functions.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/machine_learning/loss_functions.py b/machine_learning/loss_functions.py index 0a9390fae672..5542efb81c81 100644 --- a/machine_learning/loss_functions.py +++ b/machine_learning/loss_functions.py @@ -291,12 +291,6 @@ def mean_absolute_error(y_true: np.ndarray, y_pred: np.ndarray) -> float: Traceback (most recent call last): ... ValueError: Input arrays must have the same length. - >>> true_labels = '1.0, 2.0, 3.0, 4.0' - >>> predicted_probs = '0.3, 0.8, 0.9, 0.2' - >>> mean_absolute_error(true_labels, predicted_probs) - Traceback (most recent call last): - ... - ValueError: Could not convert input to NumPy array. """ if len(y_true) != len(y_pred): raise ValueError("Input arrays must have the same length.") @@ -307,7 +301,7 @@ def mean_absolute_error(y_true: np.ndarray, y_pred: np.ndarray) -> float: try: return np.mean(abs(np.asarray(y_true) - np.asarray(y_pred))) except ValueError as error: - raise error("Could not convert input to NumPy array.") + raise ValueError("Could not convert input to NumPy array.") from error def mean_squared_logarithmic_error(y_true: np.ndarray, y_pred: np.ndarray) -> float: From 2507adf858bdf641f4d0693ed282ef471b039915 Mon Sep 17 00:00:00 2001 From: Megan Payne Date: Thu, 26 Oct 2023 02:34:54 +0200 Subject: [PATCH 6/6] Update machine_learning/loss_functions.py Co-authored-by: Tianyi Zheng --- machine_learning/loss_functions.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/machine_learning/loss_functions.py b/machine_learning/loss_functions.py index 5542efb81c81..36e446764196 100644 --- a/machine_learning/loss_functions.py +++ b/machine_learning/loss_functions.py @@ -295,13 +295,7 @@ def mean_absolute_error(y_true: np.ndarray, y_pred: np.ndarray) -> float: if len(y_true) != len(y_pred): raise ValueError("Input arrays must have the same length.") - if isinstance(y_true, np.ndarray) and isinstance(y_pred, np.ndarray): - return np.mean(abs(y_true - y_pred)) - else: - try: - return np.mean(abs(np.asarray(y_true) - np.asarray(y_pred))) - except ValueError as error: - raise ValueError("Could not convert input to NumPy array.") from error + return np.mean(abs(y_true - y_pred)) def mean_squared_logarithmic_error(y_true: np.ndarray, y_pred: np.ndarray) -> float: