From 7a621db5c147431d7515f44aea3d4f60efe2eb42 Mon Sep 17 00:00:00 2001 From: Suyash Dongre <109069262+Suyashd999@users.noreply.github.com> Date: Sun, 5 Nov 2023 09:32:41 +0530 Subject: [PATCH 1/6] Added doctest to decision_tree.py --- machine_learning/decision_tree.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/machine_learning/decision_tree.py b/machine_learning/decision_tree.py index 7cd1b02c4181..8ec17d1cd4e4 100644 --- a/machine_learning/decision_tree.py +++ b/machine_learning/decision_tree.py @@ -49,6 +49,26 @@ def train(self, x, y): The contents of y are the labels for the corresponding X values train does not have a return value + + Examples: + 1. Try to train when x & y are of same length & 1 dimesions (No errors) + >>> dt = DecisionTree() + >>> dt.train(np.array([10,20,30,40,50]),np.array([0,0,0,1,1])) + + 2. Try to train when x is 2 dimesions + >>> dt = DecisionTree() + >>> dt.train(np.array([[1,2,3,4,5],[1,2,3,4,5]]),np.array([0,0,0,1,1])) + Error: Input data set must be one dimensional + + 3. Try to train when x and y are not of same length + >>> dt = DecisionTree() + >>> dt.train(np.array([1,2,3,4,5]),np.array([[0,0,0,1,1],[0,0,0,1,1]])) + Error: X and y have different lengths + + 4. Try to train when x & y are of same length but diffetent dimesions + >>> dt = DecisionTree() + >>> dt.train(np.array([1,2,3,4,5]),np.array([[1],[2],[3],[4],[5]])) + Error: Data set labels must be one dimensional """ """ From c61ceb2fdbf7dd05013eb2e24c1c78b090acb260 Mon Sep 17 00:00:00 2001 From: Suyash Dongre <109069262+Suyashd999@users.noreply.github.com> Date: Sun, 5 Nov 2023 09:35:25 +0530 Subject: [PATCH 2/6] Update decision_tree.py --- machine_learning/decision_tree.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/machine_learning/decision_tree.py b/machine_learning/decision_tree.py index 8ec17d1cd4e4..f7f92927c741 100644 --- a/machine_learning/decision_tree.py +++ b/machine_learning/decision_tree.py @@ -51,11 +51,11 @@ def train(self, x, y): train does not have a return value Examples: - 1. Try to train when x & y are of same length & 1 dimesions (No errors) + 1. Try to train when x & y are of same length & 1 dimensions (No errors) >>> dt = DecisionTree() >>> dt.train(np.array([10,20,30,40,50]),np.array([0,0,0,1,1])) - 2. Try to train when x is 2 dimesions + 2. Try to train when x is 2 dimensions >>> dt = DecisionTree() >>> dt.train(np.array([[1,2,3,4,5],[1,2,3,4,5]]),np.array([0,0,0,1,1])) Error: Input data set must be one dimensional @@ -65,7 +65,7 @@ def train(self, x, y): >>> dt.train(np.array([1,2,3,4,5]),np.array([[0,0,0,1,1],[0,0,0,1,1]])) Error: X and y have different lengths - 4. Try to train when x & y are of same length but diffetent dimesions + 4. Try to train when x & y are of same length but diffetent dimensions >>> dt = DecisionTree() >>> dt.train(np.array([1,2,3,4,5]),np.array([[1],[2],[3],[4],[5]])) Error: Data set labels must be one dimensional From 5b49187dab388527e4f99de4de26475a4a3be699 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Sun, 5 Nov 2023 13:25:27 +0545 Subject: [PATCH 3/6] Update machine_learning/decision_tree.py --- machine_learning/decision_tree.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/machine_learning/decision_tree.py b/machine_learning/decision_tree.py index f7f92927c741..5c9e397256c1 100644 --- a/machine_learning/decision_tree.py +++ b/machine_learning/decision_tree.py @@ -69,9 +69,6 @@ def train(self, x, y): >>> dt = DecisionTree() >>> dt.train(np.array([1,2,3,4,5]),np.array([[1],[2],[3],[4],[5]])) Error: Data set labels must be one dimensional - """ - - """ this section is to check that the inputs conform to our dimensionality constraints """ From 352e384dab8bdd6b9be7d18dc883b23c4177b770 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Sun, 5 Nov 2023 13:25:55 +0545 Subject: [PATCH 4/6] Update machine_learning/decision_tree.py --- machine_learning/decision_tree.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/machine_learning/decision_tree.py b/machine_learning/decision_tree.py index 5c9e397256c1..ae948faee1fe 100644 --- a/machine_learning/decision_tree.py +++ b/machine_learning/decision_tree.py @@ -69,7 +69,8 @@ def train(self, x, y): >>> dt = DecisionTree() >>> dt.train(np.array([1,2,3,4,5]),np.array([[1],[2],[3],[4],[5]])) Error: Data set labels must be one dimensional - this section is to check that the inputs conform to our dimensionality + + This section is to check that the inputs conform to our dimensionality constraints """ if x.ndim != 1: From 8d56475f5ca63c1f15db950bd3101933e47a1053 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Sun, 5 Nov 2023 13:33:01 +0545 Subject: [PATCH 5/6] raise ValueError() --- machine_learning/decision_tree.py | 33 +++++++++++++++++-------------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/machine_learning/decision_tree.py b/machine_learning/decision_tree.py index ae948faee1fe..fb70ececfb41 100644 --- a/machine_learning/decision_tree.py +++ b/machine_learning/decision_tree.py @@ -18,7 +18,7 @@ def __init__(self, depth=5, min_leaf_size=5): def mean_squared_error(self, labels, prediction): """ mean_squared_error: - @param labels: a one dimensional numpy array + @param labels: a one-dimensional numpy array @param prediction: a floating point value return value: mean_squared_error calculates the error if prediction is used to estimate the labels @@ -44,11 +44,11 @@ def mean_squared_error(self, labels, prediction): def train(self, x, y): """ train: - @param x: a one dimensional numpy array - @param y: a one dimensional numpy array. + @param x: a one-dimensional numpy array + @param y: a one-dimensional numpy array. The contents of y are the labels for the corresponding X values - train does not have a return value + train() does not have a return value Examples: 1. Try to train when x & y are of same length & 1 dimensions (No errors) @@ -58,30 +58,33 @@ def train(self, x, y): 2. Try to train when x is 2 dimensions >>> dt = DecisionTree() >>> dt.train(np.array([[1,2,3,4,5],[1,2,3,4,5]]),np.array([0,0,0,1,1])) - Error: Input data set must be one dimensional + Traceback (most recent call last): + ... + ValueError: Input data set must be one-dimensional - 3. Try to train when x and y are not of same length + 3. Try to train when x and y are not of the same length >>> dt = DecisionTree() >>> dt.train(np.array([1,2,3,4,5]),np.array([[0,0,0,1,1],[0,0,0,1,1]])) - Error: X and y have different lengths + Traceback (most recent call last): + ... + ValueError: X and y have different lengths - 4. Try to train when x & y are of same length but diffetent dimensions + 4. Try to train when x & y are of the same length but different dimensions >>> dt = DecisionTree() >>> dt.train(np.array([1,2,3,4,5]),np.array([[1],[2],[3],[4],[5]])) - Error: Data set labels must be one dimensional + Traceback (most recent call last): + ... + ValueError: Data set labels must be one-dimensional This section is to check that the inputs conform to our dimensionality constraints """ if x.ndim != 1: - print("Error: Input data set must be one dimensional") - return + raise ValueError("Input data set must be one-dimensional") if len(x) != len(y): - print("Error: X and y have different lengths") - return + raise ValueError("x and y have different lengths") if y.ndim != 1: - print("Error: Data set labels must be one dimensional") - return + raise ValueError("Data set labels must be one-dimensional") if len(x) < 2 * self.min_leaf_size: self.prediction = np.mean(y) From 4f69fa5fe2b00b2c65dd24440bd8ccde3836bde8 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Sun, 5 Nov 2023 13:52:44 +0545 Subject: [PATCH 6/6] Update decision_tree.py --- machine_learning/decision_tree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/machine_learning/decision_tree.py b/machine_learning/decision_tree.py index fb70ececfb41..c67e09c7f114 100644 --- a/machine_learning/decision_tree.py +++ b/machine_learning/decision_tree.py @@ -67,7 +67,7 @@ def train(self, x, y): >>> dt.train(np.array([1,2,3,4,5]),np.array([[0,0,0,1,1],[0,0,0,1,1]])) Traceback (most recent call last): ... - ValueError: X and y have different lengths + ValueError: x and y have different lengths 4. Try to train when x & y are of the same length but different dimensions >>> dt = DecisionTree()