From 06c3f2a9b9bedf8aa99ea0f91000fe0aa587f959 Mon Sep 17 00:00:00 2001 From: Avi Agrawal Date: Tue, 15 Dec 2020 13:59:56 -0500 Subject: [PATCH 1/6] Updated KNN and README --- README.md | 13 ++-- src/algorithms/ML/KNN/README.md | 23 ++++++ src/algorithms/ML/KNN/__test__/knn.test.js | 36 ++++++++++ src/algorithms/ML/KNN/knn.js | 81 ++++++++++++++++++++++ 4 files changed, 147 insertions(+), 6 deletions(-) create mode 100644 src/algorithms/ML/KNN/README.md create mode 100644 src/algorithms/ML/KNN/__test__/knn.test.js create mode 100644 src/algorithms/ML/KNN/knn.js diff --git a/README.md b/README.md index 9994971043..7db1a025c8 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ _Read this in other languages:_ [_Türk_](README.tr-TR.md), [_Italiana_](README.it-IT.md) -*☝ Note that this project is meant to be used for learning and researching purposes +*☝ Note that this project is meant to be used for learning and researching purposes only, and it is **not** meant to be used for production.* ## Data Structures @@ -64,7 +64,7 @@ a set of rules that precisely define a sequence of operations. * **Math** * `B` [Bit Manipulation](src/algorithms/math/bits) - set/get/update/clear bits, multiplication/division by two, make negative etc. - * `B` [Factorial](src/algorithms/math/factorial) + * `B` [Factorial](src/algorithms/math/factorial) * `B` [Fibonacci Number](src/algorithms/math/fibonacci) - classic and closed-form versions * `B` [Prime Factors](src/algorithms/math/prime-factors) - finding prime factors and counting them using Hardy-Ramanujan's theorem * `B` [Primality Test](src/algorithms/math/primality-test) (trial division method) @@ -80,7 +80,7 @@ a set of rules that precisely define a sequence of operations. * `A` [Integer Partition](src/algorithms/math/integer-partition) * `A` [Square Root](src/algorithms/math/square-root) - Newton's method * `A` [Liu Hui π Algorithm](src/algorithms/math/liu-hui) - approximate π calculations based on N-gons - * `A` [Discrete Fourier Transform](src/algorithms/math/fourier-transform) - decompose a function of time (a signal) into the frequencies that make it up + * `A` [Discrete Fourier Transform](src/algorithms/math/fourier-transform) - decompose a function of time (a signal) into the frequencies that make it up * **Sets** * `B` [Cartesian Product](src/algorithms/sets/cartesian-product) - product of multiple sets * `B` [Fisher–Yates Shuffle](src/algorithms/sets/fisher-yates) - random permutation of a finite sequence @@ -142,12 +142,13 @@ a set of rules that precisely define a sequence of operations. 
 * `B` [Polynomial Hash](src/algorithms/cryptography/polynomial-hash) - rolling hash function based on polynomial
 * `B` [Caesar Cipher](src/algorithms/cryptography/caesar-cipher) - simple substitution cipher
 * **Machine Learning**
- * `B` [NanoNeuron](https://github.com/trekhleb/nano-neuron) - 7 simple JS functions that illustrate how machines can actually learn (forward/backward propagation)
+ * `B` [NanoNeuron](https://github.com/trekhleb/nano-neuron) - 7 simple JS functions that illustrate how machines can actually learn (forward/backward propagation)
+ * `B` [KNN](src/algorithms/algorithm/ML/KNN) - K Nearest Neighbors
 * **Uncategorized**
 * `B` [Tower of Hanoi](src/algorithms/uncategorized/hanoi-tower)
 * `B` [Square Matrix Rotation](src/algorithms/uncategorized/square-matrix-rotation) - in-place algorithm
- * `B` [Jump Game](src/algorithms/uncategorized/jump-game) - backtracking, dynamic programming (top-down + bottom-up) and greedy examples
- * `B` [Unique Paths](src/algorithms/uncategorized/unique-paths) - backtracking, dynamic programming and Pascal's Triangle based examples
+ * `B` [Jump Game](src/algorithms/uncategorized/jump-game) - backtracking, dynamic programming (top-down + bottom-up) and greedy examples
+ * `B` [Unique Paths](src/algorithms/uncategorized/unique-paths) - backtracking, dynamic programming and Pascal's Triangle based examples
 * `B` [Rain Terraces](src/algorithms/uncategorized/rain-terraces) - trapping rain water problem (dynamic programming and brute force versions)
 * `B` [Recursive Staircase](src/algorithms/uncategorized/recursive-staircase) - count the number of ways to reach to the top (4 solutions)
 * `A` [N-Queens Problem](src/algorithms/uncategorized/n-queens)
diff --git a/src/algorithms/ML/KNN/README.md b/src/algorithms/ML/KNN/README.md
new file mode 100644
index 0000000000..d9db66ef46
--- /dev/null
+++ b/src/algorithms/ML/KNN/README.md
@@ -0,0 +1,23 @@
+# KNN Algorithm
+
+KNN stands for K Nearest Neighbors. It is a supervised Machine Learning algorithm: a classification algorithm that determines the class of a sample vector from labelled sample data.
+
+The idea is to measure the similarity between two data points with a distance metric; Euclidean distance is the most common choice. The algorithm is as follows:
+
+1. Check for errors such as invalid data or labels.
+2. Calculate the Euclidean distance from the classification point to every data point in the training data.
+3. Sort the points by distance, together with their classes, in ascending order.
+4. Take the first "K" entries and find the mode of their classes.
+5. Report that class as the prediction.
+
+Here is a visualization for better understanding:
+
+![KNN Visualization](https://media.geeksforgeeks.org/wp-content/uploads/graph2-2.png)
+
+As the figure shows, an unknown point is classified according to its proximity to the labelled points around it.
+
+Note that odd values of "K" are preferred in order to avoid tied votes. "K" is usually taken as 3 or 5.
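+
+A minimal usage sketch, assuming the `KNN(dataX, dataY, toClassify, k)` signature exported from `knn.js` (the optional `k` defaults to 3); the sample data and expected results mirror the accompanying tests:
+
+```js
+// Usage sketch; the import path assumes a file sitting next to knn.js.
+import KNN from './knn';
+
+const dataX = [[1, 1], [6, 2], [3, 3], [4, 5], [9, 2], [2, 4], [8, 7]]; // training vectors
+const dataY = [1, 2, 1, 2, 1, 2, 1]; // class label of each training vector
+
+KNN(dataX, dataY, [1.25, 1.25]);    // => 1 (k = 3 by default)
+KNN(dataX, dataY, [1.25, 1.25], 5); // => 2 (a larger k changes the vote here)
+```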
+ +## References + +- [GeeksforGeeks](https://media.geeksforgeeks.org/wp-content/uploads/graph2-2.png) diff --git a/src/algorithms/ML/KNN/__test__/knn.test.js b/src/algorithms/ML/KNN/__test__/knn.test.js new file mode 100644 index 0000000000..7e3a7b8e2f --- /dev/null +++ b/src/algorithms/ML/KNN/__test__/knn.test.js @@ -0,0 +1,36 @@ +import KNN from '../knn' +describe('KNN', () => { + it('should throw an error on invalid data', () => { + const nodata = () => { + KNN(); + } + expect(nodata).toThrowError(); + }) + it('should throw an error on invalid labels', () => { + const nolabels = () => { + KNN([[1, 1]]); + } + expect(nolabels).toThrowError(); + }) + it('should throw an error on not giving classification vector', () => { + const noclassification = () => { + KNN([[1, 1]], [1]); + } + expect(noclassification).toThrowError(); + }) + it('should throw an error on not giving classification vector', () => { + const inconsistent = () => {KNN([[1, 1]], [1], [1]); + } + expect(inconsistent).toThrowError(); + }) + it('should find the nearest neighbour', () => { + var data_x = [[1, 1],[2, 2]]; + var data_y = [1, 2]; + expect(KNN(data_x, data_y, [1, 1])).toBe(1); + + var data_x = [[1, 1], [6, 2], [3, 3], [4, 5], [9, 2], [2, 4], [8, 7]] + var data_y = [1, 2, 1, 2, 1, 2, 1] + expect(KNN(data_x, data_y, [1.25, 1.25])) + .toBe(1) + }) +}) diff --git a/src/algorithms/ML/KNN/knn.js b/src/algorithms/ML/KNN/knn.js new file mode 100644 index 0000000000..579f9add14 --- /dev/null +++ b/src/algorithms/ML/KNN/knn.js @@ -0,0 +1,81 @@ +/** + * @param {object} data_y + * @param {object} data_x + * @param {object} to_classify_x + * @param {number} k + * @return {number} + */ +export default function KNN(data_x, data_y, to_classify_x, k) { + // checking errors + + // if no data given, data object is blank, no class data given, notify and return + try { + data_x[0]; + if (data_x[0].length < 1) throw ''; + } catch (error) { + throw 'Error: data invalid/empty/number of classes less than 1.'; + } + + // if no class lables given or number of x_vectors dont match with number of y vectors, notify and return + try { + data_y[0]; + if (!(data_x.length == data_y.length)) throw ''; + } catch (error) { + throw 'Error: labels invalid/empty/size of data and labels dont match'; + } + + // if no vector/data point is given to make prediction, algorithm cannot be proceeded, notify and return + try { + if (!to_classify_x.length == data_x[0].length) throw ''; + } catch (error) { + throw 'Error: no vector given to classify/classification point invalid.'; + } + if (k == undefined) { + k = 3; + } + + // creating function to calculate the euclidean distance between 2 vectors + function euclidean_distance(x1, x2) { + // checking errors + if (x1.length != x2.length) { + throw 'inconsistency between data and classification vector.'; + } + // calculate the euclidean distance between 2 vectors and return + let total_sse = 0; + for (let j = 0; j < x1.length; j++) { + total_sse += Math.pow(x1[j] - x2[j], 2); + } + return Number(Math.sqrt(total_sse).toFixed(2)); + } + + // starting algorithm + + // calculate distance from to_classify_x to each point for all dimensions in data_x + // store distance and point's class_index into distance_class_list + let distance_list = [] + for (var i = 0; i < data_x.length; i++) { + const tm_store = []; + tm_store.push(euclidean_distance(data_x[i], to_classify_x)); + tm_store.push(data_y[i]); + distance_list[i] = tm_store; + } + + // sort distance_list + // take initial k values, count with class index + 
distance_list = distance_list.sort().slice(0, k); + + // count the number of instances of each class in top k members + // with that maintain record of highest count class simultanously + const mode_k = {}; + const maxm = [-1, -1]; + for (i = 0; i < Math.min(k, distance_list.length); i++) { + if (distance_list[i][1] in mode_k) mode_k[distance_list[i][1]] += 1; + else mode_k[distance_list[i][1]] = 1; + if (mode_k[distance_list[i][1]] > maxm[0]) { + maxm[0] = mode_k[distance_list[i][1]]; + maxm[1] = distance_list[i][1]; + } + } + // return the class with highest count from maxm + return maxm[1]; +} From a21bbdc70109eec539fbebaa47a3170b151242d2 Mon Sep 17 00:00:00 2001 From: Avi Agrawal <39293511+avi09@users.noreply.github.com> Date: Tue, 15 Dec 2020 14:01:15 -0500 Subject: [PATCH 2/6] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7db1a025c8..2ee72e2268 100644 --- a/README.md +++ b/README.md @@ -143,7 +143,7 @@ a set of rules that precisely define a sequence of operations. * `B` [Caesar Cipher](src/algorithms/cryptography/caesar-cipher) - simple substitution cipher * **Machine Learning** * `B` [NanoNeuron](https://github.com/trekhleb/nano-neuron) - 7 simple JS functions that illustrate how machines can actually learn (forward/backward propagation) - * `B` [KNN](src/algorithms/algorithm/ML/KNN) - K Nearest Neighbors + * `B` [KNN](src/algorithms/ML/KNN) - K Nearest Neighbors * **Uncategorized** * `B` [Tower of Hanoi](src/algorithms/uncategorized/hanoi-tower) * `B` [Square Matrix Rotation](src/algorithms/uncategorized/square-matrix-rotation) - in-place algorithm From ad73dbfdc134f444e8de17200c3a084a16940a00 Mon Sep 17 00:00:00 2001 From: Avi Agrawal Date: Tue, 15 Dec 2020 17:37:41 -0500 Subject: [PATCH 3/6] new --- src/algorithms/ML/KNN/__test__/knn.test.js | 70 ++++++------ src/algorithms/ML/KNN/knn.js | 126 ++++++++++----------- 2 files changed, 99 insertions(+), 97 deletions(-) diff --git a/src/algorithms/ML/KNN/__test__/knn.test.js b/src/algorithms/ML/KNN/__test__/knn.test.js index 7e3a7b8e2f..4685eacb5d 100644 --- a/src/algorithms/ML/KNN/__test__/knn.test.js +++ b/src/algorithms/ML/KNN/__test__/knn.test.js @@ -1,36 +1,38 @@ -import KNN from '../knn' +import KNN from '../knn'; + describe('KNN', () => { - it('should throw an error on invalid data', () => { - const nodata = () => { - KNN(); - } - expect(nodata).toThrowError(); - }) - it('should throw an error on invalid labels', () => { - const nolabels = () => { - KNN([[1, 1]]); - } - expect(nolabels).toThrowError(); - }) - it('should throw an error on not giving classification vector', () => { - const noclassification = () => { - KNN([[1, 1]], [1]); - } - expect(noclassification).toThrowError(); - }) - it('should throw an error on not giving classification vector', () => { - const inconsistent = () => {KNN([[1, 1]], [1], [1]); - } - expect(inconsistent).toThrowError(); - }) - it('should find the nearest neighbour', () => { - var data_x = [[1, 1],[2, 2]]; - var data_y = [1, 2]; - expect(KNN(data_x, data_y, [1, 1])).toBe(1); + it('should throw an error on invalid data', () => { + const nodata = () => { + KNN(); + }; + expect(nodata).toThrowError(); + }); + it('should throw an error on invalid labels', () => { + const nolabels = () => { + KNN([[1, 1]]); + }; + expect(nolabels).toThrowError(); + }); + it('should throw an error on not giving classification vector', () => { + const noclassification = () => { + KNN([[1, 1]], [1]); + }; + 
expect(noclassification).toThrowError(); + }); + it('should throw an error on not giving classification vector', () => { + const inconsistent = () => { + KNN([[1, 1]], [1], [1]); + }; + expect(inconsistent).toThrowError(); + }); + it('should find the nearest neighbour', () => { + var data_x = [[1, 1], [2, 2]]; + var data_y = [1, 2]; + expect(KNN(data_x, data_y, [1, 1])).toBe(1); - var data_x = [[1, 1], [6, 2], [3, 3], [4, 5], [9, 2], [2, 4], [8, 7]] - var data_y = [1, 2, 1, 2, 1, 2, 1] - expect(KNN(data_x, data_y, [1.25, 1.25])) - .toBe(1) - }) -}) + var data_x = [[1, 1], [6, 2], [3, 3], [4, 5], [9, 2], [2, 4], [8, 7]]; + var data_y = [1, 2, 1, 2, 1, 2, 1]; + expect(KNN(data_x, data_y, [1.25, 1.25])) + .toBe(1); + }); +}); diff --git a/src/algorithms/ML/KNN/knn.js b/src/algorithms/ML/KNN/knn.js index 579f9add14..1c61e57e76 100644 --- a/src/algorithms/ML/KNN/knn.js +++ b/src/algorithms/ML/KNN/knn.js @@ -6,76 +6,76 @@ * @return {number} */ export default function KNN(data_x, data_y, to_classify_x, k) { - // checking errors - - // if no data given, data object is blank, no class data given, notify and return - try { - data_x[0]; - if (data_x[0].length < 1) throw ''; - } catch (error) { - throw 'Error: data invalid/empty/number of classes less than 1.'; - } + // checking errors - // if no class lables given or number of x_vectors dont match with number of y vectors, notify and return - try { - data_y[0]; - if (!(data_x.length == data_y.length)) throw ''; - } catch (error) { - throw 'Error: labels invalid/empty/size of data and labels dont match'; - } + // if no data given, data object is blank, no class data given, notify and return + try { + data_x[0]; + if (data_x[0].length < 1) throw ''; + } catch (error) { + throw 'Error: data invalid/empty/number of classes less than 1.'; + } - // if no vector/data point is given to make prediction, algorithm cannot be proceeded, notify and return - try { - if (!to_classify_x.length == data_x[0].length) throw ''; - } catch (error) { - throw 'Error: no vector given to classify/classification point invalid.'; - } - if (k == undefined) { - k = 3; - } + // if no class lables given or number of x_vectors dont match with number of y vectors, notify and return + try { + data_y[0]; + if (!(data_x.length == data_y.length)) throw ''; + } catch (error) { + throw 'Error: labels invalid/empty/size of data and labels dont match'; + } - // creating function to calculate the euclidean distance between 2 vectors - function euclidean_distance(x1, x2) { - // checking errors - if (x1.length != x2.length) { - throw 'inconsistency between data and classification vector.'; - } - // calculate the euclidean distance between 2 vectors and return - let total_sse = 0; - for (let j = 0; j < x1.length; j++) { - total_sse += Math.pow(x1[j] - x2[j], 2); - } - return Number(Math.sqrt(total_sse).toFixed(2)); + // if no vector/data point is given to make prediction, algorithm cannot be proceeded, notify and return + try { + if (!to_classify_x.length == data_x[0].length) throw ''; + } catch (error) { + throw 'Error: no vector given to classify/classification point invalid.'; + } + if (k == undefined) { + k = 3; + } + + // creating function to calculate the euclidean distance between 2 vectors + function euclidean_distance(x1, x2) { + // checking errors + if (x1.length != x2.length) { + throw 'inconsistency between data and classification vector.'; } + // calculate the euclidean distance between 2 vectors and return + let total_sse = 0; + for (let j = 0; j < x1.length; j++) { + total_sse += 
Math.pow(x1[j] - x2[j], 2); + } + return Number(Math.sqrt(total_sse).toFixed(2)); + } - // starting algorithm + // starting algorithm - // calculate distance from to_classify_x to each point for all dimensions in data_x - // store distance and point's class_index into distance_class_list - let distance_list = [] - for (var i = 0; i < data_x.length; i++) { - const tm_store = []; - tm_store.push(euclidean_distance(data_x[i], to_classify_x)); - tm_store.push(data_y[i]); - distance_list[i] = tm_store; - } + // calculate distance from to_classify_x to each point for all dimensions in data_x + // store distance and point's class_index into distance_class_list + let distance_list = []; + for (var i = 0; i < data_x.length; i++) { + const tm_store = []; + tm_store.push(euclidean_distance(data_x[i], to_classify_x)); + tm_store.push(data_y[i]); + distance_list[i] = tm_store; + } - // sort distance_list - // take initial k values, count with class index - distance_list = distance_list.sort().slice(0, k); + // sort distance_list + // take initial k values, count with class index + distance_list = distance_list.sort().slice(0, k); - // count the number of instances of each class in top k members - // with that maintain record of highest count class simultanously - const mode_k = {}; - const maxm = [-1, -1]; - for (i = 0; i < Math.min(k, distance_list.length); i++) { - if (distance_list[i][1] in mode_k) mode_k[distance_list[i][1]] += 1; - else mode_k[distance_list[i][1]] = 1; - if (mode_k[distance_list[i][1]] > maxm[0]) { - maxm[0] = mode_k[distance_list[i][1]]; - maxm[1] = distance_list[i][1]; - } + // count the number of instances of each class in top k members + // with that maintain record of highest count class simultanously + const mode_k = {}; + const maxm = [-1, -1]; + for (i = 0; i < Math.min(k, distance_list.length); i++) { + if (distance_list[i][1] in mode_k) mode_k[distance_list[i][1]] += 1; + else mode_k[distance_list[i][1]] = 1; + if (mode_k[distance_list[i][1]] > maxm[0]) { + maxm[0] = mode_k[distance_list[i][1]]; + maxm[1] = distance_list[i][1]; } - // return the class with highest count from maxm - return maxm[1]; + } + // return the class with highest count from maxm + return maxm[1]; } From 6373e4c39a89a33ca7d88674697d54db68fd2d16 Mon Sep 17 00:00:00 2001 From: Avi Agrawal Date: Tue, 15 Dec 2020 17:42:23 -0500 Subject: [PATCH 4/6] new --- src/algorithms/ML/KNN/__test__/knn.test.js | 12 ++-- src/algorithms/ML/KNN/knn.js | 78 +++++++++++----------- 2 files changed, 46 insertions(+), 44 deletions(-) diff --git a/src/algorithms/ML/KNN/__test__/knn.test.js b/src/algorithms/ML/KNN/__test__/knn.test.js index 4685eacb5d..c7078f9abe 100644 --- a/src/algorithms/ML/KNN/__test__/knn.test.js +++ b/src/algorithms/ML/KNN/__test__/knn.test.js @@ -26,13 +26,13 @@ describe('KNN', () => { expect(inconsistent).toThrowError(); }); it('should find the nearest neighbour', () => { - var data_x = [[1, 1], [2, 2]]; - var data_y = [1, 2]; - expect(KNN(data_x, data_y, [1, 1])).toBe(1); + let dataX = [[1, 1], [2, 2]]; + let dataY = [1, 2]; + expect(KNN(dataX, dataY, [1, 1])).toBe(1); - var data_x = [[1, 1], [6, 2], [3, 3], [4, 5], [9, 2], [2, 4], [8, 7]]; - var data_y = [1, 2, 1, 2, 1, 2, 1]; - expect(KNN(data_x, data_y, [1.25, 1.25])) + dataX = [[1, 1], [6, 2], [3, 3], [4, 5], [9, 2], [2, 4], [8, 7]]; + dataY = [1, 2, 1, 2, 1, 2, 1]; + expect(KNN(dataX, dataY, [1.25, 1.25])) .toBe(1); }); }); diff --git a/src/algorithms/ML/KNN/knn.js b/src/algorithms/ML/KNN/knn.js index 1c61e57e76..b665c98239 100644 --- 
a/src/algorithms/ML/KNN/knn.js +++ b/src/algorithms/ML/KNN/knn.js @@ -1,79 +1,81 @@ /** - * @param {object} data_y - * @param {object} data_x - * @param {object} to_classify_x + * @param {object} dataY + * @param {object} dataX + * @param {object} toClassify * @param {number} k * @return {number} */ -export default function KNN(data_x, data_y, to_classify_x, k) { +export default function KNN(dataX, dataY, toClassify, K) { + let k = -1; // checking errors // if no data given, data object is blank, no class data given, notify and return try { - data_x[0]; - if (data_x[0].length < 1) throw ''; + if (dataX.length < 1) throw new Error('Error'); + if (dataX[0].length < 1) throw new Error('Error'); } catch (error) { - throw 'Error: data invalid/empty/number of classes less than 1.'; + throw new Error('Error: data invalid/empty/number of classes less than 1.'); } - // if no class lables given or number of x_vectors dont match with number of y vectors, notify and return + // if no class lables given or number of x_vectors dont match with number of y vectors, error try { - data_y[0]; - if (!(data_x.length == data_y.length)) throw ''; + if (dataY.length < 1) throw new Error('Error'); + if (!(dataX.length === dataY.length)) throw new Error('Error'); } catch (error) { - throw 'Error: labels invalid/empty/size of data and labels dont match'; + throw new Error('Error: labels invalid/empty/size of data and labels dont match'); } - // if no vector/data point is given to make prediction, algorithm cannot be proceeded, notify and return + // if no vector/data point is given to make prediction, algorithm cannot be proceeded, error try { - if (!to_classify_x.length == data_x[0].length) throw ''; + if (!toClassify.length === dataX[0].length) throw new Error(''); } catch (error) { - throw 'Error: no vector given to classify/classification point invalid.'; + throw new Error('Error: no vector given to classify/classification point invalid.'); } - if (k == undefined) { + if (K === undefined) { k = 3; + } else { + k = K; } // creating function to calculate the euclidean distance between 2 vectors - function euclidean_distance(x1, x2) { + function euclideanDistance(x1, x2) { // checking errors - if (x1.length != x2.length) { - throw 'inconsistency between data and classification vector.'; + if (x1.length !== x2.length) { + throw new Error('inconsistency between data and classification vector.'); } // calculate the euclidean distance between 2 vectors and return - let total_sse = 0; - for (let j = 0; j < x1.length; j++) { - total_sse += Math.pow(x1[j] - x2[j], 2); + let totalSSE = 0; + for (let j = 0; j < x1.length; j += 1) { + totalSSE += (x1[j] - x2[j]) ** 2; } - return Number(Math.sqrt(total_sse).toFixed(2)); + return Number(Math.sqrt(totalSSE).toFixed(2)); } // starting algorithm - // calculate distance from to_classify_x to each point for all dimensions in data_x + // calculate distance from toClassify to each point for all dimensions in dataX // store distance and point's class_index into distance_class_list - let distance_list = []; - for (var i = 0; i < data_x.length; i++) { - const tm_store = []; - tm_store.push(euclidean_distance(data_x[i], to_classify_x)); - tm_store.push(data_y[i]); - distance_list[i] = tm_store; + let distanceList = []; + for (let i = 0; i < dataX.length; i += 1) { + const tmStore = []; + tmStore.push(euclideanDistance(dataX[i], toClassify)); + tmStore.push(dataY[i]); + distanceList[i] = tmStore; } - // sort distance_list + // sort distanceList // take initial k values, count with class 
index - distance_list = distance_list.sort().slice(0, k); + distanceList = distanceList.sort().slice(0, k); // count the number of instances of each class in top k members // with that maintain record of highest count class simultanously - const mode_k = {}; + const modeK = {}; const maxm = [-1, -1]; - for (i = 0; i < Math.min(k, distance_list.length); i++) { - if (distance_list[i][1] in mode_k) mode_k[distance_list[i][1]] += 1; - else mode_k[distance_list[i][1]] = 1; - if (mode_k[distance_list[i][1]] > maxm[0]) { - maxm[0] = mode_k[distance_list[i][1]]; - maxm[1] = distance_list[i][1]; + for (let i = 0; i < Math.min(k, distanceList.length); i += 1) { + if (distanceList[i][1] in modeK) modeK[distanceList[i][1]] += 1; + else modeK[distanceList[i][1]] = 1; + if (modeK[distanceList[i][1]] > maxm[0]) { + [maxm[0], maxm[1]] = [modeK[distanceList[i][1]], distanceList[i][1]]; } } // return the class with highest count from maxm From 4f3cac9cd84bd5f738acc0a78bdb94429b1c2829 Mon Sep 17 00:00:00 2001 From: Avi Agrawal Date: Tue, 15 Dec 2020 18:46:14 -0500 Subject: [PATCH 5/6] updated tests --- src/algorithms/ML/KNN/__test__/knn.test.js | 14 ++++++++----- src/algorithms/ML/KNN/knn.js | 23 ++++++---------------- 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/src/algorithms/ML/KNN/__test__/knn.test.js b/src/algorithms/ML/KNN/__test__/knn.test.js index c7078f9abe..6884598505 100644 --- a/src/algorithms/ML/KNN/__test__/knn.test.js +++ b/src/algorithms/ML/KNN/__test__/knn.test.js @@ -1,13 +1,12 @@ import KNN from '../knn'; describe('KNN', () => { - it('should throw an error on invalid data', () => { - const nodata = () => { + test('should throw an error on invalid data', () => { + expect(() => { KNN(); - }; - expect(nodata).toThrowError(); + }).toThrowError(); }); - it('should throw an error on invalid labels', () => { + test('should throw an error on invalid labels', () => { const nolabels = () => { KNN([[1, 1]]); }; @@ -34,5 +33,10 @@ describe('KNN', () => { dataY = [1, 2, 1, 2, 1, 2, 1]; expect(KNN(dataX, dataY, [1.25, 1.25])) .toBe(1); + + dataX = [[1, 1], [6, 2], [3, 3], [4, 5], [9, 2], [2, 4], [8, 7]]; + dataY = [1, 2, 1, 2, 1, 2, 1]; + expect(KNN(dataX, dataY, [1.25, 1.25], 5)) + .toBe(2); }); }); diff --git a/src/algorithms/ML/KNN/knn.js b/src/algorithms/ML/KNN/knn.js index b665c98239..8f5c9881fa 100644 --- a/src/algorithms/ML/KNN/knn.js +++ b/src/algorithms/ML/KNN/knn.js @@ -10,27 +10,16 @@ export default function KNN(dataX, dataY, toClassify, K) { // checking errors // if no data given, data object is blank, no class data given, notify and return - try { - if (dataX.length < 1) throw new Error('Error'); - if (dataX[0].length < 1) throw new Error('Error'); - } catch (error) { - throw new Error('Error: data invalid/empty/number of classes less than 1.'); - } + if (dataX.length < 1) throw new Error('Error: data invalid/empty/number of classes less than 1.'); + else if (dataX[0].length < 1) throw new Error('Error: data invalid/empty/number of classes less than 1.'); // if no class lables given or number of x_vectors dont match with number of y vectors, error - try { - if (dataY.length < 1) throw new Error('Error'); - if (!(dataX.length === dataY.length)) throw new Error('Error'); - } catch (error) { - throw new Error('Error: labels invalid/empty/size of data and labels dont match'); - } + else if (dataY.length < 1) throw new Error('Error: labels invalid/empty/size of data and labels dont match'); + else if (!(dataX.length === dataY.length)) throw new Error('Error: labels 
invalid/empty/size of data and labels dont match'); // if no vector/data point is given to make prediction, algorithm cannot be proceeded, error - try { - if (!toClassify.length === dataX[0].length) throw new Error(''); - } catch (error) { - throw new Error('Error: no vector given to classify/classification point invalid.'); - } + else if (!toClassify.length === dataX[0].length) throw new Error('Error: no vector given to classify/classification point invalid.'); + if (K === undefined) { k = 3; } else { From ac760f3d2c915116b4f025cbcf38a026767bbb4c Mon Sep 17 00:00:00 2001 From: Avi Agrawal Date: Tue, 15 Dec 2020 18:52:03 -0500 Subject: [PATCH 6/6] updated knn coverage --- src/algorithms/ML/KNN/knn.js | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/algorithms/ML/KNN/knn.js b/src/algorithms/ML/KNN/knn.js index 8f5c9881fa..866b6a22e9 100644 --- a/src/algorithms/ML/KNN/knn.js +++ b/src/algorithms/ML/KNN/knn.js @@ -7,18 +7,6 @@ */ export default function KNN(dataX, dataY, toClassify, K) { let k = -1; - // checking errors - - // if no data given, data object is blank, no class data given, notify and return - if (dataX.length < 1) throw new Error('Error: data invalid/empty/number of classes less than 1.'); - else if (dataX[0].length < 1) throw new Error('Error: data invalid/empty/number of classes less than 1.'); - - // if no class lables given or number of x_vectors dont match with number of y vectors, error - else if (dataY.length < 1) throw new Error('Error: labels invalid/empty/size of data and labels dont match'); - else if (!(dataX.length === dataY.length)) throw new Error('Error: labels invalid/empty/size of data and labels dont match'); - - // if no vector/data point is given to make prediction, algorithm cannot be proceeded, error - else if (!toClassify.length === dataX[0].length) throw new Error('Error: no vector given to classify/classification point invalid.'); if (K === undefined) { k = 3;