TheAlgorithms
diff --git a/Diff for: ‎DIRECTORY.md
+2-1 b/Diff for: ‎DIRECTORY.md
+2-1
diff --git a/Diff for: ‎maths/sum_of_two_positive_numbers_bitwise.py renamed to ‎bit_manipulation/bitwise_addition_recursive.py b/Diff for: ‎maths/sum_of_two_positive_numbers_bitwise.py renamed to ‎bit_manipulation/bitwise_addition_recursive.py
diff --git a/Diff for: ‎digital_image_processing/morphological_operations/erosion_operation.py
+23-16 b/Diff for: ‎digital_image_processing/morphological_operations/erosion_operation.py
+23-16
diff --git a/Diff for: ‎dynamic_programming/smith_waterman.py
+193 b/Diff for: ‎dynamic_programming/smith_waterman.py
+193
diff --git a/Diff for: ‎machine_learning/k_means_clust.py
+10-13 b/Diff for: ‎machine_learning/k_means_clust.py
+10-13
@@ -43,6 +43,7 @@
   * [Binary Shifts](bit_manipulation/binary_shifts.py)
   * [Binary Twos Complement](bit_manipulation/binary_twos_complement.py)
   * [Binary Xor Operator](bit_manipulation/binary_xor_operator.py)
+  * [Bitwise Addition Recursive](bit_manipulation/bitwise_addition_recursive.py)
   * [Count 1S Brian Kernighan Method](bit_manipulation/count_1s_brian_kernighan_method.py)
   * [Count Number Of One Bits](bit_manipulation/count_number_of_one_bits.py)
   * [Gray Code Sequence](bit_manipulation/gray_code_sequence.py)
@@ -507,7 +508,6 @@
   * [Gradient Descent](machine_learning/gradient_descent.py)
   * [K Means Clust](machine_learning/k_means_clust.py)
   * [K Nearest Neighbours](machine_learning/k_nearest_neighbours.py)
-  * [Knn Sklearn](machine_learning/knn_sklearn.py)
   * [Linear Discriminant Analysis](machine_learning/linear_discriminant_analysis.py)
   * [Linear Regression](machine_learning/linear_regression.py)
   * Local Weighted Learning
@@ -748,6 +748,7 @@
   * [Scoring Algorithm](other/scoring_algorithm.py)
   * [Sdes](other/sdes.py)
   * [Tower Of Hanoi](other/tower_of_hanoi.py)
+  * [Word Search](other/word_search.py)
 
 ## Physics
   * [Altitude Pressure](physics/altitude_pressure.py)
 
@@ -1,44 +1,48 @@
+from pathlib import Path
+
 import numpy as np
 from PIL import Image
 
 
-def rgb2gray(rgb: np.array) -> np.array:
+def rgb_to_gray(rgb: np.ndarray) -> np.ndarray:
     """
     Return gray image from rgb image
-    >>> rgb2gray(np.array([[[127, 255, 0]]]))
+
+    >>> rgb_to_gray(np.array([[[127, 255, 0]]]))
     array([[187.6453]])
-    >>> rgb2gray(np.array([[[0, 0, 0]]]))
+    >>> rgb_to_gray(np.array([[[0, 0, 0]]]))
     array([[0.]])
-    >>> rgb2gray(np.array([[[2, 4, 1]]]))
+    >>> rgb_to_gray(np.array([[[2, 4, 1]]]))
     array([[3.0598]])
-    >>> rgb2gray(np.array([[[26, 255, 14], [5, 147, 20], [1, 200, 0]]]))
+    >>> rgb_to_gray(np.array([[[26, 255, 14], [5, 147, 20], [1, 200, 0]]]))
     array([[159.0524,  90.0635, 117.6989]])
     """
     r, g, b = rgb[:, :, 0], rgb[:, :, 1], rgb[:, :, 2]
     return 0.2989 * r + 0.5870 * g + 0.1140 * b
 
 
-def gray2binary(gray: np.array) -> np.array:
+def gray_to_binary(gray: np.ndarray) -> np.ndarray:
     """
     Return binary image from gray image
 
-    >>> gray2binary(np.array([[127, 255, 0]]))
+    >>> gray_to_binary(np.array([[127, 255, 0]]))
     array([[False,  True, False]])
-    >>> gray2binary(np.array([[0]]))
+    >>> gray_to_binary(np.array([[0]]))
     array([[False]])
-    >>> gray2binary(np.array([[26.2409, 4.9315, 1.4729]]))
+    >>> gray_to_binary(np.array([[26.2409, 4.9315, 1.4729]]))
     array([[False, False, False]])
-    >>> gray2binary(np.array([[26, 255, 14], [5, 147, 20], [1, 200, 0]]))
+    >>> gray_to_binary(np.array([[26, 255, 14], [5, 147, 20], [1, 200, 0]]))
     array([[False,  True, False],
            [False,  True, False],
            [False,  True, False]])
     """
     return (gray > 127) & (gray <= 255)
 
 
-def erosion(image: np.array, kernel: np.array) -> np.array:
+def erosion(image: np.ndarray, kernel: np.ndarray) -> np.ndarray:
     """
     Return eroded image
+
     >>> erosion(np.array([[True, True, False]]), np.array([[0, 1, 0]]))
     array([[False, False, False]])
     >>> erosion(np.array([[True, False, False]]), np.array([[1, 1, 0]]))
@@ -62,14 +66,17 @@ def erosion(image: np.array, kernel: np.array) -> np.array:
     return output
 
 
-# kernel to be applied
-structuring_element = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
-
 if __name__ == "__main__":
     # read original image
-    image = np.array(Image.open(r"..\image_data\lena.jpg"))
+    lena_path = Path(__file__).resolve().parents[1] / "image_data" / "lena.jpg"
+    lena = np.array(Image.open(lena_path))
+
+    # kernel to be applied
+    structuring_element = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
+
     # Apply erosion operation to a binary image
-    output = erosion(gray2binary(rgb2gray(image)), structuring_element)
+    output = erosion(gray_to_binary(rgb_to_gray(lena)), structuring_element)
+
     # Save the output image
     pil_img = Image.fromarray(output).convert("RGB")
     pil_img.save("result_erosion.png")
@@ -0,0 +1,193 @@
+"""
+https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm
+The Smith-Waterman algorithm is a dynamic programming algorithm used for sequence
+alignment. It is particularly useful for finding similarities between two sequences,
+such as DNA or protein sequences. In this implementation, gaps are penalized
+linearly, meaning that the score is reduced by a fixed amount for each gap introduced
+in the alignment. However, it's important to note that the Smith-Waterman algorithm
+supports other gap penalty methods as well.
+"""
+
+
+def score_function(
+    source_char: str,
+    target_char: str,
+    match: int = 1,
+    mismatch: int = -1,
+    gap: int = -2,
+) -> int:
+    """
+    Score one aligned pair of characters.
+    Returns `gap` when either character is the gap symbol '-', `match` when both
+    characters are equal and `mismatch` otherwise (defaults: -2, 1 and -1).
+    >>> score_function('A', 'A')
+    1
+    >>> score_function('A', 'C')
+    -1
+    >>> score_function('-', 'A')
+    -2
+    >>> score_function('A', '-')
+    -2
+    >>> score_function('-', '-')
+    -2
+    """
+    if source_char == "-" or target_char == "-":
+        return gap
+    return mismatch if source_char != target_char else match
+
+
+def smith_waterman(
+    query: str,
+    subject: str,
+    match: int = 1,
+    mismatch: int = -1,
+    gap: int = -2,
+) -> list[list[int]]:
+    """
+    Build the Smith-Waterman local alignment score matrix for two sequences.
+
+    Both sequences are uppercased first, so the comparison is case-insensitive.
+    The matrix has len(query) + 1 rows and len(subject) + 1 columns. Row 0 and
+    column 0 stay at zero; every other cell holds the best score of a local
+    alignment ending at that pair of characters, clamped so it is never negative.
+
+    :param query: first sequence, one matrix row per character
+    :param subject: second sequence, one matrix column per character
+    :param match: score for two identical characters
+    :param mismatch: score for two different characters
+    :param gap: score added for every gap position (linear gap penalty); also
+        returned by score_function when either character is '-'
+    :return: the score matrix as a list of rows
+
+    >>> smith_waterman('ACAC', 'CA')
+    [[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]]
+
+    >>> smith_waterman('acac', 'ca')
+    [[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]]
+
+    >>> smith_waterman('ACAC', 'ca')
+    [[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]]
+
+    >>> smith_waterman('acac', 'CA')
+    [[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]]
+
+    >>> smith_waterman('ACAC', '')
+    [[0], [0], [0], [0], [0]]
+
+    >>> smith_waterman('', 'CA')
+    [[0, 0, 0]]
+
+    >>> smith_waterman('AGT', 'AGT')
+    [[0, 0, 0, 0], [0, 1, 0, 0], [0, 0, 2, 0], [0, 0, 0, 3]]
+
+    >>> smith_waterman('AGT', 'GTA')
+    [[0, 0, 0, 0], [0, 0, 0, 1], [0, 1, 0, 0], [0, 0, 2, 0]]
+
+    >>> smith_waterman('AGT', 'GTC')
+    [[0, 0, 0, 0], [0, 0, 0, 0], [0, 1, 0, 0], [0, 0, 2, 0]]
+
+    >>> smith_waterman('AGT', 'G')
+    [[0, 0], [0, 0], [0, 1], [0, 0]]
+
+    >>> smith_waterman('G', 'AGT')
+    [[0, 0, 0, 0], [0, 0, 1, 0]]
+
+    >>> smith_waterman('AGT', 'AGTCT')
+    [[0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0], [0, 0, 2, 0, 0, 0], [0, 0, 0, 3, 1, 1]]
+
+    >>> smith_waterman('AGTCT', 'AGT')
+    [[0, 0, 0, 0], [0, 1, 0, 0], [0, 0, 2, 0], [0, 0, 0, 3], [0, 0, 0, 1], [0, 0, 0, 1]]
+
+    >>> smith_waterman('AGTCT', 'GTC')
+    [[0, 0, 0, 0], [0, 0, 0, 0], [0, 1, 0, 0], [0, 0, 2, 0], [0, 0, 0, 3], [0, 0, 1, 1]]
+    """
+    # the comparison is case-insensitive: work on uppercased copies
+    query = query.upper()
+    subject = subject.upper()
+
+    # (len(query) + 1) x (len(subject) + 1) matrix of zeros; the extra first row
+    # and column act as the zero-score border of the recurrence
+    score = [[0] * (len(subject) + 1) for _ in range(len(query) + 1)]
+
+    for i, query_char in enumerate(query, start=1):
+        above, current = score[i - 1], score[i]
+        for j, subject_char in enumerate(subject, start=1):
+            # extend the alignment that ends diagonally above-left by this pair
+            diagonal = above[j - 1] + score_function(
+                query_char, subject_char, match=match, mismatch=mismatch, gap=gap
+            )
+            # or pay the gap penalty to come from the cell above / to the left
+            vertical = above[j] + gap
+            horizontal = current[j - 1] + gap
+
+            # local alignment: a cell never drops below zero
+            current[j] = max(0, diagonal, vertical, horizontal)
+
+    return score
+
+
+def traceback(score: list[list[int]], query: str, subject: str) -> str:
+    r"""
+    Recover the best local alignment from a Smith-Waterman score matrix.
+    Starts at the highest scoring cell and walks back iteratively until a cell with
+    score 0 (or the matrix border) is reached. Assumes the default scoring values.
+    >>> traceback([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]], 'ACAC', 'CA')
+    'CA\nCA'
+    >>> traceback([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]], 'acac', 'ca')
+    'CA\nCA'
+    >>> traceback([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]], 'ACAC', 'ca')
+    'CA\nCA'
+    >>> traceback([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]], 'acac', 'CA')
+    'CA\nCA'
+    >>> traceback([[0, 0, 0]], 'ACAC', '')
+    ''
+    >>> traceback([[0, 0, 0], [0, 0, 0], [0, 0, 1]], 'TA', 'CA')
+    'A\nA'
+    """
+    # alignment is case-insensitive: compare the uppercased sequences
+    query, subject = query.upper(), subject.upper()
+    # find the indices of the maximum value in the score matrix
+    max_value = float("-inf")
+    i_max = j_max = 0
+    for i, row in enumerate(score):
+        for j, value in enumerate(row):
+            if value > max_value:
+                max_value = value
+                i_max, j_max = i, j
+    # Traceback logic to find optimal alignment
+    i, j = i_max, j_max
+    align1 = align2 = ""
+    gap = score_function("-", "-")
+    # best cell on the border (e.g. empty input, all-zero matrix): nothing to align
+    if i == 0 or j == 0:
+        return ""
+    # stop at the first 0 cell: the local alignment starts there, not at the border
+    while i > 0 and j > 0 and score[i][j] > 0:
+        if score[i][j] == score[i - 1][j - 1] + score_function(
+            query[i - 1], subject[j - 1]
+        ):
+            # optimal path is a diagonal take both letters
+            align1 = query[i - 1] + align1
+            align2 = subject[j - 1] + align2
+            i -= 1
+            j -= 1
+        elif score[i][j] == score[i - 1][j] + gap:
+            # optimal path is a vertical
+            align1 = query[i - 1] + align1
+            align2 = f"-{align2}"
+            i -= 1
+        else:
+            # optimal path is a horizontal
+            align1 = f"-{align1}"
+            align2 = subject[j - 1] + align2
+            j -= 1
+
+    return f"{align1}\n{align2}"
+
+
+if __name__ == "__main__":
+    query = "HEAGAWGHEE"
+    subject = "PAWHEAE"
+    # score the two demo sequences, then print the best local alignment
+    score = smith_waterman(query, subject, match=1, mismatch=-1, gap=-2)
+    print(traceback(score, query, subject))
@@ -11,10 +11,10 @@
   - initial_centroids , initial centroid values generated by utility function(mentioned
     in usage).
   - maxiter , maximum number of iterations to process.
-  - heterogeneity , empty list that will be filled with hetrogeneity values if passed
+  - heterogeneity , empty list that will be filled with heterogeneity values if passed
     to kmeans func.
 Usage:
-  1. define 'k' value, 'X' features array and 'hetrogeneity' empty list
+  1. define 'k' value, 'X' features array and 'heterogeneity' empty list
   2. create initial_centroids,
         initial_centroids = get_initial_centroids(
             X,
@@ -31,8 +31,8 @@
             record_heterogeneity=heterogeneity,
             verbose=True # whether to print logs in console or not.(default=False)
             )
-  4. Plot the loss function, hetrogeneity values for every iteration saved in
-     hetrogeneity list.
+  4. Plot the loss function and heterogeneity values for every iteration saved in
+     heterogeneity list.
         plot_heterogeneity(
             heterogeneity,
             k
@@ -198,13 +198,10 @@ def report_generator(
     df: pd.DataFrame, clustering_variables: np.ndarray, fill_missing_report=None
 ) -> pd.DataFrame:
     """
-    Function generates easy-erading clustering report. It takes 2 arguments as an input:
-        DataFrame - dataframe with predicted cluester column;
-        FillMissingReport - dictionary of rules how we are going to fill missing
-        values of for final report generate (not included in modeling);
-    in order to run the function following libraries must be imported:
-        import pandas as pd
-        import numpy as np
+    Generates a clustering report from df (dataframe with predicted cluster column),
+    clustering_variables (features flagged in the report's "Mark" column) and
+    fill_missing_report (dictionary of rules on how we are going to fill in missing
+    values for final generated report, not included in modelling).
     >>> data = pd.DataFrame()
     >>> data['numbers'] = [1, 2, 3]
     >>> data['col1'] = [0.5, 2.5, 4.5]
@@ -306,10 +303,10 @@ def report_generator(
     a.columns = report.columns  # rename columns to match report
     report = report.drop(
         report[report.Type == "count"].index
-    )  # drop count values except cluster size
+    )  # drop count values except for cluster size
     report = pd.concat(
         [report, a, clustersize, clusterproportion], axis=0
-    )  # concat report with clustert size and nan values
+    )  # concat report with cluster size and nan values
     report["Mark"] = report["Features"].isin(clustering_variables)
     cols = report.columns.tolist()
     cols = cols[0:2] + cols[-1:] + cols[2:-1]