Skip to content

Commit c84b2b0

Browse files
Merge branch 'TheAlgorithms:master' into main
2 parents fab2af2 + dec9643 commit c84b2b0

File tree

10 files changed

+728
-165
lines changed

10 files changed

+728
-165
lines changed

Diff for: DIRECTORY.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
* [Binary Shifts](bit_manipulation/binary_shifts.py)
4444
* [Binary Twos Complement](bit_manipulation/binary_twos_complement.py)
4545
* [Binary Xor Operator](bit_manipulation/binary_xor_operator.py)
46+
* [Bitwise Addition Recursive](bit_manipulation/bitwise_addition_recursive.py)
4647
* [Count 1S Brian Kernighan Method](bit_manipulation/count_1s_brian_kernighan_method.py)
4748
* [Count Number Of One Bits](bit_manipulation/count_number_of_one_bits.py)
4849
* [Gray Code Sequence](bit_manipulation/gray_code_sequence.py)
@@ -507,7 +508,6 @@
507508
* [Gradient Descent](machine_learning/gradient_descent.py)
508509
* [K Means Clust](machine_learning/k_means_clust.py)
509510
* [K Nearest Neighbours](machine_learning/k_nearest_neighbours.py)
510-
* [Knn Sklearn](machine_learning/knn_sklearn.py)
511511
* [Linear Discriminant Analysis](machine_learning/linear_discriminant_analysis.py)
512512
* [Linear Regression](machine_learning/linear_regression.py)
513513
* Local Weighted Learning
@@ -748,6 +748,7 @@
748748
* [Scoring Algorithm](other/scoring_algorithm.py)
749749
* [Sdes](other/sdes.py)
750750
* [Tower Of Hanoi](other/tower_of_hanoi.py)
751+
* [Word Search](other/word_search.py)
751752

752753
## Physics
753754
* [Altitude Pressure](physics/altitude_pressure.py)
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,48 @@
1+
from pathlib import Path
2+
13
import numpy as np
24
from PIL import Image
35

46

5-
def rgb2gray(rgb: np.array) -> np.array:
7+
def rgb_to_gray(rgb: np.ndarray) -> np.ndarray:
    """
    Return gray image from rgb image

    The last axis of ``rgb`` holds the colour channels (red, green, blue at
    indices 0, 1, 2; any further channels are ignored).  Any number of leading
    axes is accepted, so a single pixel, one image or a batch of images all
    work; the result has the shape of ``rgb`` without its last axis.

    >>> rgb_to_gray(np.array([[[127, 255, 0]]]))
    array([[187.6453]])
    >>> rgb_to_gray(np.array([[[0, 0, 0]]]))
    array([[0.]])
    >>> rgb_to_gray(np.array([[[2, 4, 1]]]))
    array([[3.0598]])
    >>> rgb_to_gray(np.array([[[26, 255, 14], [5, 147, 20], [1, 200, 0]]]))
    array([[159.0524,  90.0635, 117.6989]])
    >>> rgb_to_gray(np.zeros((2, 4, 5, 3))).shape
    (2, 4, 5)
    """
    # Ellipsis indexing selects a channel on the last axis regardless of rank.
    r, g, b = rgb[..., 0], rgb[..., 1], rgb[..., 2]
    # Weighted sum of the three channels (same weights and order as before).
    return 0.2989 * r + 0.5870 * g + 0.1140 * b
1922

2023

21-
def gray2binary(gray: np.array) -> np.array:
24+
def gray_to_binary(gray: np.ndarray) -> np.ndarray:
    """
    Return binary image from gray image

    A pixel becomes True when its value is strictly greater than 127 and at
    most 255; every other pixel becomes False.

    >>> gray_to_binary(np.array([[127, 255, 0]]))
    array([[False,  True, False]])
    >>> gray_to_binary(np.array([[0]]))
    array([[False]])
    >>> gray_to_binary(np.array([[26.2409, 4.9315, 1.4729]]))
    array([[False, False, False]])
    >>> gray_to_binary(np.array([[26, 255, 14], [5, 147, 20], [1, 200, 0]]))
    array([[False,  True, False],
           [False,  True, False],
           [False,  True, False]])
    """
    above_threshold = gray > 127
    within_upper_bound = gray <= 255
    return above_threshold & within_upper_bound
3740

3841

39-
def erosion(image: np.array, kernel: np.array) -> np.array:
42+
def erosion(image: np.ndarray, kernel: np.ndarray) -> np.ndarray:
4043
"""
4144
Return eroded image
45+
4246
>>> erosion(np.array([[True, True, False]]), np.array([[0, 1, 0]]))
4347
array([[False, False, False]])
4448
>>> erosion(np.array([[True, False, False]]), np.array([[1, 1, 0]]))
@@ -62,14 +66,17 @@ def erosion(image: np.array, kernel: np.array) -> np.array:
6266
return output
6367

6468

65-
# kernel to be applied
66-
structuring_element = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
67-
6869
if __name__ == "__main__":
    # Read the sample picture from the image_data directory beside this file.
    # NOTE(review): the previous version read from "..\image_data" (one level
    # up, relative to the working directory) -- confirm this location is right.
    lena_path = Path(__file__).resolve().parent.joinpath("image_data", "lena.jpg")
    lena = np.array(Image.open(lena_path))

    # Plus-shaped kernel applied by the erosion.
    structuring_element = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])

    # Convert to a binary image first, then erode it.
    binary = gray_to_binary(rgb_to_gray(lena))
    output = erosion(binary, structuring_element)

    # Write the result to the current working directory.
    Image.fromarray(output).convert("RGB").save("result_erosion.png")

Diff for: dynamic_programming/smith_waterman.py

+193
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
"""
2+
https://en.wikipedia.org/wiki/Smith%E2%80%93Waterman_algorithm
3+
The Smith-Waterman algorithm is a dynamic programming algorithm used for sequence
4+
alignment. It is particularly useful for finding similarities between two sequences,
5+
such as DNA or protein sequences. In this implementation, gaps are penalized
6+
linearly, meaning that the score is reduced by a fixed amount for each gap introduced
7+
in the alignment. However, it's important to note that the Smith-Waterman algorithm
8+
supports other gap penalty methods as well.
9+
"""
10+
11+
12+
def score_function(
    source_char: str,
    target_char: str,
    match: int = 1,
    mismatch: int = -1,
    gap: int = -2,
) -> int:
    """
    Score one pair of aligned characters.

    If either character is the gap symbol '-', ``gap`` is returned.  Otherwise
    ``match`` is returned for equal characters and ``mismatch`` for different
    ones.  With the default arguments these scores are 1, -1 and -2.

    >>> score_function('A', 'A')
    1
    >>> score_function('A', 'C')
    -1
    >>> score_function('-', 'A')
    -2
    >>> score_function('A', '-')
    -2
    >>> score_function('-', '-')
    -2
    """
    # A gap on either side takes precedence over the match/mismatch comparison.
    if source_char == "-" or target_char == "-":
        return gap
    if source_char == target_char:
        return match
    return mismatch
37+
38+
39+
def smith_waterman(
    query: str,
    subject: str,
    match: int = 1,
    mismatch: int = -1,
    gap: int = -2,
) -> list[list[int]]:
    """
    Perform the Smith-Waterman local sequence alignment algorithm.

    Both sequences are compared case-insensitively (they are upper-cased
    first).  Returns a 2D list with ``len(query) + 1`` rows and
    ``len(subject) + 1`` columns.  Each value is the score of the best local
    alignment ending at that point, using ``match``/``mismatch`` for aligned
    characters and the linear penalty ``gap`` for insertions and deletions;
    scores never drop below 0.

    >>> smith_waterman('ACAC', 'CA')
    [[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]]

    >>> smith_waterman('acac', 'ca')
    [[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]]

    >>> smith_waterman('ACAC', 'ca')
    [[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]]

    >>> smith_waterman('acac', 'CA')
    [[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]]

    >>> smith_waterman('ACAC', '')
    [[0], [0], [0], [0], [0]]

    >>> smith_waterman('', 'CA')
    [[0, 0, 0]]

    >>> smith_waterman('AGT', 'AGT')
    [[0, 0, 0, 0], [0, 1, 0, 0], [0, 0, 2, 0], [0, 0, 0, 3]]

    >>> smith_waterman('AGT', 'GTA')
    [[0, 0, 0, 0], [0, 0, 0, 1], [0, 1, 0, 0], [0, 0, 2, 0]]

    >>> smith_waterman('AGT', 'GTC')
    [[0, 0, 0, 0], [0, 0, 0, 0], [0, 1, 0, 0], [0, 0, 2, 0]]

    >>> smith_waterman('AGT', 'G')
    [[0, 0], [0, 0], [0, 1], [0, 0]]

    >>> smith_waterman('G', 'AGT')
    [[0, 0, 0, 0], [0, 0, 1, 0]]

    >>> smith_waterman('AGT', 'AGTCT')
    [[0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0], [0, 0, 2, 0, 0, 0], [0, 0, 0, 3, 1, 1]]

    >>> smith_waterman('AGTCT', 'AGT')
    [[0, 0, 0, 0], [0, 1, 0, 0], [0, 0, 2, 0], [0, 0, 0, 3], [0, 0, 0, 1], [0, 0, 0, 1]]

    >>> smith_waterman('AGTCT', 'GTC')
    [[0, 0, 0, 0], [0, 0, 0, 0], [0, 1, 0, 0], [0, 0, 2, 0], [0, 0, 0, 3], [0, 0, 1, 1]]
    """
    # make both query and subject uppercase
    query = query.upper()
    subject = subject.upper()

    # Initialize score matrix; row 0 and column 0 stay 0 (empty prefixes)
    m = len(query)
    n = len(subject)
    score = [[0] * (n + 1) for _ in range(m + 1)]

    for i in range(1, m + 1):
        for j in range(1, n + 1):
            # Align query[i - 1] with subject[j - 1].  The local is named
            # ``diagonal`` so that it does not rebind the ``match`` parameter.
            diagonal = score[i - 1][j - 1] + score_function(
                query[i - 1],
                subject[j - 1],
                match=match,
                mismatch=mismatch,
                gap=gap,
            )
            # Gap in the subject (consume a query character only)
            delete = score[i - 1][j] + gap
            # Gap in the query (consume a subject character only)
            insert = score[i][j - 1] + gap

            # Take maximum score; 0 restarts the local alignment
            score[i][j] = max(0, diagonal, delete, insert)

    return score
127+
128+
129+
def traceback(
    score: list[list[int]],
    query: str,
    subject: str,
    match: int = 1,
    mismatch: int = -1,
    gap: int = -2,
) -> str:
    r"""
    Perform traceback to find the optimal local alignment.

    Starts from the highest scoring cell in the matrix (the first one in
    row-major order if there are ties) and traces back until a 0 score is
    found.  Returns the two aligned strings joined by a newline, or an empty
    string when the matrix contains no positive score.

    ``match``, ``mismatch`` and ``gap`` must be the values that were used to
    build ``score``; they default to the defaults of ``smith_waterman``.

    >>> traceback([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]], 'ACAC', 'CA')
    'CA\nCA'
    >>> traceback([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]], 'acac', 'ca')
    'CA\nCA'
    >>> traceback([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]], 'ACAC', 'ca')
    'CA\nCA'
    >>> traceback([[0, 0, 0], [0, 0, 1], [0, 1, 0], [0, 0, 2], [0, 1, 0]], 'acac', 'CA')
    'CA\nCA'
    >>> traceback([[0, 0, 0]], 'ACAC', '')
    ''

    The alignment stops at the first zero cell instead of running to the
    matrix border:

    >>> matrix = [[0] * 5, [0] * 5, [0] * 5, [0, 0, 0, 1, 0], [0, 0, 0, 0, 2]]
    >>> traceback(matrix, 'TTAG', 'CCAG')
    'AG\nAG'
    """
    # make both query and subject uppercase
    query = query.upper()
    subject = subject.upper()

    # find the indices of the maximum value in the score matrix
    max_value = float("-inf")
    i_max = j_max = 0
    for i, row in enumerate(score):
        for j, value in enumerate(row):
            if value > max_value:
                max_value = value
                i_max, j_max = i, j

    # guard against empty query or subject (or a matrix without any match)
    if i_max == 0 or j_max == 0:
        return ""

    # Characters are collected back-to-front and reversed once at the end,
    # which avoids repeatedly prepending to a string.
    i, j = i_max, j_max
    aligned_query: list[str] = []
    aligned_subject: list[str] = []

    # A local alignment ends where the score drops to 0, so stop there rather
    # than padding the result with gaps all the way to the matrix border.
    while i > 0 and j > 0 and score[i][j] > 0:
        diagonal = score[i - 1][j - 1] + score_function(
            query[i - 1],
            subject[j - 1],
            match=match,
            mismatch=mismatch,
            gap=gap,
        )
        if score[i][j] == diagonal:
            # optimal path is a diagonal: take both letters
            aligned_query.append(query[i - 1])
            aligned_subject.append(subject[j - 1])
            i -= 1
            j -= 1
        elif score[i][j] == score[i - 1][j] + gap:
            # optimal path is a vertical: gap in the subject
            aligned_query.append(query[i - 1])
            aligned_subject.append("-")
            i -= 1
        else:
            # optimal path is a horizontal: gap in the query
            aligned_query.append("-")
            aligned_subject.append(subject[j - 1])
            j -= 1

    align1 = "".join(reversed(aligned_query))
    align2 = "".join(reversed(aligned_subject))
    return f"{align1}\n{align2}"
186+
187+
188+
if __name__ == "__main__":
    # Demo: build the score matrix for one sequence pair, then print the
    # best local alignment recovered from it.
    query, subject = "HEAGAWGHEE", "PAWHEAE"

    score = smith_waterman(query, subject, match=1, mismatch=-1, gap=-2)
    alignment = traceback(score, query, subject)
    print(alignment)

Diff for: machine_learning/k_means_clust.py

+10-13
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@
1111
- initial_centroids , initial centroid values generated by utility function(mentioned
1212
in usage).
1313
- maxiter , maximum number of iterations to process.
14-
- heterogeneity , empty list that will be filled with hetrogeneity values if passed
14+
- heterogeneity , empty list that will be filled with heterogeneity values if passed
1515
to kmeans func.
1616
Usage:
17-
1. define 'k' value, 'X' features array and 'hetrogeneity' empty list
17+
1. define 'k' value, 'X' features array and 'heterogeneity' empty list
1818
2. create initial_centroids,
1919
initial_centroids = get_initial_centroids(
2020
X,
@@ -31,8 +31,8 @@
3131
record_heterogeneity=heterogeneity,
3232
verbose=True # whether to print logs in console or not.(default=False)
3333
)
34-
4. Plot the loss function, hetrogeneity values for every iteration saved in
35-
hetrogeneity list.
34+
4. Plot the loss function and heterogeneity values for every iteration saved in
35+
heterogeneity list.
3636
plot_heterogeneity(
3737
heterogeneity,
3838
k
@@ -198,13 +198,10 @@ def report_generator(
198198
df: pd.DataFrame, clustering_variables: np.ndarray, fill_missing_report=None
199199
) -> pd.DataFrame:
200200
"""
201-
Function generates easy-erading clustering report. It takes 2 arguments as an input:
202-
DataFrame - dataframe with predicted cluester column;
203-
FillMissingReport - dictionary of rules how we are going to fill missing
204-
values of for final report generate (not included in modeling);
205-
in order to run the function following libraries must be imported:
206-
import pandas as pd
207-
import numpy as np
201+
Generates a clustering report. This function takes 2 arguments as input:
202+
df - dataframe with predicted cluster column
203+
fill_missing_report - dictionary of rules on how we are going to fill in missing
204+
values for final generated report (not included in modelling);
208205
>>> data = pd.DataFrame()
209206
>>> data['numbers'] = [1, 2, 3]
210207
>>> data['col1'] = [0.5, 2.5, 4.5]
@@ -306,10 +303,10 @@ def report_generator(
306303
a.columns = report.columns # rename columns to match report
307304
report = report.drop(
308305
report[report.Type == "count"].index
309-
) # drop count values except cluster size
306+
) # drop count values except for cluster size
310307
report = pd.concat(
311308
[report, a, clustersize, clusterproportion], axis=0
312-
) # concat report with clustert size and nan values
309+
) # concat report with cluster size and nan values
313310
report["Mark"] = report["Features"].isin(clustering_variables)
314311
cols = report.columns.tolist()
315312
cols = cols[0:2] + cols[-1:] + cols[2:-1]

0 commit comments

Comments
 (0)