forked from TheAlgorithms/Python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbasic_string.py
196 lines (171 loc) · 8.77 KB
/
basic_string.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
"""
Simple multithreaded algorithm to show how the 4 phases of a genetic algorithm works
(Evaluation, Selection, Crossover and Mutation)
https://en.wikipedia.org/wiki/Genetic_algorithm
Author: D4rkia
"""
from __future__ import annotations
import random
# Maximum size of the population. Bigger could be faster but is more memory expensive.
N_POPULATION = 200
# Number of elements selected in every generation of evolution. The selection takes
# place from best to worst of that generation and must be smaller than N_POPULATION.
N_SELECTED = 50
# Probability that an element of a generation can mutate, changing one of its genes.
# This will guarantee that all genes will be used during evolution.
MUTATION_PROBABILITY = 0.4
# Just a seed to improve randomness required by the algorithm.
random.seed(random.randint(0, 1000))
# This function uses a genetic algorithm to evolve the string by randomly selecting characters from the genes list and comparing
# the resulting string to the original string, repeating this process until the two strings match.
def basic(target: str, genes: list[str], debug: bool = True) -> tuple[int, int, str]:
"""
These lines of code are testing a function called "basic"
Verify that the target contains no genes besides the ones inside genes variable.
>>> from string import ascii_lowercase
>>> basic("doctest", ascii_lowercase, debug=False)[2]
'doctest'
>>> genes = list(ascii_lowercase)
>>> genes.remove("e")
>>> basic("test", genes)
Traceback (most recent call last):
...
ValueError: ['e'] is not in genes list, evolution cannot converge
>>> genes.remove("s")
>>> basic("test", genes)
Traceback (most recent call last):
...
ValueError: ['e', 's'] is not in genes list, evolution cannot converge
>>> genes.remove("t")
>>> basic("test", genes)
Traceback (most recent call last):
...
ValueError: ['e', 's', 't'] is not in genes list, evolution cannot converge
"""
# Verify if N_POPULATION is bigger than N_SELECTED
if N_POPULATION < N_SELECTED:
raise ValueError(f"{N_POPULATION} must be bigger than {N_SELECTED}")
# Verify that the target contains no genes besides the ones inside genes variable.
not_in_genes_list = sorted({c for c in target if c not in genes})
if not_in_genes_list:
raise ValueError(
f"{not_in_genes_list} is not in genes list, evolution cannot converge"
)
# Generate random starting population.
population = []
for _ in range(N_POPULATION):
population.append("".join([random.choice(genes) for i in range(len(target))]))
# Just some logs to know what the algorithms is doing.
generation, total_population = 0, 0
# This loop will end when we find a perfect match for our target.
while True:
generation += 1
total_population += len(population)
# Random population created. Now it's time to evaluate.
def evaluate(item: str, main_target: str = target) -> tuple[str, float]:
"""
Evaluate how similar the item is with the target by just
counting each char in the right position
>>> evaluate("Helxo Worlx", Hello World)
["Helxo Worlx", 9]
"""
score = len(
[g for position, g in enumerate(item) if g == main_target[position]]
)
return (item, float(score))
# Adding a bit of concurrency can make everything faster,
#
# import concurrent.futures
# population_score: list[tuple[str, float]] = []
# with concurrent.futures.ThreadPoolExecutor(
# max_workers=NUM_WORKERS) as executor:
# futures = {executor.submit(evaluate, item) for item in population}
# concurrent.futures.wait(futures)
# population_score = [item.result() for item in futures]
#
# but with a simple algorithm like this, it will probably be slower.
# We just need to call evaluate for every item inside the population.
population_score = [evaluate(item) for item in population]
# Check if there is a matching evolution.
population_score = sorted(population_score, key=lambda x: x[1], reverse=True)
if population_score[0][0] == target:
return (generation, total_population, population_score[0][0])
# Print the best result every 10 generation.
# Just to know that the algorithm is working.
if debug and generation % 10 == 0:
print(
f"\nGeneration: {generation}"
f"\nTotal Population:{total_population}"
f"\nBest score: {population_score[0][1]}"
f"\nBest string: {population_score[0][0]}"
)
# Flush the old population, keeping some of the best evolutions.
# Keeping this avoid regression of evolution.
population_best = population[: int(N_POPULATION / 3)]
population.clear()
population.extend(population_best)
# Normalize population score to be between 0 and 1.
population_score = [
(item, score / len(target)) for item, score in population_score
]
# Select, crossover and mutate a new population.
"""Select is responsible for selecting a second parent and generating a new population.
The function takes a tuple containing the first parent's string and its fitness score as input"""
def select(parent_1: tuple[str, float]) -> list[str]:
"""Select the second parent and generate new population"""
pop = []
# Generate more children proportionally to the fitness score.
child_n = int(parent_1[1] * 100) + 1
child_n = 10 if child_n >= 10 else child_n
for _ in range(child_n):
parent_2 = population_score[ # noqa: B023
random.randint(0, N_SELECTED)
][0]
child_1, child_2 = crossover(parent_1[0], parent_2)
# Append new string to the population list.
pop.append(mutate(child_1))
pop.append(mutate(child_2))
return pop
"""This function defines a crossover operation between two parent strings,
where a random slice point is chosen and the two parts of the parent strings
are combined to form two child strings. The function returns a tuple containing the two child strings."""
def crossover(parent_1: str, parent_2: str) -> tuple[str, str]:
"""Slice and combine two string at a random point."""
random_slice = random.randint(0, len(parent_1) - 1)
child_1 = parent_1[:random_slice] + parent_2[random_slice:]
child_2 = parent_2[:random_slice] + parent_1[random_slice:]
return (child_1, child_2)
"""This function takes a string, child, as an argument and returns a string.
The function mutates a random gene of the child by replacing it with a random gene from a list called "genes".
The mutation occurs only if a random number generated by the "random.uniform" function is less than a constant value called
"MUTATION_PROBABILITY".
The code first converts the string child to a list called "child_list". It then selects a random index from the child_list
using "random.randint" and replaces the value at that index with a random gene from "genes".
Finally, the function returns the mutated child as a string by joining the "child_list"."""
def mutate(child: str) -> str:
"""Mutate a random gene of a child with another one from the list."""
child_list = list(child)
if random.uniform(0, 1) < MUTATION_PROBABILITY:
child_list[random.randint(0, len(child)) - 1] = random.choice(genes)
return "".join(child_list)
# This is selection
for i in range(N_SELECTED):
population.extend(select(population_score[int(i)]))
# Check if the population has already reached the maximum value and if so,
# break the cycle. If this check is disabled, the algorithm will take
# forever to compute large strings, but will also calculate small strings in
# a far fewer generations.
if len(population) > N_POPULATION:
break
if __name__ == "__main__":
target_str = (
"This is a genetic algorithm to evaluate, combine, evolve, and mutate a string!"
)
genes_list = list(
" ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklm"
"nopqrstuvwxyz.,;!?+-*#@^'èéòà€ù=)(&%$£/\\"
)
generation, population, target = basic(target_str, genes_list)
print(
f"\nGeneration: {generation}\nTotal Population: {population}\nTarget: {target}"
)