Skip to content

Commit c08c4b5

Browse files
committed
Added Burrows-Wheeler transform algorithm.
1 parent f0e1631 commit c08c4b5

File tree

1 file changed

+165
-0
lines changed

1 file changed

+165
-0
lines changed

compression/burrows_wheeler.py

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
"""
2+
https://en.wikipedia.org/wiki/Burrows%E2%80%93Wheeler_transform
3+
4+
The Burrows–Wheeler transform (BWT, also called block-sorting compression)
5+
rearranges a character string into runs of similar characters. This is useful
6+
for compression, since it tends to be easy to compress a string that has runs
7+
of repeated characters by techniques such as move-to-front transform and
8+
run-length encoding. More importantly, the transformation is reversible,
9+
without needing to store any additional data except the position of the first
10+
original character. The BWT is thus a "free" method of improving the efficiency
11+
of text compression algorithms, costing only some extra computation.
12+
"""
13+
14+
15+
def all_rotations(string):
    """
    Build every left rotation of a string, starting with the string itself.

    :param str string: The string that will be rotated len(string) times.
    :return: A list with len(string) rotations of the parameter string. The
        entry at position i is the string rotated left by i characters, so an
        empty string produces an empty list.
    :rtype: list[str]
    :raises TypeError: If the string parameter is not a str instance.

    Examples:

    >>> all_rotations("^BANANA|")  # doctest: +NORMALIZE_WHITESPACE
    ['^BANANA|', 'BANANA|^', 'ANANA|^B', 'NANA|^BA', 'ANA|^BAN', 'NA|^BANA',
    'A|^BANAN', '|^BANANA']
    >>> all_rotations("a_asa_da_casa")  # doctest: +NORMALIZE_WHITESPACE
    ['a_asa_da_casa', '_asa_da_casaa', 'asa_da_casaa_', 'sa_da_casaa_a',
    'a_da_casaa_as', '_da_casaa_asa', 'da_casaa_asa_', 'a_casaa_asa_d',
    '_casaa_asa_da', 'casaa_asa_da_', 'asaa_asa_da_c', 'saa_asa_da_ca',
    'aa_asa_da_cas']
    >>> all_rotations("panamabanana")  # doctest: +NORMALIZE_WHITESPACE
    ['panamabanana', 'anamabananap', 'namabananapa', 'amabananapan',
    'mabananapana', 'abananapanam', 'bananapanama', 'ananapanamab',
    'nanapanamaba', 'anapanamaban', 'napanamabana', 'apanamabanan']
    >>> all_rotations("")
    []
    >>> all_rotations(5)
    Traceback (most recent call last):
        ...
    TypeError: The parameter string type must be str.
    """
    # isinstance (instead of an exact type comparison) also admits str subclasses.
    if not isinstance(string, str):
        raise TypeError("The parameter string type must be str.")

    # Rotation i moves the first i characters to the end of the string.
    return [string[i:] + string[:i] for i in range(len(string))]
44+
45+
46+
def bwt_transform(string):
    """
    Compute the Burrows-Wheeler transform of a string.

    All rotations of the input are sorted lexicographically; the transform is
    the string made of the last character of each sorted rotation.

    :param str string: The string that will be used at bwt algorithm
    :return: A dictionary with the bwt result: under "bwt_string" the string
        composed of the last char of each row of the ordered rotations list,
        and under "idx_original_string" the index of the original string at
        the ordered rotations list
    :rtype: dict
    :raises TypeError: If the string parameter is not a str instance
    :raises ValueError: If the string parameter is empty

    Examples:

    >>> bwt_transform("^BANANA")
    {'bwt_string': 'BNN^AAA', 'idx_original_string': 6}
    >>> bwt_transform("a_asa_da_casa")
    {'bwt_string': 'aaaadss_c__aa', 'idx_original_string': 3}
    >>> bwt_transform("panamabanana")
    {'bwt_string': 'mnpbnnaaaaaa', 'idx_original_string': 11}
    >>> bwt_transform(4)
    Traceback (most recent call last):
        ...
    TypeError: The parameter string type must be str.
    >>> bwt_transform('')
    Traceback (most recent call last):
        ...
    ValueError: The parameter string must not be empty.
    """
    # isinstance (instead of an exact type comparison) also admits str subclasses.
    if not isinstance(string, str):
        raise TypeError("The parameter string type must be str.")
    if not string:
        raise ValueError("The parameter string must not be empty.")

    # Every left rotation of the input (rotation i moves the first i characters
    # to the end), put in lexicographic order.
    rotations = sorted(string[i:] + string[:i] for i in range(len(string)))
    return {
        # Last column of the sorted rotation table.
        "bwt_string": "".join(rotation[-1] for rotation in rotations),
        # Row that holds the unrotated input; required to invert the transform.
        "idx_original_string": rotations.index(string),
    }
84+
85+
86+
def reverse_bwt(bwt_string, idx_original_string):
    """
    Rebuild the original string from its Burrows-Wheeler transform.

    The sorted rotation table is reconstructed one column at a time: on every
    pass the bwt string is prepended as a new first column and the rows are
    sorted again. After len(bwt_string) passes the table holds all sorted
    rotations, and the requested row is the original string.

    :param str bwt_string: The string returned from bwt algorithm execution
    :param int idx_original_string: The index of the string that was used to
        generate bwt_string at ordered rotations list. Values that can be cast
        with int() are accepted, so floats are truncated toward zero.
    :return: The string used to generate bwt_string when bwt was executed
    :rtype: str
    :raises TypeError: If the bwt_string parameter is not a str instance
    :raises ValueError: If the bwt_string parameter is empty
    :raises TypeError: If the idx_original_string type is not int or if not
        possible to cast it to int
    :raises ValueError: If the idx_original_string value is lower than 0 or
        not lower than len(bwt_string)

    >>> reverse_bwt("BNN^AAA", 6)
    '^BANANA'
    >>> reverse_bwt("aaaadss_c__aa", 3)
    'a_asa_da_casa'
    >>> reverse_bwt("mnpbnnaaaaaa", 11)
    'panamabanana'
    >>> reverse_bwt(4, 11)
    Traceback (most recent call last):
        ...
    TypeError: The parameter bwt_string type must be str.
    >>> reverse_bwt("", 11)
    Traceback (most recent call last):
        ...
    ValueError: The parameter bwt_string must not be empty.
    >>> reverse_bwt("mnpbnnaaaaaa", "asd")
    Traceback (most recent call last):
        ...
    TypeError: The parameter idx_original_string type must be int or passive of cast to int.
    >>> reverse_bwt("mnpbnnaaaaaa", -1)
    Traceback (most recent call last):
        ...
    ValueError: The parameter idx_original_string must not be lower than 0.
    >>> reverse_bwt("mnpbnnaaaaaa", 12)
    Traceback (most recent call last):
        ...
    ValueError: The parameter idx_original_string must be lower than len(bwt_string).
    >>> reverse_bwt("mnpbnnaaaaaa", 11.0)
    'panamabanana'
    >>> reverse_bwt("mnpbnnaaaaaa", 11.4)
    'panamabanana'
    """
    # isinstance (instead of an exact type comparison) also admits str subclasses.
    if not isinstance(bwt_string, str):
        raise TypeError("The parameter bwt_string type must be str.")
    if not bwt_string:
        raise ValueError("The parameter bwt_string must not be empty.")
    try:
        idx_original_string = int(idx_original_string)
    except (TypeError, ValueError):
        # int() raises ValueError for text such as "asd" and TypeError for
        # objects such as None; both are reported with the same message.
        raise TypeError(
            "The parameter idx_original_string type must be int or passive "
            "of cast to int."
        ) from None
    if idx_original_string < 0:
        raise ValueError("The parameter idx_original_string must not be lower than 0.")
    # Reject an out-of-range row before doing the reconstruction work, instead
    # of failing with a bare IndexError on the final lookup.
    if idx_original_string >= len(bwt_string):
        raise ValueError(
            "The parameter idx_original_string must be lower than len(bwt_string)."
        )

    ordered_rotations = [""] * len(bwt_string)
    for _ in range(len(bwt_string)):
        # Prepend the bwt column to every row, then restore sorted order.
        ordered_rotations = sorted(
            char + rotation for char, rotation in zip(bwt_string, ordered_rotations)
        )
    return ordered_rotations[idx_original_string]
147+
148+
149+
if __name__ == "__main__":
    # Interactive demo: transform a string read from stdin, then invert the
    # transform and show that the original string comes back.
    string = input("Provide a string that I will generate its BWT transform: ")
    result = bwt_transform(string)
    print(
        "Burrows Wheeler transform for string '{}' results in '{}'".format(
            string, result["bwt_string"]
        )
    )
    # The index returned alongside the bwt string is what makes the inverse
    # unambiguous.
    original_string = reverse_bwt(
        result["bwt_string"], result["idx_original_string"]
    )
    print(
        (
            "Reversing Burrows Wheeler transform for entry '{}' we get original"
            " string '{}'"
        ).format(result["bwt_string"], original_string)
    )

0 commit comments

Comments
 (0)