Skip to content

Commit 7a2d33e

Browse files
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent 8269a63 commit 7a2d33e

File tree

1 file changed

+81
-82
lines changed

1 file changed

+81
-82
lines changed

compression/lz78.py

+81-82
Original file line numberDiff line numberDiff line change
@@ -8,110 +8,109 @@
88
__version__ = "1.0"
99
__author__ = "Ahmed Tamer"
1010

11+
1112
@dataclass
class Token:
    """
    One LZ78 output pair: the index of a previously seen phrase plus the
    single character that follows that phrase in the input.

    >>> Token(1, "c")
    (1, c)
    """

    # Index of the dictionary phrase this token extends (0 is the empty phrase).
    index: int
    # Character appended to the referenced phrase.
    char: str

    def __repr__(self) -> str:
        """
        Render the token as ``(index, char)``.

        >>> token = Token(1, "c")
        >>> repr(token)
        '(1, c)'
        >>> str(token)
        '(1, c)'
        """
        return f"({self.index}, {self.char})"


class LZ78Compressor:
    """
    Class containing compress and decompress methods using the LZ78
    compression algorithm.
    """

    def compress(self, text: str) -> list[Token]:
        """
        Compress the given string text using the LZ78 compression algorithm.

        Args:
            text: string to be compressed

        Returns:
            output: the compressed text as a list of Tokens with integer indices

        Tests:
            >>> lz78_compressor = LZ78Compressor()
            >>> str(lz78_compressor.compress("ababcbababaa"))
            '[(0, a), (0, b), (1, b), (0, c), (2, a), (5, b), (1, a)]'
            >>> str(lz78_compressor.compress("aacaacabcabaaac"))
            '[(0, a), (1, c), (1, a), (0, c), (1, b), (4, a), (0, b), (3, a), (0, c)]'
            >>> lz78_compressor.compress("")
            []
        """
        # Maps every known phrase to its code; the empty phrase is code 0.
        phrase_dict: dict[str, int] = {"": 0}
        tokens: list[Token] = []
        phrase = ""
        for char in text:
            candidate = phrase + char
            if candidate in phrase_dict:
                # Keep extending until the phrase is new.
                phrase = candidate
                continue
            tokens.append(Token(phrase_dict[phrase], char))
            phrase_dict[candidate] = len(phrase_dict)
            phrase = ""

        # The input may end in the middle of an already known phrase. Emit it
        # as (code of its prefix, last char) so that no characters are lost.
        # Every prefix of a dictionary phrase is itself in the dictionary.
        if phrase:
            tokens.append(Token(phrase_dict[phrase[:-1]], phrase[-1]))
        return tokens

    def decompress(self, tokens: list[Token]) -> str:
        """
        Convert the list of tokens into an output string.

        Args:
            tokens: list containing pairs (index, char)

        Returns:
            output: decompressed text

        Raises:
            KeyError: if a token refers to an index that has not been defined yet

        Tests:
            >>> lz78_compressor = LZ78Compressor()
            >>> lz78_compressor.decompress([Token(0, 'c'), Token(0, 'a'), Token(0, 'b'), Token(0, 'r'), Token(2, 'c'),
            ... Token(2, 'd'), Token(2, 'b'), Token(4, 'a'), Token(4, 'r'), Token(2, 'r'), Token(8, 'd')])
            'cabracadabrarrarrad'
            >>> lz78_compressor.decompress([Token(0, 'a'), Token(0, 'b'), Token(1, 'b'), Token(0, 'c'),
            ... Token(2, 'a'), Token(5, 'b'), Token(1, 'a')])
            'ababcbababaa'
            >>> lz78_compressor.decompress([Token(0, 'a'), Token(1, 'c'), Token(1, 'a'), Token(0, 'c'),
            ... Token(1, 'b'), Token(4, 'a'), Token(0, 'b'), Token(3, 'a')])
            'aacaacabcabaaa'
            >>> lz78_compressor.decompress(lz78_compressor.compress("aacaacabcabaaac"))
            'aacaacabcabaaac'
        """
        # Code 0 is the empty phrase; each token defines the next code.
        phrases: dict[int, str] = {0: ""}
        pieces: list[str] = []
        for token in tokens:
            # int() keeps accepting tokens whose index was stored as a string.
            phrase = phrases[int(token.index)] + token.char
            phrases[len(phrases)] = phrase
            pieces.append(phrase)
        return "".join(pieces)
103+
104+
105+
if __name__ == "__main__":
    import doctest

    # Run the examples embedded in the docstrings first.
    doctest.testmod()

    # Example: a compress/decompress round trip must give back the input.
    sample_text = "aacaacabcabaaa"
    codec = LZ78Compressor()
    restored_text = codec.decompress(codec.compress(sample_text))
    assert restored_text == sample_text, "Invalid result."

0 commit comments

Comments
 (0)