|
8 | 8 | __version__ = "1.0"
|
9 | 9 | __author__ = "Ahmed Tamer"
|
10 | 10 |
|
| 11 | + |
11 | 12 | @dataclass
|
12 | 13 | class Token:
|
13 |
| - """ |
14 |
| - Dataclass representing pair called token consisting of the dictionary index and a single character that follows the phrase in the dictionary. |
15 |
| - """ |
16 |
| - |
17 |
| - index: int |
18 |
| - char: str |
19 |
| - |
20 |
| - def __repr__(self) -> str: |
21 | 14 | """
|
22 |
| - >>> token = Token(1, "c") |
23 |
| - >>> repr(token) |
24 |
| - '(1, c)' |
25 |
| - >>> str(token) |
26 |
| - '(1, c)' |
| 15 | + Dataclass representing pair called token consisting of the dictionary index and a single character that follows the phrase in the dictionary. |
27 | 16 | """
|
28 |
| - return f"({self.index}, {self.char})" |
29 |
| - |
| 17 | + |
| 18 | + index: int |
| 19 | + char: str |
| 20 | + |
| 21 | + def __repr__(self) -> str: |
| 22 | + """ |
| 23 | + >>> token = Token(1, "c") |
| 24 | + >>> repr(token) |
| 25 | + '(1, c)' |
| 26 | + >>> str(token) |
| 27 | + '(1, c)' |
| 28 | + """ |
| 29 | + return f"({self.index}, {self.char})" |
| 30 | + |
30 | 31 |
|
31 | 32 | class LZ78Compressor:
|
32 |
| - """ |
33 |
| - Class containing compress and decompress methods using LZ78 compression algorithm. |
34 |
| - """ |
35 |
| - |
36 |
| - def compress(self, text: str) -> list[Token]: |
37 | 33 | """
|
38 |
| - Compress the given string text using LZ78 compression algorithm. |
39 |
| -
|
40 |
| - Args: |
41 |
| - text: string to be compressed |
42 |
| -
|
43 |
| - Returns: |
44 |
| - output: the compressed text as a list of Tokens |
45 |
| - |
46 |
| - Tests: |
47 |
| - >>> lz78_compressor = LZ78Compressor() |
48 |
| - >>> str(lz78_compressor.compress("ababcbababaa")) |
49 |
| - '[(0, a), (0, b), (1, b), (0, c), (2, a), (5, b), (1, a)]' |
50 |
| - >>> str(lz78_compressor.compress("aacaacabcabaaac")) |
51 |
| - '[(0, a), (1, c), (1, a), (0, c), (1, b), (4, a), (0, b), (3, a)]' |
| 34 | + Class containing compress and decompress methods using LZ78 compression algorithm. |
52 | 35 | """
|
53 | 36 |
|
54 |
| - phrase_dict = {} |
55 |
| - tokens = [] |
56 |
| - code = 1 |
57 |
| - phrase = '' |
58 |
| - for char in text: |
59 |
| - phrase += char |
60 |
| - if phrase not in phrase_dict: |
61 |
| - phrase_dict[phrase] = str(code) |
62 |
| - if len(phrase) == 1: |
63 |
| - tokens.append(Token('0', phrase)) |
64 |
| - else: |
65 |
| - tokens.append(Token(phrase_dict[phrase[:-1]], phrase[-1])) |
66 |
| - code += 1 |
67 |
| - phrase = '' |
68 |
| - return tokens |
69 |
| - |
70 |
| - |
71 |
| - def decompress(self, tokens: list[Token]) -> str: |
72 |
| - """ |
| 37 | + def compress(self, text: str) -> list[Token]: |
| 38 | + """ |
| 39 | + Compress the given string text using LZ78 compression algorithm. |
| 40 | +
|
| 41 | + Args: |
| 42 | + text: string to be compressed |
| 43 | +
|
| 44 | + Returns: |
| 45 | + output: the compressed text as a list of Tokens |
| 46 | +
|
| 47 | + Tests: |
| 48 | + >>> lz78_compressor = LZ78Compressor() |
| 49 | + >>> str(lz78_compressor.compress("ababcbababaa")) |
| 50 | + '[(0, a), (0, b), (1, b), (0, c), (2, a), (5, b), (1, a)]' |
| 51 | + >>> str(lz78_compressor.compress("aacaacabcabaaac")) |
| 52 | + '[(0, a), (1, c), (1, a), (0, c), (1, b), (4, a), (0, b), (3, a)]' |
| 53 | + """ |
| 54 | + |
| 55 | + phrase_dict = {} |
| 56 | + tokens = [] |
| 57 | + code = 1 |
| 58 | + phrase = "" |
| 59 | + for char in text: |
| 60 | + phrase += char |
| 61 | + if phrase not in phrase_dict: |
| 62 | + phrase_dict[phrase] = str(code) |
| 63 | + if len(phrase) == 1: |
| 64 | + tokens.append(Token("0", phrase)) |
| 65 | + else: |
| 66 | + tokens.append(Token(phrase_dict[phrase[:-1]], phrase[-1])) |
| 67 | + code += 1 |
| 68 | + phrase = "" |
| 69 | + return tokens |
| 70 | + |
| 71 | + def decompress(self, tokens: list[Token]) -> str: |
| 72 | + """ |
73 | 73 | Convert the list of tokens into an output string.
|
74 | 74 |
|
75 | 75 | Args:
|
76 | 76 | tokens: list containing pairs (index, char)
|
77 | 77 |
|
78 | 78 | Returns:
|
79 | 79 | output: decompressed text
|
80 |
| - |
| 80 | +
|
81 | 81 | Tests:
|
82 | 82 | >>> lz78_compressor = LZ78Compressor()
|
83 |
| - >>> lz78_compressor.decompress([Token(0, 'c'), Token(0, 'a'), Token(0, 'b'), Token(0, 'r'), Token(2, 'c'), |
| 83 | + >>> lz78_compressor.decompress([Token(0, 'c'), Token(0, 'a'), Token(0, 'b'), Token(0, 'r'), Token(2, 'c'), |
84 | 84 | ... Token(2, 'd'), Token(2, 'b'), Token(4, 'a'), Token(4, 'r'), Token(2, 'r'), Token(8, 'd')])
|
85 | 85 | 'cabracadabrarrarrad'
|
86 |
| - >>> lz78_compressor.decompress([Token(0, 'a'), Token(0, 'b'), Token(1, 'b'), Token(0, 'c'), |
| 86 | + >>> lz78_compressor.decompress([Token(0, 'a'), Token(0, 'b'), Token(1, 'b'), Token(0, 'c'), |
87 | 87 | ... Token(2, 'a'), Token(5, 'b'), Token(1, 'a')])
|
88 | 88 | 'ababcbababaa'
|
89 |
| - >>> lz78_compressor.decompress([Token(0, 'a'), Token(1, 'c'), Token(1, 'a'), Token(0, 'c'), |
| 89 | + >>> lz78_compressor.decompress([Token(0, 'a'), Token(1, 'c'), Token(1, 'a'), Token(0, 'c'), |
90 | 90 | ... Token(1, 'b'), Token(4, 'a'), Token(0, 'b'), Token(3, 'a')])
|
91 | 91 | 'aacaacabcabaaa'
|
92 |
| - """ |
| 92 | + """ |
| 93 | + |
| 94 | + text = "" |
| 95 | + phrase_dict = {"0": ""} |
| 96 | + code = 1 |
| 97 | + for token in tokens: |
| 98 | + phrase = phrase_dict[str(token.index)] + token.char |
| 99 | + phrase_dict[str(code)] = phrase |
| 100 | + code += 1 |
| 101 | + text += phrase |
| 102 | + return text |
| 103 | + |
| 104 | + |
| 105 | +if __name__ == "__main__": |
| 106 | + from doctest import testmod |
| 107 | + |
| 108 | + testmod() |
| 109 | + |
| 110 | + lz78_compressor = LZ78Compressor() |
93 | 111 |
|
94 |
| - text = '' |
95 |
| - phrase_dict = {'0': ''} |
96 |
| - code = 1 |
97 |
| - for token in tokens: |
98 |
| - phrase = phrase_dict[str(token.index)] + token.char |
99 |
| - phrase_dict[str(code)] = phrase |
100 |
| - code += 1 |
101 |
| - text += phrase |
102 |
| - return text |
103 |
| - |
104 |
| - |
105 |
| -if __name__ == '__main__': |
106 |
| - from doctest import testmod |
107 |
| - |
108 |
| - testmod() |
109 |
| - |
110 |
| - |
111 |
| - lz78_compressor = LZ78Compressor() |
112 |
| - |
113 |
| - # Example |
114 |
| - text = 'aacaacabcabaaa' |
115 |
| - tokens = lz78_compressor.compress(text) |
116 |
| - decompressedText = lz78_compressor.decompress(tokens) |
117 |
| - assert decompressedText == text, 'Invalid result.' |
| 112 | + # Example |
| 113 | + text = "aacaacabcabaaa" |
| 114 | + tokens = lz78_compressor.compress(text) |
| 115 | + decompressedText = lz78_compressor.decompress(tokens) |
| 116 | + assert decompressedText == text, "Invalid result." |
0 commit comments