-
-
Notifications
You must be signed in to change notification settings - Fork 46.9k
Create lz78.py #11842
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
Ahmed99125
wants to merge
11
commits into
TheAlgorithms:master
Choose a base branch
from
Ahmed99125:patch-4
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Create lz78.py #11842
Changes from all commits
Commits
Show all changes
11 commits
Select commit
Hold shift + click to select a range
8269a63
Create lz78.py
Ahmed99125 7a2d33e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] b544f10
Update lz78.py
Ahmed99125 1afdf0f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] c307a60
Update lz78.py
Ahmed99125 d85fde2
Update compression/lz78.py
Ahmed99125 c7d1ece
Update lz78.py
Ahmed99125 34fe6bf
Update lz78.py
Ahmed99125 676da50
Update lz78.py
Ahmed99125 c27a91e
Update compression/lz78.py
Ahmed99125 bab8e63
Update lz78.py
Ahmed99125 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
"""
LZ78 dictionary-based lossless text compression.

Sources:
https://en.wikipedia.org/wiki/LZ77_and_LZ78#LZ78
"""
|
||
from dataclasses import dataclass | ||
|
||
__version__ = "1.0" | ||
__author__ = "Ahmed Tamer" | ||
|
||
|
||
@dataclass
class Token:
    """
    A single LZ78 output pair: a dictionary index plus the character that
    follows the referenced phrase.

    Attributes:
        index: code of a previously seen phrase; 0 refers to the empty phrase
        char: the character appended to that phrase
    """

    index: int
    char: str

    def __repr__(self) -> str:
        """
        Render the token compactly as "(index, char)".

        >>> token = Token(1, "c")
        >>> repr(token)
        '(1, c)'
        >>> str(token)
        '(1, c)'
        """
        return f"({self.index}, {self.char})"
|
||
|
||
class LZ78Compressor:
    """
    Class containing compress and decompress methods using LZ78 compression algorithm.

    The compressor is stateless: every call builds its own phrase dictionary.
    """

    def compress(self, text: str) -> list[Token]:
        """
        Compress the given string text using LZ78 compression algorithm.

        The input is split into phrases, each being a previously seen phrase
        extended by one character. Each phrase is emitted as a Token holding
        the code of the known prefix (0 for the empty prefix) and the new
        character. If the input ends in the middle of an already known
        phrase, that phrase is still emitted as a final token so that no
        input is lost.

        Args:
            text: string to be compressed

        Returns:
            output: the compressed text as a list of Tokens

        Raises:
            TypeError: if text is not a string

        Tests:
        >>> lz78_compressor = LZ78Compressor()
        >>> str(lz78_compressor.compress("ababcbababaa"))
        '[(0, a), (0, b), (1, b), (0, c), (2, a), (5, b), (1, a)]'
        >>> str(lz78_compressor.compress("aacaacabcabaaac"))
        '[(0, a), (1, c), (1, a), (0, c), (1, b), (4, a), (0, b), (3, a), (0, c)]'
        >>> str(lz78_compressor.compress("aa"))
        '[(0, a), (0, a)]'
        >>> str(lz78_compressor.compress(""))
        '[]'
        >>> lz78_compressor.compress([])
        Traceback (most recent call last):
        TypeError: Expected string.
        >>> lz78_compressor.compress({})
        Traceback (most recent call last):
        TypeError: Expected string.
        >>> all(len(s) >= len(lz78_compressor.compress(s)) for s in (
        ...     "", "AA", "AB", "AAA", "ABC", "ABCDEFGH"))
        True
        >>> all(lz78_compressor.decompress(lz78_compressor.compress(s)) == s
        ...     for s in ("", "a", "aa", "aaa", "abab", "aacaacabcabaaac"))
        True
        """

        if not isinstance(text, str):
            raise TypeError("Expected string.")

        # Code 0 is reserved for the empty phrase, so real phrases start at 1.
        phrase_codes: dict[str, int] = {"": 0}
        tokens: list[Token] = []
        phrase = ""
        for char in text:
            candidate = phrase + char
            if candidate in phrase_codes:
                # Keep extending until the phrase is new to the dictionary.
                phrase = candidate
                continue
            tokens.append(Token(phrase_codes[phrase], char))
            phrase_codes[candidate] = len(phrase_codes)
            phrase = ""

        if phrase:
            # The input ended on a phrase that is already in the dictionary.
            # Emit it as (code of its prefix, last character); the prefix is
            # guaranteed to be known because the phrase was built one known
            # extension at a time.
            tokens.append(Token(phrase_codes[phrase[:-1]], phrase[-1]))
        return tokens

    def decompress(self, tokens: list[Token]) -> str:
        """
        Convert the list of tokens into an output string.

        The phrase dictionary is rebuilt on the fly: the n-th token (counting
        from 1) defines phrase n as the phrase at token.index followed by
        token.char.

        Args:
            tokens: list containing pairs (index, char)

        Returns:
            output: decompressed text

        Raises:
            KeyError: if a token references an index that has not been
                defined by an earlier token

        Tests:
        >>> lz78_compressor = LZ78Compressor()
        >>> lz78_compressor.decompress([Token(0, 'c'), Token(0, 'a'), Token(0, 'b'),
        ... Token(0, 'r'), Token(2, 'c'), Token(2, 'd'), Token(2, 'b'), Token(4, 'a'),
        ... Token(4, 'r'), Token(2, 'r'), Token(8, 'd')])
        'cabracadabrarrarrad'
        >>> lz78_compressor.decompress([Token(0, 'a'), Token(0, 'b'), Token(1, 'b'),
        ... Token(0, 'c'), Token(2, 'a'), Token(5, 'b'), Token(1, 'a')])
        'ababcbababaa'
        >>> lz78_compressor.decompress([Token(0, 'a'), Token(1, 'c'), Token(1, 'a'),
        ... Token(0, 'c'), Token(1, 'b'), Token(4, 'a'),
        ... Token(0, 'b'), Token(3, 'a')])
        'aacaacabcabaaa'
        >>> lz78_compressor.decompress([Token(0, 'a'), Token(0, 'a')])
        'aa'
        >>> lz78_compressor.decompress([])
        ''
        """

        phrases: dict[int, str] = {0: ""}
        pieces: list[str] = []
        for code, token in enumerate(tokens, start=1):
            phrase = phrases[token.index] + token.char
            phrases[code] = phrase
            pieces.append(phrase)
        # Join once at the end instead of growing a string inside the loop.
        return "".join(pieces)
|
||
|
||
if __name__ == "__main__":
    from doctest import testmod

    # Run every doctest embedded in this module's docstrings.
    testmod()

    lz78_compressor = LZ78Compressor()

    # Example: compressing and then decompressing must reproduce the input.
    text = "aacaacabcabaaa"
    tokens = lz78_compressor.compress(text)
    decompressed_text = lz78_compressor.decompress(tokens)
    assert decompressed_text == text, "Invalid result."
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.