Skip to content

Commit d595b67

Browse files
committed
Restore near-original implementations of the parse_file and huffman functions, as they are more memory-efficient for large files; add more options to the huffman function; add another doctest to the huffman function
1 parent dfb26b2 commit d595b67

File tree

2 files changed

+37
-13
lines changed

2 files changed

+37
-13
lines changed

compression/huffman.py

Lines changed: 35 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
import sys
44
import doctest
5+
from typing import Optional, TYPE_CHECKING
6+
7+
if TYPE_CHECKING:
8+
from _typeshed import SupportsWrite
59

610

711
class Letter:
@@ -54,9 +58,14 @@ def parse_file(file_path: str) -> list[Letter]:
5458
>>> out1
5559
[T:1, h:1, a:1, e:1, i:2, t:2, s:3, :3, .:3]
5660
"""
57-
with open(file_path, "r", encoding="utf8") as file:
58-
string = file.read()
59-
return parse_string(string)
61+
chars: dict[str, Letter] = {}
62+
with open(file_path, "r", encoding="utf8") as input_file:
63+
while char:=input_file.read(1):
64+
if char not in chars:
65+
chars[char] = Letter(char, 1)
66+
else:
67+
chars[char].freq += 1
68+
return sorted(chars.values(), key=lambda letter: letter.freq)
6069

6170

6271
def build_tree(letters: list[Letter]) -> Letter | TreeNode:
@@ -133,13 +142,13 @@ def traverse_tree(root: Letter | TreeNode, bitstring: str = "") -> list[Letter]:
133142
return letters
134143

135144

136-
def huffman_string(string: str, sep=" ") -> str:
145+
def huffman_string(string: str, *, sep=" ") -> str:
137146
"""
138147
Return huffman coded string, with
139-
letter bitstrings separated by sep parameter
148+
each bitstring separated by sep parameter
140149
>>> huffman_string("goose")
141150
'00 11 11 01 10'
142-
>>> huffman_string("This is a test...", "")
151+
>>> huffman_string("This is a test...", sep="")
143152
'1000100101011011101011011110101110111011110011000000'
144153
"""
145154
letters_list = parse_string(string)
@@ -150,17 +159,30 @@ def huffman_string(string: str, sep=" ") -> str:
150159
return sep.join(letter_bitstrings[char] for char in string)
151160

152161

153-
def huffman(file_path: str) -> None:
162+
def huffman(file_path: str, *, sep=" ", output_file: 'Optional[SupportsWrite[str]]' = None) -> None:
154163
"""
155-
Parse the file, huffman code it and print the result
164+
Parse the file, Huffman Code it and print the result
165+
to the given output_file, with each bitstring
166+
separated by sep parameter
156167
>>> huffman("text_data/text_original.txt")
157168
Huffman Coding of text_data/text_original.txt:
158-
1000 1001 010 110 111 010 110 111 1010 111 011 1011 110 011 00 00 00
169+
1000 1001 010 110 111 010 110 111 1010 111 011 1011 110 011 00 00 00
170+
>>> with open("text_data/text_huffman.txt", "w", encoding="utf8") as output_file_in1: huffman("text_data/text_original.txt", sep="", output_file=output_file_in1)
171+
>>> with open("text_data/text_huffman.txt", "r", encoding="utf8") as output_file_out1: print(output_file_out1.read())
172+
Huffman Coding of text_data/text_original.txt:
173+
1000100101011011101011011110101110111011110011000000
174+
<BLANKLINE>
159175
"""
160-
with open(file_path, "r", encoding="utf8") as file:
161-
string = file.read()
162-
result = huffman_string(string, " ")
163-
print(f"Huffman Coding of {file_path}:\n{result}")
176+
letters_list = parse_file(file_path)
177+
root = build_tree(letters_list)
178+
letter_bitstrings = {
179+
k: v for letter in traverse_tree(root) for k, v in letter.bitstring.items()
180+
}
181+
print(f"Huffman Coding of {file_path}:", file=output_file)
182+
with open(file_path, "r", encoding="utf8") as input_file:
183+
while char:=input_file.read(1):
184+
print(letter_bitstrings[char], end=sep, file=output_file)
185+
print(file=output_file)
164186

165187

166188
if __name__ == "__main__":
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Huffman Coding of text_data/text_original.txt:
2+
1000100101011011101011011110101110111011110011000000

0 commit comments

Comments
 (0)