2
2
3
3
import sys
4
4
import doctest
5
+ from typing import Optional , TYPE_CHECKING
6
+
7
+ if TYPE_CHECKING :
8
+ from _typeshed import SupportsWrite
5
9
6
10
7
11
class Letter :
@@ -54,9 +58,14 @@ def parse_file(file_path: str) -> list[Letter]:
54
58
>>> out1
55
59
[T:1, h:1, a:1, e:1, i:2, t:2, s:3, :3, .:3]
56
60
"""
57
- with open (file_path , "r" , encoding = "utf8" ) as file :
58
- string = file .read ()
59
- return parse_string (string )
61
+ chars : dict [str , Letter ] = {}
62
+ with open (file_path , "r" , encoding = "utf8" ) as input_file :
63
+ while char := input_file .read (1 ):
64
+ if char not in chars :
65
+ chars [char ] = Letter (char , 1 )
66
+ else :
67
+ chars [char ].freq += 1
68
+ return sorted (chars .values (), key = lambda letter : letter .freq )
60
69
61
70
62
71
def build_tree (letters : list [Letter ]) -> Letter | TreeNode :
@@ -133,13 +142,13 @@ def traverse_tree(root: Letter | TreeNode, bitstring: str = "") -> list[Letter]:
133
142
return letters
134
143
135
144
136
- def huffman_string (string : str , sep = " " ) -> str :
145
+ def huffman_string (string : str , * , sep = " " ) -> str :
137
146
"""
138
147
Return huffman coded string, with
139
- letter bitstrings separated by sep parameter
148
+ each bitstring separated by sep parameter
140
149
>>> huffman_string("goose")
141
150
'00 11 11 01 10'
142
- >>> huffman_string("This is a test...", "")
151
+ >>> huffman_string("This is a test...", sep= "")
143
152
'1000100101011011101011011110101110111011110011000000'
144
153
"""
145
154
letters_list = parse_string (string )
@@ -150,17 +159,30 @@ def huffman_string(string: str, sep=" ") -> str:
150
159
return sep .join (letter_bitstrings [char ] for char in string )
151
160
152
161
153
def huffman(
    file_path: str,
    *,
    sep: str = " ",
    output_file: 'Optional[SupportsWrite[str]]' = None,
) -> None:
    """
    Parse the file, Huffman code it and print the result
    to the given output_file, with each bitstring
    separated by sep parameter

    The input file is read twice: once by parse_file to count how often
    each character occurs, and once here to emit the bitstring of every
    character in order. The second pass is streamed in fixed-size chunks,
    so the whole file is never held in memory at once.

    :param file_path: path of the utf8 text file to encode
    :param sep: string written between consecutive bitstrings; it is not
        written after the last one, matching huffman_string
    :param output_file: text stream passed to print() as ``file``;
        None makes print() fall back to standard output

    >>> huffman("text_data/text_original.txt")
    Huffman Coding of text_data/text_original.txt:
    1000 1001 010 110 111 010 110 111 1010 111 011 1011 110 011 00 00 00
    >>> with open("text_data/text_huffman.txt", "w", encoding="utf8") as output_file_in1: huffman("text_data/text_original.txt", sep="", output_file=output_file_in1)
    >>> with open("text_data/text_huffman.txt", "r", encoding="utf8") as output_file_out1: print(output_file_out1.read())
    Huffman Coding of text_data/text_original.txt:
    1000100101011011101011011110101110111011110011000000
    <BLANKLINE>
    """
    # Characters read per chunk during the encoding pass.
    chunk_size = 65536

    letters_list = parse_file(file_path)

    # An empty file yields no letters and therefore nothing to encode, so the
    # tree is only built when there is at least one letter to put in it.
    letter_bitstrings: dict[str, str] = {}
    if letters_list:
        root = build_tree(letters_list)
        letter_bitstrings = {
            k: v
            for letter in traverse_tree(root)
            for k, v in letter.bitstring.items()
        }

    print(f"Huffman Coding of {file_path}:", file=output_file)

    # The separator goes *between* bitstrings only: nothing precedes the
    # first chunk, and every later chunk is prefixed with sep so that the
    # streamed output equals sep.join() over the whole file.
    prefix = ""
    with open(file_path, "r", encoding="utf8") as input_file:
        while chunk := input_file.read(chunk_size):
            encoded = sep.join(letter_bitstrings[char] for char in chunk)
            print(prefix + encoded, end="", file=output_file)
            prefix = sep

    # Terminate the line of bitstrings.
    print(file=output_file)
164
186
165
187
166
188
if __name__ == "__main__" :
0 commit comments