|
1 |
| -from __future__ import annotations |
2 | 1 | import sys
|
| 2 | +from __future__ import annotations |
3 | 3 | from collections import defaultdict
|
4 | 4 |
|
5 | 5 | # A description of the PPM algorithm can be found at https://en.wikipedia.org/wiki/Prediction_by_partial_matching
|
6 | 6 |
|
7 | 7 |
|
8 | 8 | class PPMNode:
|
9 | 9 | def __init__(self) -> None:
|
10 |
| - # Initialize a PPMNode with a dictionary for child nodes and a count of total occurrences |
| 10 | + # Initialize a PPMNode with a dictionary for child nodes |
| 11 | + # and a count of total occurrences |
11 | 12 | self.counts: dict[str, PPMNode] = defaultdict(PPMNode)
|
12 | 13 | self.total: int = 0
|
13 | 14 |
|
@@ -47,7 +48,7 @@ def compress(self, data: str) -> list[float]:
|
47 | 48 | context = (context + symbol)[-self.order :] # Keep the context within order
|
48 | 49 |
|
49 | 50 | return compressed_output
|
50 |
| - |
| 51 | + |
51 | 52 | def encode_symbol(self, context: str, symbol: str) -> float:
|
52 | 53 | # Encode a symbol based on the current context and return its probability
|
53 | 54 | node = self.root
|
@@ -92,7 +93,8 @@ def decode_symbol(self, context: str, prob: float) -> str | None:
|
92 | 93 | else:
|
93 | 94 | return None # Return None if the context is not found
|
94 | 95 |
|
95 |
| - # Iterate through the children of the node to find the symbol matching the given probability |
| 96 | + # Iterate through the children of the node to |
| 97 | + # find the symbol matching the given probability |
96 | 98 | for symbol, child in node.counts.items():
|
97 | 99 | if child.total / node.total == prob:
|
98 | 100 | return symbol # Return the symbol if the probability matches
|
|
0 commit comments