@@ -24,17 +24,20 @@ import (
24
24
)
25
25
26
26
const (
27
- idxsz = 11 // Size of buffer indexes in bit, typically 10..13 bits
28
- lensz = 4 // Size of lookahead indexes in bit, typically 4..5 bits
29
- charsz = 8 // Size of encoded chars in bit
27
+ idxsz = 11 // Size of buffer indexes in bits, typically 10..13 bits.
28
+ lensz = 4 // Size of lookahead indexes in bits, typically 4..5 bits.
30
29
31
- threshold = 1 // If match length <= threshold then output one character
32
- bufsz = (1 << idxsz ) // buffer size
33
- looksz = ((1 << lensz ) + 1 ) // lookahead buffer size
34
- historysz = bufsz - looksz // history buffer size
30
+ charsz = 8 // Size of encoded chars in bits.
31
+ bytemask = 128 // Mask with a bit in 8th position. Used to iterate through bits of a char.
35
32
36
- charStartBit = true // Indicates next bits encode a char
37
- tokenStartBit = false // Indicates next bits encode a token
33
+ threshold = 1 // If match length > threshold then output a token (idx, len), otherwise output one char.
34
+
35
+ bufsz = (1 << idxsz ) // Buffer size.
36
+ looksz = ((1 << lensz ) + 1 ) // Lookahead buffer size.
37
+ historysz = bufsz - looksz // History buffer size.
38
+
39
+ charStartBit = true // Indicates next bits encode a char.
40
+ tokenStartBit = false // Indicates next bits encode a token.
38
41
)
39
42
40
43
func min (x , y int ) int {
@@ -44,6 +47,10 @@ func min(x, y int) int {
44
47
return y
45
48
}
46
49
50
+ // findLargestMatch looks for the largest sequence of characters (from current to current+ahead)
51
+ // contained in the history of the buffer.
52
+ // It returns the index of the found match, if any, and its length.
53
+ // The index is relative to the current position. If idx 0 is returned, then no match has been found.
47
54
func findLargestMatch (buf []byte , current , size int ) (idx , len int ) {
48
55
idx = 0
49
56
len = 1
@@ -67,6 +74,8 @@ func findLargestMatch(buf []byte, current, size int) (idx, len int) {
67
74
return
68
75
}
69
76
77
+ // Encode takes a slice of bytes, compresses it using the lzss compression algorithm
78
+ // and returns the result in a new bytes buffer.
70
79
func Encode (data []byte ) []byte {
71
80
// buffer is made up of two parts: the first is for already processed data (history); the second is for new data
72
81
buffer := make ([]byte , bufsz * 2 )
@@ -104,6 +113,9 @@ func Encode(data []byte) []byte {
104
113
return out .bytes ()
105
114
}
106
115
116
+ // filler abstracts the process of consuming an input buffer
117
+ // using its bytes to fill another buffer.
118
+ // It is used to facilitate the handling of the input buffer in the Encode function.
107
119
type filler struct {
108
120
src []byte
109
121
idx int
@@ -115,13 +127,21 @@ func newFiller(src []byte) *filler {
115
127
}
116
128
}
117
129
130
+ // fill tries to fill all the dst buffer with bytes read from src.
131
+ // It returns the number of bytes moved from src to dst.
132
+ // The src buffer offset is then incremented so that all the content of src
133
+ // can be consumed in small chunks.
118
134
func (f * filler ) fill (dst []byte ) int {
119
135
n := min (len (f .src )- f .idx , len (dst ))
120
136
copy (dst , f .src [f .idx :f .idx + n ])
121
137
f .idx += n
122
138
return n
123
139
}
124
140
141
+ // result is responsible for storing the actual result of the encoding.
142
+ // It knows how to store characters and tokens in the resulting buffer.
143
+ // It must be flushed at the end of the encoding in order to store the
144
+ // remaining bits of bitBuffer.
125
145
type result struct {
126
146
bitBuffer int
127
147
bitMask int
@@ -131,11 +151,12 @@ type result struct {
131
151
func newResult () * result {
132
152
return & result {
133
153
bitBuffer : 0 ,
134
- bitMask : 128 ,
135
- out : bytes .NewBufferString ( "" ) ,
154
+ bitMask : bytemask ,
155
+ out : & bytes.Buffer {} ,
136
156
}
137
157
}
138
158
159
+ // addChar stores a char in the out buffer.
139
160
func (r * result ) addChar (c byte ) {
140
161
i := int (c )
141
162
r .putbit (charStartBit )
@@ -145,6 +166,7 @@ func (r *result) addChar(c byte) {
145
166
}
146
167
}
147
168
169
+ // addToken stores a token in the out buffer.
148
170
func (r * result ) addToken (idx , len int ) {
149
171
// Adjust idx and len to fit idxsz and lensz bits respectively
150
172
idx &= (bufsz - 1 )
@@ -163,11 +185,13 @@ func (r *result) addToken(idx, len int) {
163
185
}
164
186
165
187
func (r * result ) flush () {
166
- if r .bitMask != 128 {
188
+ if r .bitMask != bytemask {
167
189
r .out .WriteByte (byte (r .bitBuffer ))
168
190
}
169
191
}
170
192
193
+ // putbit puts the passed bit (true -> 1; false -> 0) in the bitBuffer.
194
+ // When bitBuffer contains an entire byte it's written to the out buffer.
171
195
func (r * result ) putbit (b bool ) {
172
196
if b {
173
197
r .bitBuffer |= r .bitMask
@@ -176,7 +200,7 @@ func (r *result) putbit(b bool) {
176
200
if r .bitMask == 0 {
177
201
r .out .WriteByte (byte (r .bitBuffer ))
178
202
r .bitBuffer = 0
179
- r .bitMask = 128
203
+ r .bitMask = bytemask
180
204
}
181
205
}
182
206
0 commit comments