Skip to content

Commit 4db57fd

Browse files
committed
Refine algorithm
1 parent 0881b34 commit 4db57fd

File tree

1 file changed

+120
-145
lines changed

1 file changed

+120
-145
lines changed

internal/lzss/lzss.go

+120-145
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
// This code is a Go port of the LZSS encoder-decoder (Haruhiko Okumura; public domain)
2+
//
13
// This file is part of arduino-cloud-cli.
24
//
35
// Copyright (C) 2021 ARDUINO SA (http://www.arduino.cc/)
@@ -19,26 +21,20 @@ package lzss
1921

2022
import (
2123
"bytes"
22-
"io"
2324
)
2425

2526
const (
26-
ei = 11 /* typically 10..13 */
27-
ej = 4 /* typically 4..5 */
28-
p = 1 /* If match length <= P then output one character */
29-
bufsz = (1 << ei) /* buffer size */
30-
looksz = ((1 << ej) + 1) /* lookahead buffer size */
31-
)
27+
idxsz = 11 // Size of a buffer index in bits, typically 10..13 bits
28+
lensz = 4 // Size of an encoded match length in bits, typically 4..5 bits
29+
charsz = 8 // Size of an encoded char in bits
3230

33-
var (
34-
codecount = 0
35-
bit_buffer = 0
36-
bit_mask = 128
37-
EI = 11 /* typically 10..13 */
38-
EJ = 4 /* typically 4..5 */
39-
P = 1 /* If match length <= P then output one character */
40-
N = (1 << EI) /* buffer size */
41-
F = ((1 << EJ) + 1) /* lookahead buffer size */
31+
threshold = 1 // If match length <= threshold then output one character
32+
bufsz = (1 << idxsz) // Buffer size; Encode allocates a working buffer of twice this size
33+
looksz = ((1 << lensz) + 1) // Lookahead buffer size, i.e. the longest match that is searched for
34+
historysz = bufsz - looksz // History buffer size
35+
36+
charStartBit = true // Flag bit indicating that the next bits encode a char
37+
tokenStartBit = false // Flag bit indicating that the next bits encode a token
4238
)
4339

4440
func min(x, y int) int {
@@ -48,163 +44,142 @@ func min(x, y int) int {
4844
return y
4945
}
5046

51-
func contains(buf []byte, el []byte) (ok bool, ln int, idx int) {
52-
for i := 0; i < len(buf)-looksz; i++ {
53-
54-
// }
55-
// for i, e := range buf {
56-
// Skip mismatching elements
57-
// if el[0] != e {
58-
if buf[i] != el[0] {
59-
continue
60-
}
61-
62-
// Check bounds
63-
ahead := min(looksz, len(buf)-i)
64-
ahead = min(ahead, len(el))
65-
66-
// Count number of bytes contained
67-
var j int
68-
for j = 1; j < ahead; j++ {
69-
if buf[i+j] != el[j] {
70-
break
47+
// findLargestMatch looks in the history that precedes buf[current] for the
// longest run of bytes equal to the bytes starting at buf[current].
//
// The candidates are the positions from current-1 down to current-historysz,
// visited from the nearest to the farthest. At most min(looksz, size-current)
// bytes are compared, so no byte at or beyond index size is ever read.
//
// It returns the start index of the best match and its length. A candidate
// replaces the current best only when it is strictly longer, so among matches
// of equal length the one closest to current wins. When no match longer than
// one byte exists, the result is idx 0 and len 1.
//
// NOTE(review): the result name len shadows the builtin len in this function.
func findLargestMatch(buf []byte, current, size int) (idx, len int) {
48+
idx = 0
49+
len = 1
50+
// Longest match that can be considered without reading past size
ahead := min(looksz, size-current)
51+
// Lowest index that still belongs to the history window
history := current - (historysz)
52+
c := buf[current]
53+
for i := current - 1; i >= history; i-- {
54+
if buf[i] == c {
55+
var j int
56+
// Count how many bytes starting at i equal the bytes starting at current
for j = 1; j < ahead; j++ {
57+
if buf[i+j] != buf[current+j] {
58+
break
59+
}
60+
}
61+
if j > len {
62+
idx = i
63+
len = j
7164
}
72-
}
73-
// store the largest result
74-
if j > ln {
75-
ok, ln, idx = true, j, i
7665
}
7766
}
7867
return
7968
}
8069

81-
func putbit1(out io.Writer) {
82-
bit_buffer |= bit_mask
83-
bit_mask = bit_mask >> 1
84-
if bit_mask == 0 {
85-
out.Write([]byte{byte(bit_buffer)})
86-
bit_buffer = 0
87-
bit_mask = 128
70+
// Encode compresses data with the LZSS scheme and returns the encoded bytes.
//
// The input is consumed through a working buffer of bufsz*2 bytes. At every
// step the longest match for the bytes at the current position is searched in
// the preceding history: if it is not longer than threshold a single char is
// emitted, otherwise an (index, length) token is emitted. The history that
// precedes the first input byte is pre-filled with spaces.
func Encode(data []byte) []byte {
71+
// buffer is made up of two parts: the first is for already processed data (history); the second is for new data
72+
buffer := make([]byte, bufsz*2)
73+
// Initialize the old-data part (history) of the buffer
74+
for i := 0; i < historysz; i++ {
75+
buffer[i] = ' '
8876
}
89-
}
77+
out := newResult()
78+
in := newFiller(data)
79+
80+
// Fill the new-data part of the buffer
81+
n := in.fill(buffer[historysz:])
82+
// bufferend is the index right after the last valid input byte in buffer
bufferend := historysz + n
83+
for current := historysz; current < bufferend; {
84+
idx, len := findLargestMatch(buffer, current, bufferend)
85+
if len <= threshold {
86+
out.addChar(buffer[current])
87+
// A char always consumes exactly one input byte
len = 1
88+
} else {
89+
out.addToken(idx, len)
90+
}
9091

91-
func putbit0(out io.Writer) {
92-
bit_mask = bit_mask >> 1
93-
if bit_mask == 0 {
94-
out.Write([]byte{byte(bit_buffer)})
95-
bit_buffer = 0
96-
bit_mask = 128
92+
current += len
93+
// At most looksz bytes are left between current and the end of the buffer: slide the window
if current >= bufsz*2-looksz {
94+
// Shift processed bytes to the old-data portion of the buffer
95+
copy(buffer[:bufsz], buffer[bufsz:])
96+
current -= bufsz
97+
// Refill the new-data portion of the buffer
98+
bufferend -= bufsz
99+
bufferend += in.fill(buffer[bufferend:])
100+
}
97101
}
98-
}
99102

100-
func flush_bit_buffer(out io.Writer) {
101-
if bit_mask != 128 {
102-
out.Write([]byte{byte(bit_buffer)})
103-
}
103+
// Write out the bits of the last, partially filled byte (if any)
out.flush()
104+
return out.bytes()
104105
}
105106

106-
func output1(out io.Writer, c int) {
107-
putbit1(out)
107+
// filler hands out the content of src in consecutive portions.
// idx is the offset in src of the next byte that has not been handed out yet.
type filler struct {
108+
src []byte
109+
idx int
110+
}
108111

109-
for mask := 256 >> 1; mask != 0; mask = mask >> 1 {
110-
if c&mask != 0 {
111-
putbit1(out)
112-
} else {
113-
putbit0(out)
114-
}
112+
// newFiller returns a filler that reads from src starting at its first byte
// (idx is left at its zero value). src is referenced, not copied.
func newFiller(src []byte) *filler {
113+
return &filler{
114+
src: src,
115115
}
116116
}
117117

118-
func output2(out io.Writer, x, y int) {
119-
putbit0(out)
118+
// fill copies into dst as many of the not yet consumed source bytes as fit,
// advances the read position past them and returns the number of bytes copied.
// It returns 0 when the source is exhausted or dst is empty.
func (f *filler) fill(dst []byte) int {
119+
// Bounded by both the bytes still available and the room in dst
n := min(len(f.src)-f.idx, len(dst))
120+
copy(dst, f.src[f.idx:f.idx+n])
121+
f.idx += n
122+
return n
123+
}
120124

121-
for mask := N >> 1; mask != 0; mask = mask >> 1 {
122-
if x&mask != 0 {
123-
putbit1(out)
124-
} else {
125-
putbit0(out)
126-
}
127-
}
125+
// result accumulates the encoded output one bit at a time.
// bitBuffer holds the bits of the byte currently being assembled, bitMask
// selects the position of the next bit inside it (128 first, then shifted
// right after every bit) and out collects the completed bytes.
type result struct {
126+
bitBuffer int
127+
bitMask int
128+
out *bytes.Buffer
129+
}
128130

129-
for mask := (1 << EJ) >> 1; mask != 0; mask = mask >> 1 {
130-
if y&mask != 0 {
131-
putbit1(out)
132-
} else {
133-
putbit0(out)
134-
}
131+
// newResult returns an empty result whose next bit will be written to the
// most significant position (mask 128) of the first output byte.
func newResult() *result {
132+
return &result{
133+
bitBuffer: 0,
134+
bitMask: 128,
135+
out: bytes.NewBufferString(""),
135136
}
136137
}
137138

138-
func Encode(data []byte) []byte {
139-
bit_buffer = 0
140-
bit_mask = 128
141-
out := bytes.NewBufferString("")
142-
in := bytes.NewReader(data)
139+
// addChar appends a literal char to the output: the charStartBit flag
// followed by the charsz bits of c, most significant bit first.
func (r *result) addChar(c byte) {
140+
i := int(c)
141+
r.putbit(charStartBit)
142+
// The mask starts at the top bit of a charsz-bit value and moves down to bit 0
for mask := (1 << charsz) >> 1; mask != 0; mask = mask >> 1 {
143+
b := i&mask != 0
144+
r.putbit(b)
145+
}
146+
}
143147

144-
var i, j, f1, x, y, r, s, bufferend int
145-
var c byte
148+
// addToken appends a back-reference to the output: the tokenStartBit flag,
// then idx masked to idxsz bits, then len-2 in lensz bits; both values are
// written most significant bit first.
//
// NOTE(review): the parameter name len shadows the builtin len in this method.
func (r *result) addToken(idx, len int) {
149+
// Adjust idx and len to fit idxsz and lensz bits respectively
150+
idx &= (bufsz - 1)
151+
// The length is stored with an offset of 2 (Encode only emits tokens for len > threshold)
len -= 2
146152

147-
buffer := make([]byte, N*2)
148-
for i = 0; i < N-F; i++ {
149-
buffer[i] = ' '
153+
r.putbit(tokenStartBit)
154+
for mask := (1 << idxsz) >> 1; mask != 0; mask = mask >> 1 {
155+
b := idx&mask != 0
156+
r.putbit(b)
150157
}
151158

152-
for i = N - F; i < N*2; i++ {
153-
b, err := in.ReadByte()
154-
if err != nil {
155-
break
156-
}
157-
buffer[i] = b
159+
for mask := (1 << lensz) >> 1; mask != 0; mask = mask >> 1 {
160+
b := len&mask != 0
161+
r.putbit(b)
158162
}
163+
}
159164

160-
bufferend, r, s = i, N-F, 0
161-
for r < bufferend {
162-
f1 = min(F, bufferend-r)
163-
x = 0
164-
y = 1
165-
c = buffer[r]
166-
for i = r - 1; i >= s; i-- {
167-
if buffer[i] == c {
168-
for j = 1; j < f1; j++ {
169-
if buffer[i+j] != buffer[r+j] {
170-
break
171-
}
172-
}
173-
if j > y {
174-
x = i
175-
y = j
176-
}
177-
}
178-
}
179-
180-
if y <= P {
181-
output1(out, int(c))
182-
y = 1
183-
} else {
184-
output2(out, x&(N-1), y-2)
185-
}
186-
187-
r += y
188-
s += y
189-
if r >= N*2-F {
190-
for i = 0; i < N; i++ {
191-
buffer[i] = buffer[i+N]
192-
}
193-
bufferend -= N
194-
r -= N
195-
s -= N
165+
// flush writes the byte being assembled to the output when it holds at least
// one pending bit (bitMask is no longer 128); the positions that were never
// written stay zero.
//
// NOTE(review): bitBuffer and bitMask are not reset here, so a second call
// with the same pending bits would write the same byte again.
func (r *result) flush() {
166+
if r.bitMask != 128 {
167+
r.out.WriteByte(byte(r.bitBuffer))
168+
}
169+
}
196170

197-
for bufferend < N*2 {
198-
b, err := in.ReadByte()
199-
if err != nil {
200-
break
201-
}
202-
buffer[bufferend] = b
203-
bufferend++
204-
}
205-
}
171+
// putbit appends a single bit to the output (true is 1, false is 0).
// The bit goes to the position selected by bitMask; once eight bits have been
// collected the byte is written to out and a new, empty byte is started.
func (r *result) putbit(b bool) {
172+
if b {
173+
r.bitBuffer |= r.bitMask
174+
}
175+
r.bitMask = r.bitMask >> 1
176+
// The mask dropped off the low end: the byte is complete
if r.bitMask == 0 {
177+
r.out.WriteByte(byte(r.bitBuffer))
178+
r.bitBuffer = 0
179+
r.bitMask = 128
206180
}
207-
flush_bit_buffer(out)
181+
}
208182

209-
return out.Bytes()
183+
// bytes returns the bytes written to the output so far. Bits that are still
// pending in bitBuffer are not included until flush has been called.
func (r *result) bytes() []byte {
184+
return r.out.Bytes()
210185
}

0 commit comments

Comments
 (0)