Skip to content

Commit aafbabd

Browse files
authored
flate: Simplify matchlen (remove asm) (#1045)
With unsafe, there is no benefit from matchlen assembly. Remove it.
1 parent dbaa9c1 commit aafbabd

File tree

5 files changed

+48
-119
lines changed

5 files changed

+48
-119
lines changed

flate/fast_encoder.go

+47-5
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ package flate
77

88
import (
99
"fmt"
10+
"math/bits"
1011

1112
"github.com/klauspost/compress/internal/le"
1213
)
@@ -150,13 +151,33 @@ func (e *fastGen) matchlen(s, t int32, src []byte) int32 {
150151
panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")"))
151152
}
152153
}
153-
s1 := int(s) + maxMatchLength - 4
154-
if s1 > len(src) {
155-
s1 = len(src)
154+
s1 := int32(s) + maxMatchLength - 4
155+
if s1 > int32(len(src)) {
156+
s1 = int32(len(src))
156157
}
157158

159+
left := s1 - s
160+
n := 0
161+
for left >= 8 {
162+
diff := le.Load64(src, s) ^ le.Load64(src, t)
163+
if diff != 0 {
164+
return int32(n + bits.TrailingZeros64(diff)>>3)
165+
}
166+
s += 8
167+
t += 8
168+
left -= 8
169+
}
170+
171+
a := src[s:s1]
172+
b := src[t:]
173+
for i := range a {
174+
if a[i] != b[i] {
175+
break
176+
}
177+
n++
178+
}
179+
return int32(n)
158180
// Extend the match to be as long as possible.
159-
return int32(matchLen(src[s:s1], src[t:]))
160181
}
161182

162183
// matchlenLong will return the match length between offsets s and t in src.
@@ -177,7 +198,28 @@ func (e *fastGen) matchlenLong(s, t int32, src []byte) int32 {
177198
}
178199
}
179200
// Extend the match to be as long as possible.
180-
return int32(matchLen(src[s:], src[t:]))
201+
left := int32(len(src)) - s
202+
n := int32(0)
203+
for left >= 8 {
204+
diff := le.Load64(src, s) ^ le.Load64(src, t)
205+
if diff != 0 {
206+
return n + int32(bits.TrailingZeros64(diff)>>3)
207+
}
208+
s += 8
209+
t += 8
210+
n += 8
211+
left -= 8
212+
}
213+
214+
a := src[s:]
215+
b := src[t:]
216+
for i := range a {
217+
if a[i] != b[i] {
218+
break
219+
}
220+
n++
221+
}
222+
return n
181223
}
182224

183225
// Reset the encoding table.

flate/level1.go

+1-29
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,6 @@ package flate
22

33
import (
44
"fmt"
5-
"math/bits"
6-
7-
"github.com/klauspost/compress/internal/le"
85
)
96

107
// fastGen maintains the table for matches,
@@ -122,32 +119,7 @@ func (e *fastEncL1) Encode(dst *tokens, src []byte) {
122119

123120
// Extend the 4-byte match as long as possible.
124121
t := candidate.offset - e.cur
125-
var l = int32(4)
126-
if false {
127-
l = e.matchlenLong(s+4, t+4, src) + 4
128-
} else {
129-
// inlined:
130-
a := src[s:]
131-
b := src[t:]
132-
left := len(a) - 4
133-
for left >= 8 {
134-
if diff := le.Load64(a, l) ^ le.Load64(b, l); diff != 0 {
135-
l += int32(bits.TrailingZeros64(diff) >> 3)
136-
goto endMatch
137-
}
138-
l += 8
139-
left -= 8
140-
}
141-
a = a[l:]
142-
b = b[l:]
143-
for i := range a {
144-
if a[i] != b[i] {
145-
break
146-
}
147-
l++
148-
}
149-
endMatch:
150-
}
122+
l := e.matchlenLong(s+4, t+4, src) + 4
151123

152124
// Extend backwards
153125
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {

flate/matchlen_amd64.go

-16
This file was deleted.

flate/matchlen_amd64.s

-66
This file was deleted.

flate/matchlen_generic.go

-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
//go:build !amd64 || appengine || !gc || noasm
2-
// +build !amd64 appengine !gc noasm
3-
41
// Copyright 2019+ Klaus Post. All rights reserved.
52
// License information can be found in the LICENSE file.
63

0 commit comments

Comments
 (0)