Skip to content

Commit 6fb5266

Browse files
committed
A few performance improvements
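replaceEscapes() was called for every string, and Key.String() is called a lot in the parser, so small improvements to both add up. Calling replaceEscapes() for every string also isn't needed: the lexer now emits single-line basic strings that contain an escape as itemStringEsc, and the parser only runs replaceEscapes() on those (multiline strings are still always processed). Decoding is about 20 to 30% faster (depending on the TOML file).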
1 parent c0a26cb commit 6fb5266

File tree

8 files changed

+350
-1286
lines changed

8 files changed

+350
-1286
lines changed

bench_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ func BenchmarkDecode(b *testing.B) {
5252
}
5353

5454
b.Run("large-doc", func(b *testing.B) {
55-
d, err := os.ReadFile("testdata/ja-JP.toml")
55+
d, err := os.ReadFile("testdata/Cargo.toml")
5656
if err != nil {
5757
b.Fatal(err)
5858
}

decode_test.go

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1290,7 +1290,7 @@ func TestMetaKeys(t *testing.T) {
12901290
}
12911291

12921292
func TestDecodeParallel(t *testing.T) {
1293-
doc, err := os.ReadFile("testdata/ja-JP.toml")
1293+
doc, err := os.ReadFile("testdata/Cargo.toml")
12941294
if err != nil {
12951295
t.Fatal(err)
12961296
}
@@ -1323,3 +1323,21 @@ func errorContains(have error, want string) bool {
13231323
}
13241324
return strings.Contains(have.Error(), want)
13251325
}
1326+
1327+
func BenchmarkEscapes(b *testing.B) {
1328+
p := new(parser)
1329+
it := item{}
1330+
str := strings.Repeat("hello, world!\n", 10)
1331+
b.ResetTimer()
1332+
for n := 0; n < b.N; n++ {
1333+
p.replaceEscapes(it, str)
1334+
}
1335+
}
1336+
1337+
func BenchmarkKey(b *testing.B) {
1338+
k := Key{"cargo-credential-macos-keychain", "version"}
1339+
b.ResetTimer()
1340+
for n := 0; n < b.N; n++ {
1341+
k.String()
1342+
}
1343+
}

lex.go

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ const (
1717
itemEOF
1818
itemText
1919
itemString
20+
itemStringEsc
2021
itemRawString
2122
itemMultilineString
2223
itemRawMultilineString
@@ -53,6 +54,7 @@ type lexer struct {
5354
state stateFn
5455
items chan item
5556
tomlNext bool
57+
esc bool
5658

5759
// Allow for backing up up to 4 runes. This is necessary because TOML
5860
// contains 3-rune tokens (""" and ''').
@@ -696,7 +698,12 @@ func lexString(lx *lexer) stateFn {
696698
return lexStringEscape
697699
case r == '"':
698700
lx.backup()
699-
lx.emit(itemString)
701+
if lx.esc {
702+
lx.esc = false
703+
lx.emit(itemStringEsc)
704+
} else {
705+
lx.emit(itemString)
706+
}
700707
lx.next()
701708
lx.ignore()
702709
return lx.pop()
@@ -746,6 +753,7 @@ func lexMultilineString(lx *lexer) stateFn {
746753
lx.backup() /// backup: don't include the """ in the item.
747754
lx.backup()
748755
lx.backup()
756+
lx.esc = false
749757
lx.emit(itemMultilineString)
750758
lx.next() /// Read over ''' again and discard it.
751759
lx.next()
@@ -835,6 +843,7 @@ func lexMultilineStringEscape(lx *lexer) stateFn {
835843
}
836844

837845
func lexStringEscape(lx *lexer) stateFn {
846+
lx.esc = true
838847
r := lx.next()
839848
switch r {
840849
case 'e':
@@ -1199,7 +1208,7 @@ func (itype itemType) String() string {
11991208
return "EOF"
12001209
case itemText:
12011210
return "Text"
1202-
case itemString, itemRawString, itemMultilineString, itemRawMultilineString:
1211+
case itemString, itemStringEsc, itemRawString, itemMultilineString, itemRawMultilineString:
12031212
return "String"
12041213
case itemBool:
12051214
return "Bool"

meta.go

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -94,21 +94,41 @@ func (md *MetaData) Undecoded() []Key {
9494
type Key []string
9595

9696
func (k Key) String() string {
97-
ss := make([]string, len(k))
98-
for i := range k {
99-
ss[i] = k.maybeQuoted(i)
97+
// This is called quite often, so it's a bit funky to make it faster.
98+
var b strings.Builder
99+
b.Grow(len(k) * 25)
100+
outer:
101+
for i, kk := range k {
102+
if i > 0 {
103+
b.WriteByte('.')
104+
}
105+
if kk == "" {
106+
b.WriteString(`""`)
107+
} else {
108+
for _, r := range kk {
109+
// "Inline" isBareKeyChar
110+
if !((r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '_' || r == '-') {
111+
b.WriteByte('"')
112+
b.WriteString(dblQuotedReplacer.Replace(kk))
113+
b.WriteByte('"')
114+
continue outer
115+
}
116+
}
117+
b.WriteString(kk)
118+
}
100119
}
101-
return strings.Join(ss, ".")
120+
return b.String()
102121
}
103122

104123
func (k Key) maybeQuoted(i int) string {
105124
if k[i] == "" {
106125
return `""`
107126
}
108-
for _, c := range k[i] {
109-
if !isBareKeyChar(c, false) {
110-
return `"` + dblQuotedReplacer.Replace(k[i]) + `"`
127+
for _, r := range k[i] {
128+
if (r >= 'A' && r <= 'Z') || (r >= 'a' && r <= 'z') || (r >= '0' && r <= '9') || r == '_' || r == '-' {
129+
continue
111130
}
131+
return `"` + dblQuotedReplacer.Replace(k[i]) + `"`
112132
}
113133
return k[i]
114134
}

parse.go

Lines changed: 56 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ func (p *parser) keyString(it item) string {
224224
switch it.typ {
225225
case itemText:
226226
return it.val
227-
case itemString, itemMultilineString,
227+
case itemString, itemStringEsc, itemMultilineString,
228228
itemRawString, itemRawMultilineString:
229229
s, _ := p.value(it, false)
230230
return s.(string)
@@ -244,6 +244,8 @@ var datetimeRepl = strings.NewReplacer(
244244
func (p *parser) value(it item, parentIsArray bool) (any, tomlType) {
245245
switch it.typ {
246246
case itemString:
247+
return it.val, p.typeOfPrimitive(it)
248+
case itemStringEsc:
247249
return p.replaceEscapes(it, it.val), p.typeOfPrimitive(it)
248250
case itemMultilineString:
249251
return p.replaceEscapes(it, p.stripEscapedNewlines(stripFirstNewline(it.val))), p.typeOfPrimitive(it)
@@ -707,8 +709,11 @@ func stripFirstNewline(s string) string {
707709
// the next newline. After a line-ending backslash, all whitespace is removed
708710
// until the next non-whitespace character.
709711
func (p *parser) stripEscapedNewlines(s string) string {
710-
var b strings.Builder
711-
var i int
712+
var (
713+
b strings.Builder
714+
i int
715+
)
716+
b.Grow(len(s))
712717
for {
713718
ix := strings.Index(s[i:], `\`)
714719
if ix < 0 {
@@ -738,9 +743,8 @@ func (p *parser) stripEscapedNewlines(s string) string {
738743
continue
739744
}
740745
if !strings.Contains(s[i:j], "\n") {
741-
// This is not a line-ending backslash.
742-
// (It's a bad escape sequence, but we can let
743-
// replaceEscapes catch it.)
746+
// This is not a line-ending backslash. (It's a bad escape sequence,
747+
// but we can let replaceEscapes catch it.)
744748
i++
745749
continue
746750
}
@@ -751,79 +755,78 @@ func (p *parser) stripEscapedNewlines(s string) string {
751755
}
752756

753757
func (p *parser) replaceEscapes(it item, str string) string {
754-
replaced := make([]rune, 0, len(str))
755-
s := []byte(str)
756-
r := 0
757-
for r < len(s) {
758-
if s[r] != '\\' {
759-
c, size := utf8.DecodeRune(s[r:])
760-
r += size
761-
replaced = append(replaced, c)
758+
var (
759+
b strings.Builder
760+
skip = 0
761+
)
762+
b.Grow(len(str))
763+
for i, c := range str {
764+
if skip > 0 {
765+
skip--
766+
continue
767+
}
768+
if c != '\\' {
769+
b.WriteRune(c)
762770
continue
763771
}
764-
r += 1
765-
if r >= len(s) {
772+
773+
if i >= len(str) {
766774
p.bug("Escape sequence at end of string.")
767775
return ""
768776
}
769-
switch s[r] {
777+
switch str[i+1] {
770778
default:
771-
p.bug("Expected valid escape code after \\, but got %q.", s[r])
779+
p.bug("Expected valid escape code after \\, but got %q.", str[i+1])
772780
case ' ', '\t':
773-
p.panicItemf(it, "invalid escape: '\\%c'", s[r])
781+
p.panicItemf(it, "invalid escape: '\\%c'", str[i+1])
774782
case 'b':
775-
replaced = append(replaced, rune(0x0008))
776-
r += 1
783+
b.WriteByte(0x08)
784+
skip = 1
777785
case 't':
778-
replaced = append(replaced, rune(0x0009))
779-
r += 1
786+
b.WriteByte(0x09)
787+
skip = 1
780788
case 'n':
781-
replaced = append(replaced, rune(0x000A))
782-
r += 1
789+
b.WriteByte(0x0a)
790+
skip = 1
783791
case 'f':
784-
replaced = append(replaced, rune(0x000C))
785-
r += 1
792+
b.WriteByte(0x0c)
793+
skip = 1
786794
case 'r':
787-
replaced = append(replaced, rune(0x000D))
788-
r += 1
795+
b.WriteByte(0x0d)
796+
skip = 1
789797
case 'e':
790798
if p.tomlNext {
791-
replaced = append(replaced, rune(0x001B))
792-
r += 1
799+
b.WriteByte(0x1b)
800+
skip = 1
793801
}
794802
case '"':
795-
replaced = append(replaced, rune(0x0022))
796-
r += 1
803+
b.WriteByte(0x22)
804+
skip = 1
797805
case '\\':
798-
replaced = append(replaced, rune(0x005C))
799-
r += 1
806+
b.WriteByte(0x5c)
807+
skip = 1
808+
// The lexer guarantees the correct number of characters are present;
809+
// don't need to check here.
800810
case 'x':
801811
if p.tomlNext {
802-
escaped := p.asciiEscapeToUnicode(it, s[r+1:r+3])
803-
replaced = append(replaced, escaped)
804-
r += 3
812+
escaped := p.asciiEscapeToUnicode(it, str[i+2:i+4])
813+
b.WriteRune(escaped)
814+
skip = 3
805815
}
806816
case 'u':
807-
// At this point, we know we have a Unicode escape of the form
808-
// `uXXXX` at [r, r+5). (Because the lexer guarantees this
809-
// for us.)
810-
escaped := p.asciiEscapeToUnicode(it, s[r+1:r+5])
811-
replaced = append(replaced, escaped)
812-
r += 5
817+
escaped := p.asciiEscapeToUnicode(it, str[i+2:i+6])
818+
b.WriteRune(escaped)
819+
skip = 5
813820
case 'U':
814-
// At this point, we know we have a Unicode escape of the form
815-
// `uXXXX` at [r, r+9). (Because the lexer guarantees this
816-
// for us.)
817-
escaped := p.asciiEscapeToUnicode(it, s[r+1:r+9])
818-
replaced = append(replaced, escaped)
819-
r += 9
821+
escaped := p.asciiEscapeToUnicode(it, str[i+2:i+10])
822+
b.WriteRune(escaped)
823+
skip = 9
820824
}
821825
}
822-
return string(replaced)
826+
return b.String()
823827
}
824828

825-
func (p *parser) asciiEscapeToUnicode(it item, bs []byte) rune {
826-
s := string(bs)
829+
func (p *parser) asciiEscapeToUnicode(it item, s string) rune {
827830
hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32)
828831
if err != nil {
829832
p.bug("Could not parse '%s' as a hexadecimal number, but the lexer claims it's OK: %s", s, err)

0 commit comments

Comments
 (0)