Skip to content

Commit 934f1ac

Browse files
janduboisKonikz
authored and
Konikz
committed
Embed binary files as base64 encoded strings
They need to be broken into smaller lines; yqlib doesn't do this, and yamlfmt fails with a buffer overflow when it tries to keep existing line breaks. Signed-off-by: Jan Dubois <[email protected]> Signed-off-by: Konikz <[email protected]>
1 parent b2adef0 commit 934f1ac

File tree

2 files changed

+110
-9
lines changed

2 files changed

+110
-9
lines changed

pkg/limatmpl/embed.go

+70-6
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,14 @@ package limatmpl
66
import (
77
"bytes"
88
"context"
9+
"encoding/base64"
910
"fmt"
1011
"os"
1112
"path/filepath"
1213
"slices"
14+
"strings"
1315
"sync"
16+
"unicode"
1417

1518
"github.com/coreos/go-semver/semver"
1619
"github.com/lima-vm/lima/pkg/limayaml"
@@ -254,7 +257,7 @@ const mergeDocuments = `
254257
| $a | (select(.mountTypesUnsupported) | .mountTypesUnsupported) |= unique
255258
256259
# Remove the custom tags again so they do not clutter up the YAML output.
257-
| $a | .. tag = ""
260+
| $a | .. | select(tag == "!!tag") tag = ""
258261
`
259262

260263
// listFields returns dst and src fields like "list[idx].field".
@@ -552,11 +555,72 @@ func (tmpl *Template) combineNetworks() {
552555
}
553556
}
554557

558+
// yamlfmt will fail with a buffer overflow while trying to retain line breaks if the line
// is longer than 64K. We will encode all text files that have a line that comes close.
// maxLineLength is a constant; it is only a variable for the benefit of the unit tests.
var maxLineLength = 65000

// encodeScriptReason returns the reason why a script needs to be base64 encoded or the empty string if it doesn't.
//
// A script needs to be encoded when it contains a byte that is not part of a valid UTF-8
// sequence, a rune that is neither printable nor one of '\n', '\r', '\t', or a line that
// is longer than maxLineLength bytes (counting the terminating newline).
// Offsets in the returned reason are byte offsets into script.
func encodeScriptReason(script string) string {
	// UTF-8 encoding of U+FFFD; used to tell a real replacement character from a decoding error.
	const replacement = "\uFFFD"

	start := 0 // byte offset of the first character of the current line
	line := 1  // 1-based number of the current line
	for i, r := range script {
		// Ranging over a string yields U+FFFD for every byte that is not valid UTF-8.
		// U+FFFD is printable, so without this check arbitrary binary data consisting of
		// invalid bytes would be treated as text and embedded without base64 encoding.
		if r == unicode.ReplacementChar && !strings.HasPrefix(script[i:], replacement) {
			return fmt.Sprintf("invalid UTF-8 byte 0x%02x at offset %d", script[i], i)
		}
		if !(unicode.IsPrint(r) || r == '\n' || r == '\r' || r == '\t') {
			return fmt.Sprintf("unprintable character %q at offset %d", r, i)
		}
		// maxLineLength includes final newline
		if i-start >= maxLineLength {
			return fmt.Sprintf("line %d (offset %d) is longer than %d characters", line, start, maxLineLength)
		}
		if r == '\n' {
			line++
			start = i + 1
		}
	}
	return ""
}
582+
583+
// base64ChunkLength is the line width used when wrapping base64 output. Technically
// maxLineLength could be used here, but shorter lines look better.
const base64ChunkLength = 76

// binaryString base64 encodes s using the standard alphabet. Output that fits into a
// single line of base64ChunkLength characters is returned unchanged; longer output is
// split into lines of at most base64ChunkLength characters, each followed by a newline.
func binaryString(s string) string {
	rest := base64.StdEncoding.EncodeToString([]byte(s))
	if len(rest) <= base64ChunkLength {
		return rest
	}

	// Reserve room for the payload plus one newline per emitted line.
	lines := (len(rest) + base64ChunkLength - 1) / base64ChunkLength
	var out strings.Builder
	out.Grow(len(rest) + lines)

	// Peel one chunk off the front until nothing is left; only the last chunk may be short.
	for len(rest) > 0 {
		n := base64ChunkLength
		if n > len(rest) {
			n = len(rest)
		}
		out.WriteString(rest[:n])
		out.WriteByte('\n')
		rest = rest[n:]
	}

	return out.String()
}
611+
555612
// updateScript replaces a "file" property with the actual script and then renames the field to newName ("script" or "content").
556-
func (tmpl *Template) updateScript(field string, idx int, newName, script string) {
613+
func (tmpl *Template) updateScript(field string, idx int, newName, script, file string) {
614+
tag := ""
615+
if reason := encodeScriptReason(script); reason != "" {
616+
logrus.Infof("File %q is being base64 encoded: %s", file, reason)
617+
script = binaryString(script)
618+
tag = "!!binary"
619+
}
557620
entry := fmt.Sprintf("$a.%s[%d].file", field, idx)
558-
// Assign script to the "file" field and then rename it to "script".
559-
tmpl.expr.WriteString(fmt.Sprintf("| (%s) = %q | (%s | key) = %q\n", entry, script, entry, newName))
621+
// Assign script to the "file" field and then rename it to "script" or "content".
622+
tmpl.expr.WriteString(fmt.Sprintf("| (%s) = %q | (%s) tag = %q | (%s | key) = %q\n",
623+
entry, script, entry, tag, entry, newName))
560624
}
561625

562626
// embedAllScripts replaces all "provision" and "probes" file references with the actual script.
@@ -579,7 +643,7 @@ func (tmpl *Template) embedAllScripts(ctx context.Context, embedAll bool) error
579643
if err != nil {
580644
return err
581645
}
582-
tmpl.updateScript("probes", i, "script", string(scriptTmpl.Bytes))
646+
tmpl.updateScript("probes", i, "script", string(scriptTmpl.Bytes), p.File.URL)
583647
}
584648
}
585649
for i, p := range tmpl.Config.Provision {
@@ -605,7 +669,7 @@ func (tmpl *Template) embedAllScripts(ctx context.Context, embedAll bool) error
605669
if err != nil {
606670
return err
607671
}
608-
tmpl.updateScript("provision", i, newName, string(scriptTmpl.Bytes))
672+
tmpl.updateScript("provision", i, newName, string(scriptTmpl.Bytes), p.File.URL)
609673
}
610674
}
611675
return tmpl.evalExpr()

pkg/limatmpl/embed_test.go

+40-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// SPDX-FileCopyrightText: Copyright The Lima Authors
22
// SPDX-License-Identifier: Apache-2.0
33

4-
package limatmpl_test
4+
package limatmpl
55

66
import (
77
"context"
@@ -11,7 +11,6 @@ import (
1111
"strings"
1212
"testing"
1313

14-
"github.com/lima-vm/lima/pkg/limatmpl"
1514
"github.com/lima-vm/lima/pkg/limayaml"
1615
"github.com/sirupsen/logrus"
1716
"gotest.tools/v3/assert"
@@ -392,6 +391,27 @@ provision:
392391
"base: https://example.com/lima-linux-riscv64.img",
393392
"{arch: riscv64, images: [{location: https://example.com/lima-linux-riscv64.img, arch: riscv64}]}",
394393
},
394+
{
395+
"Binary files are base64 encoded",
396+
`#
397+
provision:
398+
- mode: data
399+
file: base1.sh # This comment will move to the "content" key
400+
path: /tmp/data
401+
`,
402+
// base1.sh is binary because it contains an audible bell character '\a'
403+
"# base0.yaml is ignored\n---\n#!\a123456789012345678901234567890123456789012345678901234567890",
404+
`
405+
provision:
406+
- mode: data
407+
content: !!binary | # This comment will move to the "content" key
408+
IyEHMTIzNDU2Nzg5MDEyMzQ1Njc4OTAxMjM0NTY3ODkwMTIzNDU2Nzg5MDEyMzQ1Njc4OTAxMjM0
409+
NTY3ODkw
410+
path: /tmp/data
411+
412+
# base0.yaml is ignored
413+
`,
414+
},
395415
}
396416

397417
func TestEmbed(t *testing.T) {
@@ -436,7 +456,7 @@ func RunEmbedTest(t *testing.T, tc embedTestCase) {
436456
err := os.WriteFile(baseFilename, []byte(base), 0o600)
437457
assert.NilError(t, err, tc.description)
438458
}
439-
tmpl := &limatmpl.Template{
459+
tmpl := &Template{
440460
Bytes: fmt.Appendf(nil, "base: base0.yaml\n%s", tc.template),
441461
Locator: "tmpl.yaml",
442462
}
@@ -475,3 +495,20 @@ func RunEmbedTest(t *testing.T, tc embedTestCase) {
475495
assert.Assert(t, cmp.DeepEqual(tmpl.Config, &expected), tc.description)
476496
}
477497
}
498+
499+
func TestEncodeScriptReason(t *testing.T) {
500+
maxLineLength = 8
501+
t.Run("regular script", func(t *testing.T) {
502+
reason := encodeScriptReason("0123456\n")
503+
assert.Equal(t, reason, "")
504+
})
505+
t.Run("binary script", func(t *testing.T) {
506+
reason := encodeScriptReason("abc\a123")
507+
assert.Equal(t, reason, "unprintable character '\\a' at offset 3")
508+
})
509+
t.Run("long line", func(t *testing.T) {
510+
// newline character is included in character count
511+
reason := encodeScriptReason("line 1\nline 2\n01234567\n")
512+
assert.Equal(t, reason, "line 3 (offset 14) is longer than 8 characters")
513+
})
514+
}

0 commit comments

Comments
 (0)