Skip to content

Commit b3d1ec8

Browse files
authored
reapply: "fix: refactor coder logger to allow flush without deadlock (#375)" (#377)
1 parent a1e8f3c commit b3d1ec8

File tree

6 files changed

+328
-86
lines changed

6 files changed

+328
-86
lines changed

cmd/envbuilder/main.go

+13-2
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,15 @@ func envbuilderCmd() serpent.Command {
3737
Options: o.CLI(),
3838
Handler: func(inv *serpent.Invocation) error {
3939
o.SetDefaults()
40+
var preExecs []func()
41+
preExec := func() {
42+
for _, fn := range preExecs {
43+
fn()
44+
}
45+
preExecs = nil
46+
}
47+
defer preExec() // Ensure cleanup in case of error.
48+
4049
o.Logger = log.New(os.Stderr, o.Verbose)
4150
if o.CoderAgentURL != "" {
4251
if o.CoderAgentToken == "" {
@@ -49,7 +58,9 @@ func envbuilderCmd() serpent.Command {
4958
coderLog, closeLogs, err := log.Coder(inv.Context(), u, o.CoderAgentToken)
5059
if err == nil {
5160
o.Logger = log.Wrap(o.Logger, coderLog)
52-
defer closeLogs()
61+
preExecs = append(preExecs, func() {
62+
closeLogs()
63+
})
5364
// This adds the envbuilder subsystem.
5465
// If telemetry is enabled in a Coder deployment,
5566
// this will be reported and help us understand
@@ -78,7 +89,7 @@ func envbuilderCmd() serpent.Command {
7889
return nil
7990
}
8091

81-
err := envbuilder.Run(inv.Context(), o)
92+
err := envbuilder.Run(inv.Context(), o, preExec)
8293
if err != nil {
8394
o.Logger(log.LevelError, "error: %s", err)
8495
}

envbuilder.go

+6-1
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,9 @@ type execArgsInfo struct {
8484
// Logger is the logf to use for all operations.
8585
// Filesystem is the filesystem to use for all operations.
8686
// Defaults to the host filesystem.
87-
func Run(ctx context.Context, opts options.Options) error {
87+
// preExec are any functions that should be called before exec'ing the init
88+
// command. This is useful for ensuring that defers get run.
89+
func Run(ctx context.Context, opts options.Options, preExec ...func()) error {
8890
var args execArgsInfo
8991
// Run in a separate function to ensure all defers run before we
9092
// setuid or exec.
@@ -103,6 +105,9 @@ func Run(ctx context.Context, opts options.Options) error {
103105
}
104106

105107
opts.Logger(log.LevelInfo, "=== Running the init command %s %+v as the %q user...", opts.InitCommand, args.InitArgs, args.UserInfo.user.Username)
108+
for _, fn := range preExec {
109+
fn()
110+
}
106111

107112
err = syscall.Exec(args.InitCommand, append([]string{args.InitCommand}, args.InitArgs...), args.Environ)
108113
if err != nil {

go.mod

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ require (
2525
github.com/gliderlabs/ssh v0.3.7
2626
github.com/go-git/go-billy/v5 v5.5.0
2727
github.com/go-git/go-git/v5 v5.12.0
28+
github.com/google/go-cmp v0.6.0
2829
github.com/google/go-containerregistry v0.20.1
2930
github.com/google/uuid v1.6.0
3031
github.com/hashicorp/go-multierror v1.1.1
@@ -149,7 +150,6 @@ require (
149150
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
150151
github.com/golang/protobuf v1.5.4 // indirect
151152
github.com/google/btree v1.1.2 // indirect
152-
github.com/google/go-cmp v0.6.0 // indirect
153153
github.com/google/nftables v0.2.0 // indirect
154154
github.com/google/pprof v0.0.0-20230817174616-7a8ec2ada47b // indirect
155155
github.com/gorilla/handlers v1.5.1 // indirect

integration/integration_test.go

+67
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ import (
2323
"testing"
2424
"time"
2525

26+
"github.com/coder/coder/v2/codersdk"
27+
"github.com/coder/coder/v2/codersdk/agentsdk"
2628
"github.com/coder/envbuilder"
2729
"github.com/coder/envbuilder/devcontainer/features"
2830
"github.com/coder/envbuilder/internal/magicdir"
@@ -58,6 +60,71 @@ const (
5860
testImageUbuntu = "localhost:5000/envbuilder-test-ubuntu:latest"
5961
)
6062

63+
func TestLogs(t *testing.T) {
64+
t.Parallel()
65+
66+
token := uuid.NewString()
67+
logsDone := make(chan struct{})
68+
69+
logHandler := func(w http.ResponseWriter, r *http.Request) {
70+
switch r.URL.Path {
71+
case "/api/v2/buildinfo":
72+
w.Header().Set("Content-Type", "application/json")
73+
_, _ = w.Write([]byte(`{"version": "v2.8.9"}`))
74+
return
75+
case "/api/v2/workspaceagents/me/logs":
76+
w.WriteHeader(http.StatusOK)
77+
tokHdr := r.Header.Get(codersdk.SessionTokenHeader)
78+
assert.Equal(t, token, tokHdr)
79+
var req agentsdk.PatchLogs
80+
err := json.NewDecoder(r.Body).Decode(&req)
81+
if err != nil {
82+
http.Error(w, err.Error(), http.StatusBadRequest)
83+
return
84+
}
85+
for _, log := range req.Logs {
86+
t.Logf("got log: %+v", log)
87+
if strings.Contains(log.Output, "Running the init command") {
88+
close(logsDone)
89+
return
90+
}
91+
}
92+
return
93+
default:
94+
t.Errorf("unexpected request to %s", r.URL.Path)
95+
w.WriteHeader(http.StatusNotFound)
96+
return
97+
}
98+
}
99+
logSrv := httptest.NewServer(http.HandlerFunc(logHandler))
100+
defer logSrv.Close()
101+
102+
// Ensures that a Git repository with a devcontainer.json is cloned and built.
103+
srv := gittest.CreateGitServer(t, gittest.Options{
104+
Files: map[string]string{
105+
"devcontainer.json": `{
106+
"build": {
107+
"dockerfile": "Dockerfile"
108+
},
109+
}`,
110+
"Dockerfile": fmt.Sprintf(`FROM %s`, testImageUbuntu),
111+
},
112+
})
113+
_, err := runEnvbuilder(t, runOpts{env: []string{
114+
envbuilderEnv("GIT_URL", srv.URL),
115+
"CODER_AGENT_URL=" + logSrv.URL,
116+
"CODER_AGENT_TOKEN=" + token,
117+
}})
118+
require.NoError(t, err)
119+
ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
120+
defer cancel()
121+
select {
122+
case <-ctx.Done():
123+
t.Fatal("timed out waiting for logs")
124+
case <-logsDone:
125+
}
126+
}
127+
61128
func TestInitScriptInitCommand(t *testing.T) {
62129
t.Parallel()
63130

log/coder.go

+70-45
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"fmt"
77
"net/url"
88
"os"
9+
"sync"
910
"time"
1011

1112
"cdr.dev/slog"
@@ -27,13 +28,14 @@ var (
2728
minAgentAPIV2 = "v2.9"
2829
)
2930

30-
// Coder establishes a connection to the Coder instance located at
31-
// coderURL and authenticates using token. It then establishes a
32-
// dRPC connection to the Agent API and begins sending logs.
33-
// If the version of Coder does not support the Agent API, it will
34-
// fall back to using the PatchLogs endpoint.
35-
// The returned function is used to block until all logs are sent.
36-
func Coder(ctx context.Context, coderURL *url.URL, token string) (Func, func(), error) {
31+
// Coder establishes a connection to the Coder instance located at coderURL and
32+
// authenticates using token. It then establishes a dRPC connection to the Agent
33+
// API and begins sending logs. If the version of Coder does not support the
34+
// Agent API, it will fall back to using the PatchLogs endpoint. The closer is
35+
// used to close the logger and to wait at most logSendGracePeriod for logs to
36+
// be sent. Cancelling the context will close the logs immediately without
37+
// waiting for logs to be sent.
38+
func Coder(ctx context.Context, coderURL *url.URL, token string) (logger Func, closer func(), err error) {
3739
// To troubleshoot issues, we need some way of logging.
3840
metaLogger := slog.Make(sloghuman.Sink(os.Stderr))
3941
defer metaLogger.Sync()
@@ -44,18 +46,39 @@ func Coder(ctx context.Context, coderURL *url.URL, token string) (Func, func(),
4446
}
4547
if semver.Compare(semver.MajorMinor(bi.Version), minAgentAPIV2) < 0 {
4648
metaLogger.Warn(ctx, "Detected Coder version incompatible with AgentAPI v2, falling back to deprecated API", slog.F("coder_version", bi.Version))
47-
sendLogs, flushLogs := sendLogsV1(ctx, client, metaLogger.Named("send_logs_v1"))
48-
return sendLogs, flushLogs, nil
49+
logger, closer = sendLogsV1(ctx, client, metaLogger.Named("send_logs_v1"))
50+
return logger, closer, nil
4951
}
52+
53+
// Create a new context so we can ensure the connection is torn down.
54+
ctx, cancel := context.WithCancel(ctx)
55+
defer func() {
56+
if err != nil {
57+
cancel()
58+
}
59+
}()
60+
// Note that ctx passed to initRPC will be inherited by the
61+
// underlying connection, nothing we can do about that here.
5062
dac, err := initRPC(ctx, client, metaLogger.Named("init_rpc"))
5163
if err != nil {
5264
// Logged externally
5365
return nil, nil, fmt.Errorf("init coder rpc client: %w", err)
5466
}
5567
ls := agentsdk.NewLogSender(metaLogger.Named("coder_log_sender"))
5668
metaLogger.Warn(ctx, "Sending logs via AgentAPI v2", slog.F("coder_version", bi.Version))
57-
sendLogs, doneFunc := sendLogsV2(ctx, dac, ls, metaLogger.Named("send_logs_v2"))
58-
return sendLogs, doneFunc, nil
69+
logger, loggerCloser := sendLogsV2(ctx, dac, ls, metaLogger.Named("send_logs_v2"))
70+
var closeOnce sync.Once
71+
closer = func() {
72+
loggerCloser()
73+
74+
closeOnce.Do(func() {
75+
// Typically cancel would be after Close, but we want to be
76+
// sure there's nothing that might block on Close.
77+
cancel()
78+
_ = dac.DRPCConn().Close()
79+
})
80+
}
81+
return logger, closer, nil
5982
}
6083

6184
type coderLogSender interface {
@@ -74,7 +97,7 @@ func initClient(coderURL *url.URL, token string) *agentsdk.Client {
7497
func initRPC(ctx context.Context, client *agentsdk.Client, l slog.Logger) (proto.DRPCAgentClient20, error) {
7598
var c proto.DRPCAgentClient20
7699
var err error
77-
retryCtx, retryCancel := context.WithTimeout(context.Background(), rpcConnectTimeout)
100+
retryCtx, retryCancel := context.WithTimeout(ctx, rpcConnectTimeout)
78101
defer retryCancel()
79102
attempts := 0
80103
for r := retry.New(100*time.Millisecond, time.Second); r.Wait(retryCtx); {
@@ -95,65 +118,67 @@ func initRPC(ctx context.Context, client *agentsdk.Client, l slog.Logger) (proto
95118

96119
// sendLogsV1 uses the PatchLogs endpoint to send logs.
97120
// This is deprecated, but required for backward compatibility with older versions of Coder.
98-
func sendLogsV1(ctx context.Context, client *agentsdk.Client, l slog.Logger) (Func, func()) {
121+
func sendLogsV1(ctx context.Context, client *agentsdk.Client, l slog.Logger) (logger Func, closer func()) {
99122
// nolint: staticcheck // required for backwards compatibility
100-
sendLogs, flushLogs := agentsdk.LogsSender(agentsdk.ExternalLogSourceID, client.PatchLogs, slog.Logger{})
123+
sendLog, flushAndClose := agentsdk.LogsSender(agentsdk.ExternalLogSourceID, client.PatchLogs, slog.Logger{})
124+
var mu sync.Mutex
101125
return func(lvl Level, msg string, args ...any) {
102126
log := agentsdk.Log{
103127
CreatedAt: time.Now(),
104128
Output: fmt.Sprintf(msg, args...),
105129
Level: codersdk.LogLevel(lvl),
106130
}
107-
if err := sendLogs(ctx, log); err != nil {
131+
mu.Lock()
132+
defer mu.Unlock()
133+
if err := sendLog(ctx, log); err != nil {
108134
l.Warn(ctx, "failed to send logs to Coder", slog.Error(err))
109135
}
110136
}, func() {
111-
if err := flushLogs(ctx); err != nil {
137+
ctx, cancel := context.WithTimeout(ctx, logSendGracePeriod)
138+
defer cancel()
139+
if err := flushAndClose(ctx); err != nil {
112140
l.Warn(ctx, "failed to flush logs", slog.Error(err))
113141
}
114142
}
115143
}
116144

117145
// sendLogsV2 uses the v2 agent API to send logs. Only compatibile with coder versions >= 2.9.
118-
func sendLogsV2(ctx context.Context, dest agentsdk.LogDest, ls coderLogSender, l slog.Logger) (Func, func()) {
146+
func sendLogsV2(ctx context.Context, dest agentsdk.LogDest, ls coderLogSender, l slog.Logger) (logger Func, closer func()) {
147+
sendCtx, sendCancel := context.WithCancel(ctx)
119148
done := make(chan struct{})
120149
uid := uuid.New()
121150
go func() {
122151
defer close(done)
123-
if err := ls.SendLoop(ctx, dest); err != nil {
152+
if err := ls.SendLoop(sendCtx, dest); err != nil {
124153
if !errors.Is(err, context.Canceled) {
125154
l.Warn(ctx, "failed to send logs to Coder", slog.Error(err))
126155
}
127156
}
128-
129-
// Wait for up to 10 seconds for logs to finish sending.
130-
sendCtx, sendCancel := context.WithTimeout(context.Background(), logSendGracePeriod)
131-
defer sendCancel()
132-
// Try once more to send any pending logs
133-
if err := ls.SendLoop(sendCtx, dest); err != nil {
134-
if !errors.Is(err, context.DeadlineExceeded) {
135-
l.Warn(ctx, "failed to send remaining logs to Coder", slog.Error(err))
136-
}
137-
}
138-
ls.Flush(uid)
139-
if err := ls.WaitUntilEmpty(sendCtx); err != nil {
140-
if !errors.Is(err, context.DeadlineExceeded) {
141-
l.Warn(ctx, "log sender did not empty", slog.Error(err))
142-
}
143-
}
144157
}()
145158

146-
logFunc := func(l Level, msg string, args ...any) {
147-
ls.Enqueue(uid, agentsdk.Log{
148-
CreatedAt: time.Now(),
149-
Output: fmt.Sprintf(msg, args...),
150-
Level: codersdk.LogLevel(l),
151-
})
152-
}
159+
var closeOnce sync.Once
160+
return func(l Level, msg string, args ...any) {
161+
ls.Enqueue(uid, agentsdk.Log{
162+
CreatedAt: time.Now(),
163+
Output: fmt.Sprintf(msg, args...),
164+
Level: codersdk.LogLevel(l),
165+
})
166+
}, func() {
167+
closeOnce.Do(func() {
168+
// Trigger a flush and wait for logs to be sent.
169+
ls.Flush(uid)
170+
ctx, cancel := context.WithTimeout(ctx, logSendGracePeriod)
171+
defer cancel()
172+
err := ls.WaitUntilEmpty(ctx)
173+
if err != nil {
174+
l.Warn(ctx, "log sender did not empty", slog.Error(err))
175+
}
153176

154-
doneFunc := func() {
155-
<-done
156-
}
177+
// Stop the send loop.
178+
sendCancel()
179+
})
157180

158-
return logFunc, doneFunc
181+
// Wait for the send loop to finish.
182+
<-done
183+
}
159184
}

0 commit comments

Comments
 (0)