Skip to content

Commit cf24e1c

Browse files
authored
[image-builder] Avoid returning "Unknown" (#20854)
* [cline] Drop learning journal (left-over) * [image-builder-mk3] Avoid sending "Unknown" error code in all cases * [ws-manager-mk2] image-builder proxy: Prefix proxied gRPC services to avoid name clashes with the original service Also, avoid sending "Unknown" errors, and wrap it into meaningfull gRPC status codes (+ additional logging)
1 parent 7d1a8eb commit cf24e1c

File tree

6 files changed

+376
-167
lines changed

6 files changed

+376
-167
lines changed

.clinerules/learning-journal.md

Lines changed: 0 additions & 152 deletions
This file was deleted.

components/image-builder-mk3/pkg/orchestrator/orchestrator.go

Lines changed: 39 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ func (o *Orchestrator) Build(req *protocol.BuildRequest, resp protocol.ImageBuil
260260
BaseRef: req.BaseImageNameResolved,
261261
})
262262
if err != nil {
263-
return err
263+
return handleFailedBuildStreamResponse(err, "cannot send build response")
264264
}
265265
return nil
266266
}
@@ -307,7 +307,7 @@ func (o *Orchestrator) Build(req *protocol.BuildRequest, resp protocol.ImageBuil
307307
BaseRef: baseref,
308308
})
309309
if err != nil {
310-
return err
310+
return handleFailedBuildStreamResponse(err, "cannot send build response")
311311
}
312312
return nil
313313
}
@@ -322,11 +322,12 @@ func (o *Orchestrator) Build(req *protocol.BuildRequest, resp protocol.ImageBuil
322322

323323
randomUUID, err := uuid.NewRandom()
324324
if err != nil {
325-
return
325+
return status.Errorf(codes.Internal, "failed to generate build ID: %v", err)
326326
}
327+
buildID := randomUUID.String()
328+
log := log.WithField("buildID", buildID)
327329

328330
var (
329-
buildID = randomUUID.String()
330331
buildBase = "false"
331332
contextPath = "."
332333
dockerfilePath = "Dockerfile"
@@ -368,7 +369,7 @@ func (o *Orchestrator) Build(req *protocol.BuildRequest, resp protocol.ImageBuil
368369

369370
pbaseref, err := reference.ParseNormalizedNamed(baseref)
370371
if err != nil {
371-
return xerrors.Errorf("cannot parse baseref: %v", err)
372+
return status.Errorf(codes.InvalidArgument, "cannot parse baseref: %v", err)
372373
}
373374
bobBaseref := "localhost:8080/base"
374375
if r, ok := pbaseref.(reference.Digested); ok {
@@ -384,7 +385,7 @@ func (o *Orchestrator) Build(req *protocol.BuildRequest, resp protocol.ImageBuil
384385
})
385386
additionalAuth, err = json.Marshal(ath)
386387
if err != nil {
387-
return xerrors.Errorf("cannot marshal additional auth: %w", err)
388+
return status.Errorf(codes.InvalidArgument, "cannot marshal additional auth: %v", err)
388389
}
389390
}
390391

@@ -432,7 +433,7 @@ func (o *Orchestrator) Build(req *protocol.BuildRequest, resp protocol.ImageBuil
432433
Name: "WORKSPACEKIT_BOBPROXY_ADDITIONALAUTH",
433434
Value: string(additionalAuth),
434435
},
435-
{Name: "SUPERVISOR_DEBUG_ENABLE", Value: fmt.Sprintf("%v", log.Log.Logger.IsLevelEnabled(logrus.DebugLevel))},
436+
{Name: "SUPERVISOR_DEBUG_ENABLE", Value: fmt.Sprintf("%v", log.Logger.IsLevelEnabled(logrus.DebugLevel))},
436437
},
437438
},
438439
Type: wsmanapi.WorkspaceType_IMAGEBUILD,
@@ -476,8 +477,8 @@ func (o *Orchestrator) Build(req *protocol.BuildRequest, resp protocol.ImageBuil
476477

477478
err := resp.Send(update)
478479
if err != nil {
479-
log.WithError(err).Error("cannot forward build update - dropping listener")
480-
return status.Errorf(codes.Unknown, "cannot send update: %v", err)
480+
log.WithError(err).Info("cannot forward build update - dropping listener")
481+
return handleFailedBuildStreamResponse(err, "cannot send update")
481482
}
482483

483484
if update.Status == protocol.BuildStatus_done_failure || update.Status == protocol.BuildStatus_done_success {
@@ -555,8 +556,8 @@ func (o *Orchestrator) Logs(req *protocol.LogsRequest, resp protocol.ImageBuilde
555556

556557
err := resp.Send(update)
557558
if err != nil {
558-
log.WithError(err).Error("cannot forward log output - dropping listener")
559-
return status.Errorf(codes.Unknown, "cannot send log output: %v", err)
559+
log.WithError(err).Info("cannot forward log output - dropping listener")
560+
return handleFailedBuildStreamResponse(err, "cannot send log output")
560561
}
561562
}
562563

@@ -709,6 +710,33 @@ func (o *Orchestrator) getWorkspaceImageRef(ctx context.Context, baseref string)
709710
return fmt.Sprintf("%s:%x", o.Config.WorkspaceImageRepository, dst), nil
710711
}
711712

713+
func handleFailedBuildStreamResponse(err error, msg string) error {
714+
if err == nil {
715+
// OK is OK
716+
return nil
717+
}
718+
719+
// If the error is a context.DeadlineExceeded, we return nil (OK) as requested.
720+
if errors.Is(err, context.DeadlineExceeded) {
721+
// Return nil (OK) for DeadlineExceeded
722+
return nil
723+
}
724+
725+
// If it's already a gRPC status error, check for DeadlineExceeded
726+
if st, ok := status.FromError(err); ok {
727+
if st.Code() == codes.DeadlineExceeded {
728+
// Return nil (OK) for DeadlineExceeded as requested
729+
return nil
730+
}
731+
732+
log.WithError(err).WithField("code", status.Code(err)).Error(fmt.Sprintf("unexpected error while sending build response: %s", msg))
733+
return err
734+
}
735+
736+
log.WithError(err).Error(fmt.Sprintf("unexpected error while sending build response: %s", msg))
737+
return status.Errorf(codes.Unavailable, "%s: %v", msg, err)
738+
}
739+
712740
// parentCantCancelContext is a bit of a hack. We have some operations which we want to keep alive even after clients
713741
// disconnect. gRPC cancels the context once a client disconnects, thus we intercept the cancelation and act as if
714742
// nothing had happened.

components/ws-manager-mk2/main.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,10 @@ import (
5353
//+kubebuilder:scaffold:imports
5454
)
5555

56+
// PROXIED_GRPC_SERVICE_PREFIX is the prefix used for proxied gRPC services.
57+
// It is used to avoid conflicts with the original service names in metrics, and to filter out proxied services in SLIs etc.
58+
const PROXIED_GRPC_SERVICE_PREFIX = "proxied"
59+
5660
var (
5761
// ServiceName is the name we use for tracing/logging
5862
ServiceName = "ws-manager-mk2"
@@ -258,9 +262,12 @@ func setupGRPCService(cfg *config.ServiceConfiguration, k8s client.Client, maint
258262
grpcMetrics.EnableHandlingTimeHistogram()
259263
metrics.Registry.MustRegister(grpcMetrics)
260264

265+
// Create interceptor for prefixing the proxied gRPC service names, to avoid conflicts with the original service names in metrics
266+
grpcServicePrefixer := &imgproxy.ServiceNamePrefixerInterceptor{Prefix: PROXIED_GRPC_SERVICE_PREFIX}
267+
261268
grpcOpts := common_grpc.ServerOptionsWithInterceptors(
262-
[]grpc.StreamServerInterceptor{grpcMetrics.StreamServerInterceptor()},
263-
[]grpc.UnaryServerInterceptor{grpcMetrics.UnaryServerInterceptor(), ratelimits.UnaryInterceptor()},
269+
[]grpc.StreamServerInterceptor{grpcServicePrefixer.StreamServerInterceptor(), grpcMetrics.StreamServerInterceptor()},
270+
[]grpc.UnaryServerInterceptor{grpcServicePrefixer.UnaryServerInterceptor(), grpcMetrics.UnaryServerInterceptor(), ratelimits.UnaryInterceptor()},
264271
)
265272
if cfg.RPCServer.TLS.CA != "" && cfg.RPCServer.TLS.Certificate != "" && cfg.RPCServer.TLS.PrivateKey != "" {
266273
tlsConfig, err := common_grpc.ClientAuthTLSConfig(

components/ws-manager-mk2/pkg/proxy/imagebuilder.go

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,14 @@ package proxy
66

77
import (
88
"context"
9+
"errors"
10+
"io"
911

12+
"github.com/gitpod-io/gitpod/common-go/log"
1013
"github.com/gitpod-io/gitpod/image-builder/api"
14+
15+
"google.golang.org/grpc/codes"
16+
"google.golang.org/grpc/status"
1117
"google.golang.org/protobuf/proto"
1218
)
1319

@@ -58,7 +64,7 @@ func forwardStream[R ProtoMessage](ctx context.Context, recv func() (R, error),
5864
for {
5965
resp, err := recv()
6066
if err != nil {
61-
return err
67+
return handleProxyError(err)
6268
}
6369

6470
// generic hack, can't compare R to nil because R's default value is unclear (not even sure this is nil)
@@ -69,9 +75,47 @@ func forwardStream[R ProtoMessage](ctx context.Context, recv func() (R, error),
6975
}
7076
err = send(resp)
7177
if err != nil {
72-
return err
78+
return handleProxyError(err)
7379
}
7480
}
7581

7682
return nil
7783
}
84+
85+
// handleProxyError ensures all errors have proper gRPC status codes
86+
func handleProxyError(err error) error {
87+
if err == nil {
88+
return nil
89+
}
90+
91+
// If it's already a gRPC status error, check for DeadlineExceeded
92+
if st, ok := status.FromError(err); ok {
93+
if st.Code() == codes.DeadlineExceeded {
94+
// Return nil (OK) for DeadlineExceeded as requested
95+
return nil
96+
}
97+
98+
log.WithError(err).WithField("code", status.Code(err)).Error("unexpected error while sending stream response upstream")
99+
return err
100+
}
101+
102+
// Handle context errors
103+
if errors.Is(err, context.DeadlineExceeded) {
104+
// Return nil (OK) for DeadlineExceeded
105+
return nil
106+
}
107+
108+
if errors.Is(err, io.EOF) {
109+
// Return nil (OK) for EOF, which is a normal when the client ends the stream
110+
return nil
111+
}
112+
113+
log.WithError(err).Error("unexpected error while sending stream response upstream")
114+
115+
if errors.Is(err, context.Canceled) {
116+
return status.Error(codes.Canceled, err.Error())
117+
}
118+
119+
// Wrap any other error as Internal
120+
return status.Error(codes.Internal, err.Error())
121+
}

0 commit comments

Comments
 (0)