Skip to content

Commit 6fe2c01

Browse files
authored
Merge pull request #1311 from k8s-infra-cherrypick-robot/cherry-pick-1298-to-release-1.10
[release-1.10] Use original error code when backing off
2 parents c164aca + 0ef764c commit 6fe2c01

File tree

4 files changed

+123
-102
lines changed

4 files changed

+123
-102
lines changed

pkg/common/utils.go

+37-48
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,14 @@ var (
7979
// Full or partial URL of the machine type resource, in the format:
8080
// zones/zone/machineTypes/machine-type
8181
machineTypeRegex = regexp.MustCompile(machineTypePattern)
82+
83+
// userErrorCodeMap tells how API error types are translated to error codes.
84+
userErrorCodeMap = map[int]codes.Code{
85+
http.StatusForbidden: codes.PermissionDenied,
86+
http.StatusBadRequest: codes.InvalidArgument,
87+
http.StatusTooManyRequests: codes.ResourceExhausted,
88+
http.StatusNotFound: codes.NotFound,
89+
}
8290
)
8391

8492
func BytesToGbRoundDown(bytes int64) int64 {
@@ -318,82 +326,63 @@ func ParseMachineType(machineTypeUrl string) (string, error) {
318326
return machineType[1], nil
319327
}
320328

321-
// CodeForError returns a pointer to the grpc error code that maps to the http
322-
// error code for the passed in user googleapi error or context error. Returns
323-
// codes.Internal if the given error is not a googleapi error caused by the user.
324-
// The following http error codes are considered user errors:
325-
// (1) http 400 Bad Request, returns grpc InvalidArgument,
326-
// (2) http 403 Forbidden, returns grpc PermissionDenied,
327-
// (3) http 404 Not Found, returns grpc NotFound
328-
// (4) http 429 Too Many Requests, returns grpc ResourceExhausted
329-
// The following errors are considered context errors:
330-
// (1) "context deadline exceeded", returns grpc DeadlineExceeded,
331-
// (2) "context canceled", returns grpc Canceled
332-
func CodeForError(err error) *codes.Code {
333-
if err == nil {
334-
return nil
329+
// CodeForError returns the grpc error code that maps to the http error code for the
330+
// passed in user googleapi error or context error. Returns codes.Internal if the given
331+
// error is not a googleapi error caused by the user. userErrorCodeMap is used for
332+
// encoding most errors.
333+
func CodeForError(sourceError error) codes.Code {
334+
if sourceError == nil {
335+
return codes.Internal
335336
}
336337

337-
if errCode := existingErrorCode(err); errCode != nil {
338-
return errCode
338+
if code, err := existingErrorCode(sourceError); err == nil {
339+
return code
339340
}
340-
if code := isContextError(err); code != nil {
341+
if code, err := isContextError(sourceError); err == nil {
341342
return code
342343
}
343344

344-
internalErrorCode := codes.Internal
345-
// Upwrap the error
346345
var apiErr *googleapi.Error
347-
if !errors.As(err, &apiErr) {
348-
return &internalErrorCode
349-
}
350-
351-
userErrors := map[int]codes.Code{
352-
http.StatusForbidden: codes.PermissionDenied,
353-
http.StatusBadRequest: codes.InvalidArgument,
354-
http.StatusTooManyRequests: codes.ResourceExhausted,
355-
http.StatusNotFound: codes.NotFound,
346+
if !errors.As(sourceError, &apiErr) {
347+
return codes.Internal
356348
}
357-
if code, ok := userErrors[apiErr.Code]; ok {
358-
return &code
349+
if code, ok := userErrorCodeMap[apiErr.Code]; ok {
350+
return code
359351
}
360352

361-
return &internalErrorCode
353+
return codes.Internal
362354
}
363355

364-
// isContextError returns a pointer to the grpc error code DeadlineExceeded
365-
// if the passed in error contains the "context deadline exceeded" string and returns
366-
// the grpc error code Canceled if the error contains the "context canceled" string.
367-
func isContextError(err error) *codes.Code {
356+
// isContextError returns the grpc error code DeadlineExceeded if the passed in error
357+
// contains the "context deadline exceeded" string and returns the grpc error code
358+
// Canceled if the error contains the "context canceled" string. It returns and error if
359+
// err isn't a context error.
360+
func isContextError(err error) (codes.Code, error) {
368361
if err == nil {
369-
return nil
362+
return codes.Unknown, fmt.Errorf("null error")
370363
}
371364

372365
errStr := err.Error()
373366
if strings.Contains(errStr, context.DeadlineExceeded.Error()) {
374-
return errCodePtr(codes.DeadlineExceeded)
367+
return codes.DeadlineExceeded, nil
375368
}
376369
if strings.Contains(errStr, context.Canceled.Error()) {
377-
return errCodePtr(codes.Canceled)
370+
return codes.Canceled, nil
378371
}
379-
return nil
372+
return codes.Unknown, fmt.Errorf("Not a context error: %w", err)
380373
}
381374

382-
func existingErrorCode(err error) *codes.Code {
375+
func existingErrorCode(err error) (codes.Code, error) {
383376
if err == nil {
384-
return nil
377+
return codes.Unknown, fmt.Errorf("null error")
385378
}
386379
if status, ok := status.FromError(err); ok {
387-
return errCodePtr(status.Code())
380+
return status.Code(), nil
388381
}
389-
return nil
390-
}
391-
392-
func errCodePtr(code codes.Code) *codes.Code {
393-
return &code
382+
return codes.Unknown, fmt.Errorf("no existing error code for %w", err)
394383
}
395384

396385
func LoggedError(msg string, err error) error {
397386
klog.Errorf(msg+"%v", err.Error())
398-
return status.Errorf(*CodeForError(err), msg+"%v", err.Error())
387+
return status.Errorf(CodeForError(err), msg+"%v", err.Error())
399388
}

pkg/common/utils_test.go

+27-31
Original file line numberDiff line numberDiff line change
@@ -975,57 +975,51 @@ func TestParseMachineType(t *testing.T) {
975975
}
976976

977977
func TestCodeForError(t *testing.T) {
978-
internalErrorCode := codes.Internal
979-
userErrorCode := codes.InvalidArgument
980978
testCases := []struct {
981979
name string
982980
inputErr error
983-
expCode *codes.Code
981+
expCode codes.Code
984982
}{
985983
{
986984
name: "Not googleapi.Error",
987985
inputErr: errors.New("I am not a googleapi.Error"),
988-
expCode: &internalErrorCode,
986+
expCode: codes.Internal,
989987
},
990988
{
991989
name: "User error",
992990
inputErr: &googleapi.Error{Code: http.StatusBadRequest, Message: "User error with bad request"},
993-
expCode: &userErrorCode,
991+
expCode: codes.InvalidArgument,
994992
},
995993
{
996994
name: "googleapi.Error but not a user error",
997995
inputErr: &googleapi.Error{Code: http.StatusInternalServerError, Message: "Internal error"},
998-
expCode: &internalErrorCode,
996+
expCode: codes.Internal,
999997
},
1000998
{
1001999
name: "context canceled error",
10021000
inputErr: context.Canceled,
1003-
expCode: errCodePtr(codes.Canceled),
1001+
expCode: codes.Canceled,
10041002
},
10051003
{
10061004
name: "context deadline exceeded error",
10071005
inputErr: context.DeadlineExceeded,
1008-
expCode: errCodePtr(codes.DeadlineExceeded),
1006+
expCode: codes.DeadlineExceeded,
10091007
},
10101008
{
10111009
name: "status error with Aborted error code",
10121010
inputErr: status.Error(codes.Aborted, "aborted error"),
1013-
expCode: errCodePtr(codes.Aborted),
1011+
expCode: codes.Aborted,
10141012
},
10151013
{
10161014
name: "nil error",
10171015
inputErr: nil,
1018-
expCode: nil,
1016+
expCode: codes.Internal,
10191017
},
10201018
}
10211019

10221020
for _, tc := range testCases {
1023-
t.Logf("Running test: %v", tc.name)
10241021
errCode := CodeForError(tc.inputErr)
1025-
if (tc.expCode == nil) != (errCode == nil) {
1026-
t.Errorf("test %v failed: got %v, expected %v", tc.name, errCode, tc.expCode)
1027-
}
1028-
if tc.expCode != nil && *errCode != *tc.expCode {
1022+
if errCode != tc.expCode {
10291023
t.Errorf("test %v failed: got %v, expected %v", tc.name, errCode, tc.expCode)
10301024
}
10311025
}
@@ -1035,46 +1029,48 @@ func TestIsContextError(t *testing.T) {
10351029
cases := []struct {
10361030
name string
10371031
err error
1038-
expectedErrCode *codes.Code
1032+
expectedErrCode codes.Code
1033+
expectError bool
10391034
}{
10401035
{
10411036
name: "deadline exceeded error",
10421037
err: context.DeadlineExceeded,
1043-
expectedErrCode: errCodePtr(codes.DeadlineExceeded),
1038+
expectedErrCode: codes.DeadlineExceeded,
10441039
},
10451040
{
10461041
name: "contains 'context deadline exceeded'",
10471042
err: fmt.Errorf("got error: %w", context.DeadlineExceeded),
1048-
expectedErrCode: errCodePtr(codes.DeadlineExceeded),
1043+
expectedErrCode: codes.DeadlineExceeded,
10491044
},
10501045
{
10511046
name: "context canceled error",
10521047
err: context.Canceled,
1053-
expectedErrCode: errCodePtr(codes.Canceled),
1048+
expectedErrCode: codes.Canceled,
10541049
},
10551050
{
10561051
name: "contains 'context canceled'",
10571052
err: fmt.Errorf("got error: %w", context.Canceled),
1058-
expectedErrCode: errCodePtr(codes.Canceled),
1053+
expectedErrCode: codes.Canceled,
10591054
},
10601055
{
1061-
name: "does not contain 'context canceled' or 'context deadline exceeded'",
1062-
err: fmt.Errorf("unknown error"),
1063-
expectedErrCode: nil,
1056+
name: "does not contain 'context canceled' or 'context deadline exceeded'",
1057+
err: fmt.Errorf("unknown error"),
1058+
expectError: true,
10641059
},
10651060
{
1066-
name: "nil error",
1067-
err: nil,
1068-
expectedErrCode: nil,
1061+
name: "nil error",
1062+
err: nil,
1063+
expectError: true,
10691064
},
10701065
}
10711066

10721067
for _, test := range cases {
1073-
errCode := isContextError(test.err)
1074-
if (test.expectedErrCode == nil) != (errCode == nil) {
1075-
t.Errorf("test %v failed: got %v, expected %v", test.name, errCode, test.expectedErrCode)
1076-
}
1077-
if test.expectedErrCode != nil && *errCode != *test.expectedErrCode {
1068+
errCode, err := isContextError(test.err)
1069+
if test.expectError {
1070+
if err == nil {
1071+
t.Errorf("test %v failed, expected error, got %v", test.name, errCode)
1072+
}
1073+
} else if errCode != test.expectedErrCode {
10781074
t.Errorf("test %v failed: got %v, expected %v", test.name, errCode, test.expectedErrCode)
10791075
}
10801076
}

pkg/gce-pd-csi-driver/controller.go

+24-10
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,12 @@ type GCEControllerServer struct {
9292
errorBackoff *csiErrorBackoff
9393
}
9494

95+
type csiErrorBackoffId string
96+
9597
type csiErrorBackoff struct {
96-
backoff *flowcontrol.Backoff
98+
backoff *flowcontrol.Backoff
99+
errorCodes map[csiErrorBackoffId]codes.Code
97100
}
98-
type csiErrorBackoffId string
99101

100102
type workItem struct {
101103
ctx context.Context
@@ -504,13 +506,13 @@ func (gceCS *GCEControllerServer) ControllerPublishVolume(ctx context.Context, r
504506

505507
backoffId := gceCS.errorBackoff.backoffId(req.NodeId, req.VolumeId)
506508
if gceCS.errorBackoff.blocking(backoffId) {
507-
return nil, status.Errorf(codes.Unavailable, "ControllerPublish not permitted on node %q due to backoff condition", req.NodeId)
509+
return nil, status.Errorf(gceCS.errorBackoff.code(backoffId), "ControllerPublish not permitted on node %q due to backoff condition", req.NodeId)
508510
}
509511

510512
resp, err, diskTypeForMetric := gceCS.executeControllerPublishVolume(ctx, req)
511513
if err != nil {
512-
klog.Infof("For node %s adding backoff due to error for volume %s: %v", req.NodeId, req.VolumeId, err.Error())
513-
gceCS.errorBackoff.next(backoffId)
514+
klog.Infof("For node %s adding backoff due to error for volume %s: %v", req.NodeId, req.VolumeId, err)
515+
gceCS.errorBackoff.next(backoffId, common.CodeForError(err))
514516
} else {
515517
klog.Infof("For node %s clear backoff due to successful publish of volume %v", req.NodeId, req.VolumeId)
516518
gceCS.errorBackoff.reset(backoffId)
@@ -669,12 +671,12 @@ func (gceCS *GCEControllerServer) ControllerUnpublishVolume(ctx context.Context,
669671
// Only valid requests will be queued
670672
backoffId := gceCS.errorBackoff.backoffId(req.NodeId, req.VolumeId)
671673
if gceCS.errorBackoff.blocking(backoffId) {
672-
return nil, status.Errorf(codes.Unavailable, "ControllerUnpublish not permitted on node %q due to backoff condition", req.NodeId)
674+
return nil, status.Errorf(gceCS.errorBackoff.code(backoffId), "ControllerUnpublish not permitted on node %q due to backoff condition", req.NodeId)
673675
}
674676
resp, err, diskTypeForMetric := gceCS.executeControllerUnpublishVolume(ctx, req)
675677
if err != nil {
676-
klog.Infof("For node %s adding backoff due to error for volume %s", req.NodeId, req.VolumeId)
677-
gceCS.errorBackoff.next(backoffId)
678+
klog.Infof("For node %s adding backoff due to error for volume %s: %v", req.NodeId, req.VolumeId, err)
679+
gceCS.errorBackoff.next(backoffId, common.CodeForError(err))
678680
} else {
679681
klog.Infof("For node %s clear backoff due to successful unpublish of volume %v", req.NodeId, req.VolumeId)
680682
gceCS.errorBackoff.reset(backoffId)
@@ -1876,7 +1878,7 @@ func pickRandAndConsecutive(slice []string, n int) ([]string, error) {
18761878
}
18771879

18781880
func newCsiErrorBackoff(initialDuration, errorBackoffMaxDuration time.Duration) *csiErrorBackoff {
1879-
return &csiErrorBackoff{flowcontrol.NewBackOff(initialDuration, errorBackoffMaxDuration)}
1881+
return &csiErrorBackoff{flowcontrol.NewBackOff(initialDuration, errorBackoffMaxDuration), make(map[csiErrorBackoffId]codes.Code)}
18801882
}
18811883

18821884
func (_ *csiErrorBackoff) backoffId(nodeId, volumeId string) csiErrorBackoffId {
@@ -1888,10 +1890,22 @@ func (b *csiErrorBackoff) blocking(id csiErrorBackoffId) bool {
18881890
return blk
18891891
}
18901892

1891-
func (b *csiErrorBackoff) next(id csiErrorBackoffId) {
1893+
func (b *csiErrorBackoff) code(id csiErrorBackoffId) codes.Code {
1894+
if code, ok := b.errorCodes[id]; ok {
1895+
return code
1896+
}
1897+
// If we haven't recorded a code, return unavailable, which signals a problem with the driver
1898+
// (ie, next() wasn't called correctly).
1899+
klog.Errorf("using default code for %s", id)
1900+
return codes.Unavailable
1901+
}
1902+
1903+
func (b *csiErrorBackoff) next(id csiErrorBackoffId, code codes.Code) {
18921904
b.backoff.Next(string(id), b.backoff.Clock.Now())
1905+
b.errorCodes[id] = code
18931906
}
18941907

18951908
func (b *csiErrorBackoff) reset(id csiErrorBackoffId) {
18961909
b.backoff.Reset(string(id))
1910+
delete(b.errorCodes, id)
18971911
}

0 commit comments

Comments
 (0)