Skip to content

Commit c22a812

Browse files
authored
Merge pull request #1227 from Sneha-at/hyperdisk_slo_sli
Add metrics for CSI server side error count
2 parents f7185c2 + 0b13b82 commit c22a812

File tree

7 files changed

+382
-286
lines changed

7 files changed

+382
-286
lines changed

cmd/gce-pd-csi-driver/main.go

+6-2
Original file line numberDiff line numberDiff line change
@@ -98,10 +98,14 @@ func handle() {
9898
}
9999
klog.V(2).Infof("Driver vendor version %v", version)
100100

101-
if *runControllerService && *httpEndpoint != "" && metrics.IsGKEComponentVersionAvailable() {
101+
if *runControllerService && *httpEndpoint != "" {
102102
mm := metrics.NewMetricsManager()
103103
mm.InitializeHttpHandler(*httpEndpoint, *metricsPath)
104-
mm.EmitGKEComponentVersion()
104+
mm.RegisterPDCSIMetric()
105+
106+
if metrics.IsGKEComponentVersionAvailable() {
107+
mm.EmitGKEComponentVersion()
108+
}
105109
}
106110

107111
if len(*extraVolumeLabelsStr) > 0 && !*runControllerService {

pkg/common/utils.go

+87
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,21 @@ limitations under the License.
1717
package common
1818

1919
import (
20+
"context"
21+
"errors"
2022
"fmt"
23+
"net/http"
2124
"regexp"
2225
"strings"
2326

2427
"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/meta"
28+
"google.golang.org/api/googleapi"
29+
"google.golang.org/grpc/codes"
30+
"google.golang.org/grpc/status"
2531
"k8s.io/apimachinery/pkg/api/resource"
2632
"k8s.io/apimachinery/pkg/util/sets"
2733
volumehelpers "k8s.io/cloud-provider/volume/helpers"
34+
"k8s.io/klog/v2"
2835
)
2936

3037
const (
@@ -288,3 +295,83 @@ func ParseMachineType(machineTypeUrl string) (string, error) {
288295
}
289296
return machineType[1], nil
290297
}
298+
299+
// CodeForError returns a pointer to the grpc error code that maps to the http
300+
// error code for the passed in user googleapi error or context error. Returns
301+
// codes.Internal if the given error is not a googleapi error caused by the user.
302+
// The following http error codes are considered user errors:
303+
// (1) http 400 Bad Request, returns grpc InvalidArgument,
304+
// (2) http 403 Forbidden, returns grpc PermissionDenied,
305+
// (3) http 404 Not Found, returns grpc NotFound
306+
// (4) http 429 Too Many Requests, returns grpc ResourceExhausted
307+
// The following errors are considered context errors:
308+
// (1) "context deadline exceeded", returns grpc DeadlineExceeded,
309+
// (2) "context canceled", returns grpc Canceled
310+
func CodeForError(err error) *codes.Code {
311+
if err == nil {
312+
return nil
313+
}
314+
315+
if errCode := existingErrorCode(err); errCode != nil {
316+
return errCode
317+
}
318+
if code := isContextError(err); code != nil {
319+
return code
320+
}
321+
322+
internalErrorCode := codes.Internal
323+
// Upwrap the error
324+
var apiErr *googleapi.Error
325+
if !errors.As(err, &apiErr) {
326+
return &internalErrorCode
327+
}
328+
329+
userErrors := map[int]codes.Code{
330+
http.StatusForbidden: codes.PermissionDenied,
331+
http.StatusBadRequest: codes.InvalidArgument,
332+
http.StatusTooManyRequests: codes.ResourceExhausted,
333+
http.StatusNotFound: codes.NotFound,
334+
}
335+
if code, ok := userErrors[apiErr.Code]; ok {
336+
return &code
337+
}
338+
339+
return &internalErrorCode
340+
}
341+
342+
// isContextError returns a pointer to the grpc error code DeadlineExceeded
343+
// if the passed in error contains the "context deadline exceeded" string and returns
344+
// the grpc error code Canceled if the error contains the "context canceled" string.
345+
func isContextError(err error) *codes.Code {
346+
if err == nil {
347+
return nil
348+
}
349+
350+
errStr := err.Error()
351+
if strings.Contains(errStr, context.DeadlineExceeded.Error()) {
352+
return errCodePtr(codes.DeadlineExceeded)
353+
}
354+
if strings.Contains(errStr, context.Canceled.Error()) {
355+
return errCodePtr(codes.Canceled)
356+
}
357+
return nil
358+
}
359+
360+
func existingErrorCode(err error) *codes.Code {
361+
if err == nil {
362+
return nil
363+
}
364+
if status, ok := status.FromError(err); ok {
365+
return errCodePtr(status.Code())
366+
}
367+
return nil
368+
}
369+
370+
func errCodePtr(code codes.Code) *codes.Code {
371+
return &code
372+
}
373+
374+
func LoggedError(msg string, err error) error {
375+
klog.Errorf(msg+"%v", err.Error())
376+
return status.Errorf(*CodeForError(err), msg+"%v", err.Error())
377+
}

pkg/common/utils_test.go

+112
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,17 @@ limitations under the License.
1717
package common
1818

1919
import (
20+
"context"
21+
"errors"
2022
"fmt"
23+
"net/http"
2124
"reflect"
2225
"testing"
2326

2427
"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/meta"
28+
"google.golang.org/api/googleapi"
29+
"google.golang.org/grpc/codes"
30+
"google.golang.org/grpc/status"
2531
)
2632

2733
const (
@@ -853,3 +859,109 @@ func TestParseMachineType(t *testing.T) {
853859
})
854860
}
855861
}
862+
863+
func TestCodeForError(t *testing.T) {
864+
internalErrorCode := codes.Internal
865+
userErrorCode := codes.InvalidArgument
866+
testCases := []struct {
867+
name string
868+
inputErr error
869+
expCode *codes.Code
870+
}{
871+
{
872+
name: "Not googleapi.Error",
873+
inputErr: errors.New("I am not a googleapi.Error"),
874+
expCode: &internalErrorCode,
875+
},
876+
{
877+
name: "User error",
878+
inputErr: &googleapi.Error{Code: http.StatusBadRequest, Message: "User error with bad request"},
879+
expCode: &userErrorCode,
880+
},
881+
{
882+
name: "googleapi.Error but not a user error",
883+
inputErr: &googleapi.Error{Code: http.StatusInternalServerError, Message: "Internal error"},
884+
expCode: &internalErrorCode,
885+
},
886+
{
887+
name: "context canceled error",
888+
inputErr: context.Canceled,
889+
expCode: errCodePtr(codes.Canceled),
890+
},
891+
{
892+
name: "context deadline exceeded error",
893+
inputErr: context.DeadlineExceeded,
894+
expCode: errCodePtr(codes.DeadlineExceeded),
895+
},
896+
{
897+
name: "status error with Aborted error code",
898+
inputErr: status.Error(codes.Aborted, "aborted error"),
899+
expCode: errCodePtr(codes.Aborted),
900+
},
901+
{
902+
name: "nil error",
903+
inputErr: nil,
904+
expCode: nil,
905+
},
906+
}
907+
908+
for _, tc := range testCases {
909+
t.Logf("Running test: %v", tc.name)
910+
errCode := CodeForError(tc.inputErr)
911+
if (tc.expCode == nil) != (errCode == nil) {
912+
t.Errorf("test %v failed: got %v, expected %v", tc.name, errCode, tc.expCode)
913+
}
914+
if tc.expCode != nil && *errCode != *tc.expCode {
915+
t.Errorf("test %v failed: got %v, expected %v", tc.name, errCode, tc.expCode)
916+
}
917+
}
918+
}
919+
920+
func TestIsContextError(t *testing.T) {
921+
cases := []struct {
922+
name string
923+
err error
924+
expectedErrCode *codes.Code
925+
}{
926+
{
927+
name: "deadline exceeded error",
928+
err: context.DeadlineExceeded,
929+
expectedErrCode: errCodePtr(codes.DeadlineExceeded),
930+
},
931+
{
932+
name: "contains 'context deadline exceeded'",
933+
err: fmt.Errorf("got error: %w", context.DeadlineExceeded),
934+
expectedErrCode: errCodePtr(codes.DeadlineExceeded),
935+
},
936+
{
937+
name: "context canceled error",
938+
err: context.Canceled,
939+
expectedErrCode: errCodePtr(codes.Canceled),
940+
},
941+
{
942+
name: "contains 'context canceled'",
943+
err: fmt.Errorf("got error: %w", context.Canceled),
944+
expectedErrCode: errCodePtr(codes.Canceled),
945+
},
946+
{
947+
name: "does not contain 'context canceled' or 'context deadline exceeded'",
948+
err: fmt.Errorf("unknown error"),
949+
expectedErrCode: nil,
950+
},
951+
{
952+
name: "nil error",
953+
err: nil,
954+
expectedErrCode: nil,
955+
},
956+
}
957+
958+
for _, test := range cases {
959+
errCode := isContextError(test.err)
960+
if (test.expectedErrCode == nil) != (errCode == nil) {
961+
t.Errorf("test %v failed: got %v, expected %v", test.name, errCode, test.expectedErrCode)
962+
}
963+
if test.expectedErrCode != nil && *errCode != *test.expectedErrCode {
964+
t.Errorf("test %v failed: got %v, expected %v", test.name, errCode, test.expectedErrCode)
965+
}
966+
}
967+
}

0 commit comments

Comments
 (0)