Skip to content

Add metrics for CSI server side error count #1227

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
May 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions cmd/gce-pd-csi-driver/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,14 @@ func handle() {
}
klog.V(2).Infof("Driver vendor version %v", version)

if *runControllerService && *httpEndpoint != "" && metrics.IsGKEComponentVersionAvailable() {
if *runControllerService && *httpEndpoint != "" {
mm := metrics.NewMetricsManager()
mm.InitializeHttpHandler(*httpEndpoint, *metricsPath)
mm.EmitGKEComponentVersion()
mm.RegisterPDCSIMetric()

if metrics.IsGKEComponentVersionAvailable() {
mm.EmitGKEComponentVersion()
}
}

if len(*extraVolumeLabelsStr) > 0 && !*runControllerService {
Expand Down
87 changes: 87 additions & 0 deletions pkg/common/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,21 @@ limitations under the License.
package common

import (
"context"
"errors"
"fmt"
"net/http"
"regexp"
"strings"

"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/meta"
"google.golang.org/api/googleapi"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/util/sets"
volumehelpers "k8s.io/cloud-provider/volume/helpers"
"k8s.io/klog/v2"
)

const (
Expand Down Expand Up @@ -288,3 +295,83 @@ func ParseMachineType(machineTypeUrl string) (string, error) {
}
return machineType[1], nil
}

// CodeForError maps err to a gRPC status code and returns a pointer to it.
// A nil err yields a nil pointer. For a non-nil err the checks run in order:
//
// First, if status.FromError recognizes err as a gRPC status error, the code
// of that status is returned unchanged.
//
// Second, if err's message mentions a context failure, the matching code is
// returned: "context deadline exceeded" gives DeadlineExceeded and
// "context canceled" gives Canceled.
//
// Third, if err is or wraps a googleapi.Error, these HTTP codes are treated
// as user errors and translated:
//
//	400 Bad Request       -> InvalidArgument
//	403 Forbidden         -> PermissionDenied
//	404 Not Found         -> NotFound
//	429 Too Many Requests -> ResourceExhausted
//
// Every other error, including a googleapi.Error with any other HTTP code,
// maps to codes.Internal.
func CodeForError(err error) *codes.Code {
	if err == nil {
		return nil
	}

	if existing := existingErrorCode(err); existing != nil {
		return existing
	}
	if ctxCode := isContextError(err); ctxCode != nil {
		return ctxCode
	}

	// Default for anything that is not identified as a user error below.
	result := codes.Internal

	// Unwrap the error chain looking for a googleapi.Error.
	var gErr *googleapi.Error
	if errors.As(err, &gErr) {
		switch gErr.Code {
		case http.StatusBadRequest:
			result = codes.InvalidArgument
		case http.StatusForbidden:
			result = codes.PermissionDenied
		case http.StatusNotFound:
			result = codes.NotFound
		case http.StatusTooManyRequests:
			result = codes.ResourceExhausted
		}
	}

	return &result
}

// isContextError classifies err by searching its message for the text of the
// standard context errors. It returns a pointer to codes.DeadlineExceeded when
// the message contains "context deadline exceeded", a pointer to
// codes.Canceled when it contains "context canceled", and nil when err is nil
// or matches neither. The deadline check takes precedence if both appear.
func isContextError(err error) *codes.Code {
	if err == nil {
		return nil
	}

	var matched codes.Code
	switch msg := err.Error(); {
	case strings.Contains(msg, context.DeadlineExceeded.Error()):
		matched = codes.DeadlineExceeded
	case strings.Contains(msg, context.Canceled.Error()):
		matched = codes.Canceled
	default:
		return nil
	}
	return &matched
}

// existingErrorCode returns a pointer to the gRPC code already attached to
// err, as reported by status.FromError. It returns nil when err is nil or
// when status.FromError does not recognize err as a status error.
func existingErrorCode(err error) *codes.Code {
	if err == nil {
		return nil
	}

	st, ok := status.FromError(err)
	if !ok {
		return nil
	}

	code := st.Code()
	return &code
}

// errCodePtr returns a pointer to a fresh copy of code, which is convenient
// because the address of a codes constant cannot be taken directly.
func errCodePtr(code codes.Code) *codes.Code {
	ptr := new(codes.Code)
	*ptr = code
	return ptr
}

// LoggedError logs msg immediately followed by err's text at error level and
// returns a gRPC status error that carries the same text together with the
// code selected by CodeForError(err).
//
// msg is treated as literal text and is never interpreted as a format string,
// so a '%' inside msg is reported verbatim instead of corrupting the output.
// A nil err returns nil without logging rather than panicking.
func LoggedError(msg string, err error) error {
	if err == nil {
		return nil
	}

	text := msg + err.Error()
	klog.Error(text)
	// CodeForError only returns nil for a nil error, which was ruled out above.
	return status.Error(*CodeForError(err), text)
}
112 changes: 112 additions & 0 deletions pkg/common/utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,17 @@ limitations under the License.
package common

import (
"context"
"errors"
"fmt"
"net/http"
"reflect"
"testing"

"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/meta"
"google.golang.org/api/googleapi"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)

const (
Expand Down Expand Up @@ -853,3 +859,109 @@ func TestParseMachineType(t *testing.T) {
})
}
}

// TestCodeForError verifies the gRPC code CodeForError selects for plain
// errors, googleapi errors, context errors, existing status errors and nil.
func TestCodeForError(t *testing.T) {
	testCases := []struct {
		name     string
		inputErr error
		expCode  *codes.Code
	}{
		{
			name:     "Not googleapi.Error",
			inputErr: errors.New("I am not a googleapi.Error"),
			expCode:  errCodePtr(codes.Internal),
		},
		{
			name:     "User error",
			inputErr: &googleapi.Error{Code: http.StatusBadRequest, Message: "User error with bad request"},
			expCode:  errCodePtr(codes.InvalidArgument),
		},
		{
			name:     "googleapi.Error but not a user error",
			inputErr: &googleapi.Error{Code: http.StatusInternalServerError, Message: "Internal error"},
			expCode:  errCodePtr(codes.Internal),
		},
		{
			name:     "context canceled error",
			inputErr: context.Canceled,
			expCode:  errCodePtr(codes.Canceled),
		},
		{
			name:     "context deadline exceeded error",
			inputErr: context.DeadlineExceeded,
			expCode:  errCodePtr(codes.DeadlineExceeded),
		},
		{
			name:     "status error with Aborted error code",
			inputErr: status.Error(codes.Aborted, "aborted error"),
			expCode:  errCodePtr(codes.Aborted),
		},
		{
			name:     "nil error",
			inputErr: nil,
			expCode:  nil,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			got := CodeForError(tc.inputErr)
			// Each case dereferences only pointers already known to be
			// non-nil, so a nil/non-nil mismatch is reported as a failure
			// instead of panicking, and messages show code values rather
			// than pointer addresses.
			switch {
			case got == nil && tc.expCode == nil:
				// Both nil: nothing to compare.
			case got == nil:
				t.Errorf("got nil, expected %v", *tc.expCode)
			case tc.expCode == nil:
				t.Errorf("got %v, expected nil", *got)
			case *got != *tc.expCode:
				t.Errorf("got %v, expected %v", *got, *tc.expCode)
			}
		})
	}
}

// TestIsContextError verifies that isContextError recognizes bare and wrapped
// context errors and returns nil for unrelated or nil errors.
func TestIsContextError(t *testing.T) {
	cases := []struct {
		name            string
		err             error
		expectedErrCode *codes.Code
	}{
		{
			name:            "deadline exceeded error",
			err:             context.DeadlineExceeded,
			expectedErrCode: errCodePtr(codes.DeadlineExceeded),
		},
		{
			name:            "contains 'context deadline exceeded'",
			err:             fmt.Errorf("got error: %w", context.DeadlineExceeded),
			expectedErrCode: errCodePtr(codes.DeadlineExceeded),
		},
		{
			name:            "context canceled error",
			err:             context.Canceled,
			expectedErrCode: errCodePtr(codes.Canceled),
		},
		{
			name:            "contains 'context canceled'",
			err:             fmt.Errorf("got error: %w", context.Canceled),
			expectedErrCode: errCodePtr(codes.Canceled),
		},
		{
			name:            "does not contain 'context canceled' or 'context deadline exceeded'",
			err:             fmt.Errorf("unknown error"),
			expectedErrCode: nil,
		},
		{
			name:            "nil error",
			err:             nil,
			expectedErrCode: nil,
		},
	}

	for _, test := range cases {
		t.Run(test.name, func(t *testing.T) {
			got := isContextError(test.err)
			// Each case dereferences only pointers already known to be
			// non-nil, so a nil/non-nil mismatch is reported as a failure
			// instead of panicking, and messages show code values rather
			// than pointer addresses.
			switch {
			case got == nil && test.expectedErrCode == nil:
				// Both nil: nothing to compare.
			case got == nil:
				t.Errorf("got nil, expected %v", *test.expectedErrCode)
			case test.expectedErrCode == nil:
				t.Errorf("got %v, expected nil", *got)
			case *got != *test.expectedErrCode:
				t.Errorf("got %v, expected %v", *got, *test.expectedErrCode)
			}
		})
	}
}
Loading