Skip to content

Commit e9a2242

Browse files
committed
Introduce CSI Metrics Library
1 parent d9213e0 commit e9a2242

File tree

7 files changed

+688
-40
lines changed

7 files changed

+688
-40
lines changed

connection/connection.go

Lines changed: 34 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
"strings"
2525
"time"
2626

27+
"github.com/kubernetes-csi/csi-lib-utils/metrics"
2728
"github.com/kubernetes-csi/csi-lib-utils/protosanitizer"
2829
"google.golang.org/grpc"
2930
"k8s.io/klog"
@@ -58,8 +59,8 @@ const terminationLogPath = "/dev/termination-log"
5859
//
5960
// For other connections, the default behavior from gRPC is used and
6061
// loss of connection is not detected reliably.
61-
func Connect(address string, options ...Option) (*grpc.ClientConn, error) {
62-
return connect(address, []grpc.DialOption{}, options)
62+
func Connect(address string, metricsManager metrics.CSIMetricsManager, options ...Option) (*grpc.ClientConn, error) {
63+
return connect(address, metricsManager, []grpc.DialOption{}, options)
6364
}
6465

6566
// Option is the type of all optional parameters for Connect.
@@ -93,7 +94,10 @@ type options struct {
9394
}
9495

9596
// connect is the internal implementation of Connect. It has more options to enable testing.
96-
func connect(address string, dialOptions []grpc.DialOption, connectOptions []Option) (*grpc.ClientConn, error) {
97+
func connect(
98+
address string,
99+
metricsManager metrics.CSIMetricsManager,
100+
dialOptions []grpc.DialOption, connectOptions []Option) (*grpc.ClientConn, error) {
97101
var o options
98102
for _, option := range connectOptions {
99103
option(&o)
@@ -103,7 +107,10 @@ func connect(address string, dialOptions []grpc.DialOption, connectOptions []Opt
103107
grpc.WithInsecure(), // Don't use TLS, it's usually local Unix domain socket in a container.
104108
grpc.WithBackoffMaxDelay(time.Second), // Retry every second after failure.
105109
grpc.WithBlock(), // Block until connection succeeds.
106-
grpc.WithUnaryInterceptor(LogGRPC), // Log all messages.
110+
grpc.WithChainUnaryInterceptor(
111+
LogGRPC, // Log all messages.
112+
extendedCSIMetricsManager{metricsManager}.recordMetricsInterceptor, // Record metrics for each gRPC call.
113+
),
107114
)
108115
unixPrefix := "unix://"
109116
if strings.HasPrefix(address, "/") {
@@ -179,3 +186,26 @@ func LogGRPC(ctx context.Context, method string, req, reply interface{}, cc *grp
179186
klog.V(5).Infof("GRPC error: %v", err)
180187
return err
181188
}
189+
190+
type extendedCSIMetricsManager struct {
191+
metrics.CSIMetricsManager
192+
}
193+
194+
// recordMetricsInterceptor is a gPRC unary interceptor for recording metrics for CSI operations.
195+
func (cmm extendedCSIMetricsManager) recordMetricsInterceptor(
196+
ctx context.Context,
197+
method string,
198+
req, reply interface{},
199+
cc *grpc.ClientConn,
200+
invoker grpc.UnaryInvoker,
201+
opts ...grpc.CallOption) error {
202+
start := time.Now()
203+
err := invoker(ctx, method, req, reply, cc, opts...)
204+
duration := time.Since(start)
205+
cmm.RecordMetrics(
206+
method, /* operationName */
207+
err, /* operationErr */
208+
duration, /* operationDuration */
209+
)
210+
return err
211+
}

connection/connection_test.go

Lines changed: 89 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"net"
2323
"os"
2424
"path"
25+
"strings"
2526
"sync"
2627
"testing"
2728
"time"
@@ -35,6 +36,9 @@ import (
3536
"github.com/stretchr/testify/require"
3637

3738
"github.com/container-storage-interface/spec/lib/go/csi"
39+
40+
"github.com/kubernetes-csi/csi-lib-utils/metrics"
41+
"k8s.io/component-base/metrics/testutil"
3842
)
3943

4044
func tmpDir(t *testing.T) string {
@@ -84,7 +88,7 @@ func TestConnect(t *testing.T) {
8488
addr, stopServer := startServer(t, tmp, nil, nil)
8589
defer stopServer()
8690

87-
conn, err := Connect(addr)
91+
conn, err := Connect(addr, metrics.NewCSIMetricsManager("fake.csi.driver.io"))
8892
if assert.NoError(t, err, "connect via absolute path") &&
8993
assert.NotNil(t, conn, "got a connection") {
9094
assert.Equal(t, connectivity.Ready, conn.GetState(), "connection ready")
@@ -99,7 +103,7 @@ func TestConnectUnix(t *testing.T) {
99103
addr, stopServer := startServer(t, tmp, nil, nil)
100104
defer stopServer()
101105

102-
conn, err := Connect("unix:///" + addr)
106+
conn, err := Connect("unix:///"+addr, metrics.NewCSIMetricsManager("fake.csi.driver.io"))
103107
if assert.NoError(t, err, "connect with unix:/// prefix") &&
104108
assert.NotNil(t, conn, "got a connection") {
105109
assert.Equal(t, connectivity.Ready, conn.GetState(), "connection ready")
@@ -139,7 +143,7 @@ func TestWaitForServer(t *testing.T) {
139143
startTimeServer = time.Now()
140144
_, stopServer = startServer(t, tmp, nil, nil)
141145
}()
142-
conn, err := Connect(path.Join(tmp, serverSock))
146+
conn, err := Connect(path.Join(tmp, serverSock), metrics.NewCSIMetricsManager("fake.csi.driver.io"))
143147
if assert.NoError(t, err, "connect via absolute path") {
144148
endTime := time.Now()
145149
assert.NotNil(t, conn, "got a connection")
@@ -158,7 +162,7 @@ func TestTimout(t *testing.T) {
158162

159163
startTime := time.Now()
160164
timeout := 5 * time.Second
161-
conn, err := connect(path.Join(tmp, "no-such.sock"), []grpc.DialOption{grpc.WithTimeout(timeout)}, nil)
165+
conn, err := connect(path.Join(tmp, "no-such.sock"), metrics.NewCSIMetricsManager("fake.csi.driver.io"), []grpc.DialOption{grpc.WithTimeout(timeout)}, nil)
162166
endTime := time.Now()
163167
if assert.Error(t, err, "connection should fail") {
164168
assert.InEpsilon(t, timeout, endTime.Sub(startTime), 1, "connection timeout")
@@ -177,7 +181,7 @@ func TestReconnect(t *testing.T) {
177181
}()
178182

179183
// Allow reconnection (the default).
180-
conn, err := Connect(addr)
184+
conn, err := Connect(addr, metrics.NewCSIMetricsManager("fake.csi.driver.io"))
181185
if assert.NoError(t, err, "connect via absolute path") &&
182186
assert.NotNil(t, conn, "got a connection") {
183187
defer conn.Close()
@@ -222,7 +226,7 @@ func TestDisconnect(t *testing.T) {
222226
}()
223227

224228
reconnectCount := 0
225-
conn, err := Connect(addr, OnConnectionLoss(func() bool {
229+
conn, err := Connect(addr, metrics.NewCSIMetricsManager("fake.csi.driver.io"), OnConnectionLoss(func() bool {
226230
reconnectCount++
227231
// Don't reconnect.
228232
return false
@@ -273,7 +277,7 @@ func TestExplicitReconnect(t *testing.T) {
273277
}()
274278

275279
reconnectCount := 0
276-
conn, err := Connect(addr, OnConnectionLoss(func() bool {
280+
conn, err := Connect(addr, metrics.NewCSIMetricsManager("fake.csi.driver.io"), OnConnectionLoss(func() bool {
277281
reconnectCount++
278282
// Reconnect.
279283
return true
@@ -314,3 +318,81 @@ func TestExplicitReconnect(t *testing.T) {
314318
assert.Equal(t, 1, reconnectCount, "connection loss callback should be called once")
315319
}
316320
}
321+
322+
func TestConnectMetrics(t *testing.T) {
323+
tmp := tmpDir(t)
324+
defer os.RemoveAll(tmp)
325+
addr, stopServer := startServer(t, tmp, nil, nil)
326+
defer stopServer()
327+
328+
cmm := metrics.NewCSIMetricsManager("fake.csi.driver.io")
329+
conn, err := Connect(addr, cmm)
330+
if assert.NoError(t, err, "connect via absolute path") &&
331+
assert.NotNil(t, conn, "got a connection") {
332+
defer conn.Close()
333+
assert.Equal(t, connectivity.Ready, conn.GetState(), "connection ready")
334+
335+
if err := conn.Invoke(context.Background(), "/csi.v1.Controller/ControllerGetCapabilities", nil, nil); assert.Error(t, err) {
336+
errStatus, _ := status.FromError(err)
337+
assert.Equal(t, codes.Unimplemented, errStatus.Code(), "not implemented")
338+
}
339+
}
340+
341+
expectedMetrics := `# HELP csi_sidecar_operations_seconds [ALPHA] Container Storage Interface operation duration with gRPC error code status total
342+
# TYPE csi_sidecar_operations_seconds histogram
343+
csi_sidecar_operations_seconds_bucket{driver_name="fake.csi.driver.io",grpc_status_code="Unimplemented",method_name="/csi.v1.Controller/ControllerGetCapabilities",le="0.1"} 1
344+
csi_sidecar_operations_seconds_bucket{driver_name="fake.csi.driver.io",grpc_status_code="Unimplemented",method_name="/csi.v1.Controller/ControllerGetCapabilities",le="0.25"} 1
345+
csi_sidecar_operations_seconds_bucket{driver_name="fake.csi.driver.io",grpc_status_code="Unimplemented",method_name="/csi.v1.Controller/ControllerGetCapabilities",le="0.5"} 1
346+
csi_sidecar_operations_seconds_bucket{driver_name="fake.csi.driver.io",grpc_status_code="Unimplemented",method_name="/csi.v1.Controller/ControllerGetCapabilities",le="1"} 1
347+
csi_sidecar_operations_seconds_bucket{driver_name="fake.csi.driver.io",grpc_status_code="Unimplemented",method_name="/csi.v1.Controller/ControllerGetCapabilities",le="2.5"} 1
348+
csi_sidecar_operations_seconds_bucket{driver_name="fake.csi.driver.io",grpc_status_code="Unimplemented",method_name="/csi.v1.Controller/ControllerGetCapabilities",le="5"} 1
349+
csi_sidecar_operations_seconds_bucket{driver_name="fake.csi.driver.io",grpc_status_code="Unimplemented",method_name="/csi.v1.Controller/ControllerGetCapabilities",le="10"} 1
350+
csi_sidecar_operations_seconds_bucket{driver_name="fake.csi.driver.io",grpc_status_code="Unimplemented",method_name="/csi.v1.Controller/ControllerGetCapabilities",le="15"} 1
351+
csi_sidecar_operations_seconds_bucket{driver_name="fake.csi.driver.io",grpc_status_code="Unimplemented",method_name="/csi.v1.Controller/ControllerGetCapabilities",le="25"} 1
352+
csi_sidecar_operations_seconds_bucket{driver_name="fake.csi.driver.io",grpc_status_code="Unimplemented",method_name="/csi.v1.Controller/ControllerGetCapabilities",le="50"} 1
353+
csi_sidecar_operations_seconds_bucket{driver_name="fake.csi.driver.io",grpc_status_code="Unimplemented",method_name="/csi.v1.Controller/ControllerGetCapabilities",le="120"} 1
354+
csi_sidecar_operations_seconds_bucket{driver_name="fake.csi.driver.io",grpc_status_code="Unimplemented",method_name="/csi.v1.Controller/ControllerGetCapabilities",le="300"} 1
355+
csi_sidecar_operations_seconds_bucket{driver_name="fake.csi.driver.io",grpc_status_code="Unimplemented",method_name="/csi.v1.Controller/ControllerGetCapabilities",le="600"} 1
356+
csi_sidecar_operations_seconds_bucket{driver_name="fake.csi.driver.io",grpc_status_code="Unimplemented",method_name="/csi.v1.Controller/ControllerGetCapabilities",le="+Inf"} 1
357+
csi_sidecar_operations_seconds_sum{driver_name="fake.csi.driver.io",grpc_status_code="Unimplemented",method_name="/csi.v1.Controller/ControllerGetCapabilities"} 0
358+
csi_sidecar_operations_seconds_count{driver_name="fake.csi.driver.io",grpc_status_code="Unimplemented",method_name="/csi.v1.Controller/ControllerGetCapabilities"} 1
359+
`
360+
361+
if err := testutil.GatherAndCompare(
362+
cmm.GetRegistry(), strings.NewReader(expectedMetrics)); err != nil {
363+
// Ignore mismatches on csi_sidecar_operations_seconds_sum metric because execution time will vary from test to test.
364+
err = verifyMetricsError(t, err, "csi_sidecar_operations_seconds_sum")
365+
if err != nil {
366+
t.Errorf("Expected metrics not found -- %v", err)
367+
}
368+
}
369+
}
370+
371+
func verifyMetricsError(t *testing.T, err error, metricToIgnore string) error {
372+
errStringArr := strings.Split(err.Error(), "got:")
373+
374+
if len(errStringArr) != 2 {
375+
return err
376+
}
377+
378+
want := errStringArr[0]
379+
got := strings.TrimSpace(errStringArr[1])
380+
381+
if want == "" || got == "" {
382+
return err
383+
}
384+
385+
wantArr := strings.Split(err.Error(), "want:")
386+
if len(wantArr) != 2 {
387+
return err
388+
}
389+
390+
want = strings.TrimSpace(wantArr[1])
391+
392+
if matchErr := metrics.VerifyMetricsMatch(want, got, metricToIgnore); matchErr != nil {
393+
t.Errorf("%v", matchErr)
394+
return err
395+
}
396+
397+
return nil
398+
}

go.mod

Lines changed: 6 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,36 +4,17 @@ go 1.12
44

55
require (
66
github.com/container-storage-interface/spec v1.1.0
7-
github.com/davecgh/go-spew v1.1.1 // indirect
87
github.com/evanphx/json-patch v4.5.0+incompatible // indirect
9-
github.com/gogo/protobuf v1.2.1 // indirect
108
github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef // indirect
11-
github.com/golang/protobuf v1.3.1
12-
github.com/google/gofuzz v0.0.0-20170612174753-24818f796faf // indirect
9+
github.com/golang/protobuf v1.3.2
1310
github.com/googleapis/gnostic v0.2.0 // indirect
14-
github.com/json-iterator/go v1.1.6 // indirect
15-
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
16-
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 // indirect
1711
github.com/onsi/ginkgo v1.10.2 // indirect
18-
github.com/onsi/gomega v1.7.0 // indirect
1912
github.com/pkg/errors v0.8.1 // indirect
20-
github.com/spf13/pflag v1.0.5 // indirect
21-
github.com/stretchr/testify v1.3.0
22-
golang.org/x/crypto v0.0.0-20190325154230-a5d413f7728c // indirect
23-
golang.org/x/net v0.0.0-20190328230028-74de082e2cca
24-
golang.org/x/oauth2 v0.0.0-20190319182350-c85d3e98c914 // indirect
25-
golang.org/x/sys v0.0.0-20190329044733-9eb1bfa1ce65 // indirect
26-
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4 // indirect
27-
google.golang.org/appengine v1.5.0 // indirect
28-
google.golang.org/genproto v0.0.0-20190327125643-d831d65fe17d // indirect
13+
github.com/stretchr/testify v1.4.0
14+
golang.org/x/net v0.0.0-20191004110552-13f9640d40b9
2915
google.golang.org/grpc v1.19.1
30-
gopkg.in/inf.v0 v0.9.1 // indirect
31-
gopkg.in/yaml.v2 v2.2.2 // indirect
32-
k8s.io/api v0.0.0-20190313235455-40a48860b5ab
33-
k8s.io/apimachinery v0.0.0-20190313205120-d7deff9243b1 // indirect
16+
k8s.io/api v0.17.0
3417
k8s.io/client-go v11.0.0+incompatible
35-
k8s.io/klog v0.2.0
36-
k8s.io/kube-openapi v0.0.0-20190320154901-5e45bb682580 // indirect
37-
k8s.io/utils v0.0.0-20190308190857-21c4ce38f2a7 // indirect
38-
sigs.k8s.io/yaml v1.1.0 // indirect
18+
k8s.io/component-base v0.17.0
19+
k8s.io/klog v1.0.0
3920
)

0 commit comments

Comments
 (0)