Skip to content

Commit f626032

Browse files
committed
[ws-manager-mk2] Switch to lru cache
1 parent 8caea6c commit f626032

File tree

4 files changed

+37
-49
lines changed

4 files changed

+37
-49
lines changed

components/ws-manager-api/go/crd/v1/workspace_types.go

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -128,9 +128,6 @@ type WorkspaceStatus struct {
128128

129129
// +kubebuilder:validation:Optional
130130
Runtime *WorkspaceRuntimeStatus `json:"runtime,omitempty"`
131-
132-
// +kubebuilder:validation:Optional
133-
Metrics map[string]bool `json:"metrics,omitempty"`
134131
}
135132

136133
// +kubebuilder:validation:Enum=Deployed;Failed;Timeout;UserActivity;HeadlessTaskFailed;StoppedByRequest;EverReady;ContentReady;BackupComplete;BackupFailure

components/ws-manager-mk2/config/crd/bases/workspace.gitpod.io_workspaces.yaml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -395,10 +395,6 @@ spec:
395395
type: object
396396
headless:
397397
type: boolean
398-
metrics:
399-
additionalProperties:
400-
type: boolean
401-
type: object
402398
ownerToken:
403399
type: string
404400
phase:

components/ws-manager-mk2/controllers/metrics.go

Lines changed: 26 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111

1212
workspacev1 "github.com/gitpod-io/gitpod/ws-manager/api/crd/v1"
1313
"github.com/go-logr/logr"
14+
lru "github.com/hashicorp/golang-lru"
1415
"github.com/prometheus/client_golang/prometheus"
1516
"sigs.k8s.io/controller-runtime/pkg/client"
1617
)
@@ -37,9 +38,17 @@ type controllerMetrics struct {
3738

3839
workspacePhases *phaseTotalVec
3940
timeoutSettings *timeoutSettingsVec
41+
42+
// used to prevent recording metrics multiple times
43+
cache *lru.Cache
4044
}
4145

42-
func newControllerMetrics(r *WorkspaceReconciler) *controllerMetrics {
46+
func newControllerMetrics(r *WorkspaceReconciler) (*controllerMetrics, error) {
47+
cache, err := lru.New(6000)
48+
if err != nil {
49+
return nil, err
50+
}
51+
4352
return &controllerMetrics{
4453
startupTimeHistVec: prometheus.NewHistogramVec(prometheus.HistogramOpts{
4554
Namespace: metricsNamespace,
@@ -88,14 +97,11 @@ func newControllerMetrics(r *WorkspaceReconciler) *controllerMetrics {
8897

8998
workspacePhases: newPhaseTotalVec(r),
9099
timeoutSettings: newTimeoutSettingsVec(r),
91-
}
100+
cache: cache,
101+
}, nil
92102
}
93103

94104
func (m *controllerMetrics) recordWorkspaceStartupTime(log *logr.Logger, ws *workspacev1.Workspace) {
95-
if _, countedAlready := ws.Status.Metrics[workspaceStartupSeconds]; countedAlready {
96-
return
97-
}
98-
99105
class := ws.Spec.Class
100106
tpe := string(ws.Spec.Type)
101107

@@ -105,14 +111,9 @@ func (m *controllerMetrics) recordWorkspaceStartupTime(log *logr.Logger, ws *wor
105111
}
106112

107113
hist.Observe(float64(time.Since(ws.CreationTimestamp.Time).Seconds()))
108-
ws.Status.Metrics[workspaceStartupSeconds] = true
109114
}
110115

111116
func (m *controllerMetrics) countWorkspaceStartFailures(log *logr.Logger, ws *workspacev1.Workspace) {
112-
if _, countedAlready := ws.Status.Metrics[workspaceStartFailuresTotal]; countedAlready {
113-
return
114-
}
115-
116117
class := ws.Spec.Class
117118
tpe := string(ws.Spec.Type)
118119

@@ -122,14 +123,9 @@ func (m *controllerMetrics) countWorkspaceStartFailures(log *logr.Logger, ws *wo
122123
}
123124

124125
counter.Inc()
125-
ws.Status.Metrics[workspaceStartFailuresTotal] = true
126126
}
127127

128128
func (m *controllerMetrics) countWorkspaceStop(log *logr.Logger, ws *workspacev1.Workspace) {
129-
if _, countedAlready := ws.Status.Metrics[workspaceStopsTotal]; countedAlready {
130-
return
131-
}
132-
133129
class := ws.Spec.Class
134130
tpe := string(ws.Spec.Type)
135131

@@ -139,14 +135,9 @@ func (m *controllerMetrics) countWorkspaceStop(log *logr.Logger, ws *workspacev1
139135
}
140136

141137
counter.Inc()
142-
ws.Status.Metrics[workspaceStopsTotal] = true
143138
}
144139

145140
func (m *controllerMetrics) countTotalBackups(log *logr.Logger, ws *workspacev1.Workspace) {
146-
if _, countedAlready := ws.Status.Metrics[workspaceBackupsTotal]; countedAlready {
147-
return
148-
}
149-
150141
class := ws.Spec.Class
151142
tpe := string(ws.Spec.Type)
152143

@@ -156,14 +147,9 @@ func (m *controllerMetrics) countTotalBackups(log *logr.Logger, ws *workspacev1.
156147
}
157148

158149
counter.Inc()
159-
ws.Status.Metrics[workspaceBackupsTotal] = true
160150
}
161151

162152
func (m *controllerMetrics) countTotalBackupFailures(log *logr.Logger, ws *workspacev1.Workspace) {
163-
if _, countedAlready := ws.Status.Metrics[workspaceBackupFailuresTotal]; countedAlready {
164-
return
165-
}
166-
167153
class := ws.Spec.Class
168154
tpe := string(ws.Spec.Type)
169155

@@ -173,14 +159,9 @@ func (m *controllerMetrics) countTotalBackupFailures(log *logr.Logger, ws *works
173159
}
174160

175161
counter.Inc()
176-
ws.Status.Metrics[workspaceBackupFailuresTotal] = true
177162
}
178163

179164
func (m *controllerMetrics) countTotalRestores(log *logr.Logger, ws *workspacev1.Workspace) {
180-
if _, countedAlready := ws.Status.Metrics[workspaceRestoresTotal]; countedAlready {
181-
return
182-
}
183-
184165
class := ws.Spec.Class
185166
tpe := string(ws.Spec.Type)
186167

@@ -190,14 +171,9 @@ func (m *controllerMetrics) countTotalRestores(log *logr.Logger, ws *workspacev1
190171
}
191172

192173
counter.Inc()
193-
ws.Status.Metrics[workspaceRestoresTotal] = true
194174
}
195175

196176
func (m *controllerMetrics) countTotalRestoreFailures(log *logr.Logger, ws *workspacev1.Workspace) {
197-
if _, countedAlready := ws.Status.Metrics[workspaceRestoresFailureTotal]; countedAlready {
198-
return
199-
}
200-
201177
class := ws.Spec.Class
202178
tpe := string(ws.Spec.Type)
203179

@@ -207,7 +183,20 @@ func (m *controllerMetrics) countTotalRestoreFailures(log *logr.Logger, ws *work
207183
}
208184

209185
counter.Inc()
210-
ws.Status.Metrics[workspaceRestoresFailureTotal] = true
186+
}
187+
188+
func (m *controllerMetrics) rememberWorkspace(ws *workspacev1.Workspace) {
189+
m.cache.Add(ws.Name, ws.Status.Phase)
190+
}
191+
192+
func (m *controllerMetrics) shouldUpdate(log *logr.Logger, ws *workspacev1.Workspace) bool {
193+
p, ok := m.cache.Get(ws.Name)
194+
if !ok {
195+
return false
196+
}
197+
198+
phase := p.(workspacev1.WorkspacePhase)
199+
return phase != ws.Status.Phase
211200
}
212201

213202
// Describe implements Collector. It will send exactly one Desc to the provided channel.

components/ws-manager-mk2/controllers/workspace_controller.go

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,10 @@ func NewWorkspaceReconciler(c client.Client, scheme *runtime.Scheme, cfg config.
3535
Config: cfg,
3636
}
3737

38-
metrics := newControllerMetrics(reconciler)
38+
metrics, err := newControllerMetrics(reconciler)
39+
if err != nil {
40+
return nil, err
41+
}
3942
reg.MustRegister(metrics)
4043
reconciler.metrics = metrics
4144

@@ -84,10 +87,6 @@ func (r *WorkspaceReconciler) Reconcile(ctx context.Context, req ctrl.Request) (
8487
workspace.Status.Conditions = []metav1.Condition{}
8588
}
8689

87-
if workspace.Status.Metrics == nil {
88-
workspace.Status.Metrics = make(map[string]bool)
89-
}
90-
9190
log.Info("reconciling workspace", "ws", req.NamespacedName)
9291

9392
var workspacePods corev1.PodList
@@ -156,6 +155,7 @@ func (r *WorkspaceReconciler) actOnStatus(ctx context.Context, workspace *worksp
156155
// need to be deleted and re-created
157156
workspace.Status.PodStarts++
158157
}
158+
r.metrics.rememberWorkspace(workspace)
159159

160160
case workspace.Status.Phase == workspacev1.WorkspacePhaseStopped:
161161
err := r.Client.Delete(ctx, workspace)
@@ -233,6 +233,10 @@ func (r *WorkspaceReconciler) updateMetrics(ctx context.Context, workspace *work
233233

234234
phase := workspace.Status.Phase
235235

236+
if !r.metrics.shouldUpdate(&log, workspace) {
237+
return
238+
}
239+
236240
switch {
237241
case phase == workspacev1.WorkspacePhasePending ||
238242
phase == workspacev1.WorkspacePhaseCreating ||
@@ -265,6 +269,8 @@ func (r *WorkspaceReconciler) updateMetrics(ctx context.Context, workspace *work
265269

266270
r.metrics.countWorkspaceStop(&log, workspace)
267271
}
272+
273+
r.metrics.rememberWorkspace(workspace)
268274
}
269275

270276
func conditionPresentAndTrue(cond []metav1.Condition, tpe string) bool {

0 commit comments

Comments
 (0)