Skip to content

Commit ddf5651

Browse files
geropl authored and roboquat committed
[ops] WebApp: Alerts on excessive RAM and CPU usage
1 parent 8701732 commit ddf5651

File tree

1 file changed

+32
-0
lines changed

1 file changed

+32
-0
lines changed

operations/observability/mixins/meta/rules/components/server/alerts.libsonnet

Lines changed: 32 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -116,6 +116,38 @@
116116
description: 'Messagebus pod not running',
117117
},
118118
},
119+
{
120+
alert: 'WebAppServicesHighMemoryUsage',
121+
// Reasoning: high rates of RAM consumption should only be temporary. Values based on past data (around 5-10 is constant).
// Fires when the per-(pod, node) sum of the 30m rate() for the server, ws-manager-bridge and usage pods stays above 10000000 for 15m.
// NOTE(review): container_memory_working_set_bytes looks like a gauge, while rate() is intended for counters (a decrease is treated as a counter reset) - consider deriv()/delta() and re-tune the threshold. TODO confirm.
122+
expr: 'sum(rate(container_memory_working_set_bytes{container!="POD", node=~".*", pod=~"(server|ws-manager-bridge|usage)-.*"}[30m])) by (pod, node) > 10000000',
123+
// 'for' is quoted because it is a reserved word in Jsonnet.
'for': '15m',
124+
labels: {
125+
// Sent to the team-internal channel until we have fine-tuned it.
126+
severity: 'warning',
127+
team: 'webapp'
128+
},
129+
annotations: {
130+
runbook_url: 'https://github.com/gitpod-io/runbooks/blob/main/runbooks/WebAppServicesHighMemoryUsage.md',
131+
summary: 'WebApp services consume excessive amounts of memory. Investigation required.',
132+
description: 'WebApp Services excessive memory usage',
133+
},
134+
},
135+
{
136+
alert: 'WebAppServicesHighCPUUsage',
137+
// Reasoning: high rates of CPU consumption should only be temporary.
// Fires when the per-(pod, node) sum of the 5m rate of container_cpu_usage_seconds_total for the listed WebApp pods stays above 0.80 for 10m.
138+
expr: 'sum(rate(container_cpu_usage_seconds_total{container!="POD", node=~".*", pod=~"(content-service|dashboard|db|db-sync|messagebus|payment-endpoint|proxy|server|ws-manager-bridge|usage)-.*"}[5m])) by (pod, node) > 0.80',
139+
// 'for' is quoted because it is a reserved word in Jsonnet.
'for': '10m',
140+
labels: {
141+
// Sent to the team-internal channel until we have fine-tuned it.
142+
severity: 'warning',
143+
team: 'webapp'
144+
},
145+
annotations: {
146+
runbook_url: 'https://github.com/gitpod-io/runbooks/blob/main/runbooks/WebAppServicesHighCPUUsage.md',
147+
summary: 'WebApp services consume excessive amounts of CPU. Investigation required.',
148+
description: 'WebApp Services excessive CPU USAGE',
149+
},
150+
},
119151
],
120152
},
121153
],

0 commit comments

Comments
 (0)