Skip to content

Commit 1b9cff9

Browse files
author
Muhammad Shahzeb
authored
Merge pull request prometheus-community#22 from grafana/shahzeb/add-group-by-vars-alerts
Add variables for grouping - Postgres mixin
2 parents 077a5d1 + 3302535 commit 1b9cff9

File tree

4 files changed

+41
-31
lines changed

4 files changed

+41
-31
lines changed

postgres_mixin/alerts/postgres.libsonnet

+35-28
Original file line number | Diff line number | Diff line change
@@ -7,16 +7,16 @@
77
{
88
alert: 'PostgreSQLMaxConnectionsReached',
99
annotations: {
10-
description: '{{ $labels.instance }} is exceeding the currently configured maximum Postgres connection limit (current value: {{ $value }}s). Services may be degraded - please take immediate action (you probably need to increase max_connections in the Docker image and re-deploy.',
10+
description: '{{ $labels.instance }} is exceeding the currently configured maximum Postgres connection limit (current value: {{ $value }}s). Services may be degraded - please take immediate action (you probably need to increase max_connections in the Docker image and re-deploy).',
1111
summary: 'Postgres connections count is over the maximum amount.',
1212
},
1313
expr: |||
14-
sum by (instance) (pg_stat_activity_count{%(postgresExporterSelector)s})
14+
sum by (%(agg)s) (pg_stat_activity_count{%(postgresExporterSelector)s})
1515
>=
16-
sum by (instance) (pg_settings_max_connections{%(postgresExporterSelector)s})
16+
sum by (%(agg)s) (pg_settings_max_connections{%(postgresExporterSelector)s})
1717
-
18-
sum by (instance) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s})
19-
||| % $._config,
18+
sum by (%(agg)s) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s})
19+
||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) },
2020
'for': '1m',
2121
labels: {
2222
severity: 'warning',
@@ -29,14 +29,14 @@
2929
summary: 'Postgres connections count is over 80% of maximum amount.',
3030
},
3131
expr: |||
32-
sum by (instance) (pg_stat_activity_count{%(postgresExporterSelector)s})
32+
sum by (%(agg)s) (pg_stat_activity_count{%(postgresExporterSelector)s})
3333
>
3434
(
35-
sum by (instance) (pg_settings_max_connections{%(postgresExporterSelector)s})
35+
sum by (%(agg)s) (pg_settings_max_connections{%(postgresExporterSelector)s})
3636
-
37-
sum by (instance) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s})
37+
sum by (%(agg)s) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s})
3838
) * 0.8
39-
||| % $._config,
39+
||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) },
4040
'for': '10m',
4141
labels: {
4242
severity: 'warning',
@@ -61,12 +61,12 @@
6161
summary: 'PostgreSQL high number of slow queries.',
6262
},
6363
expr: |||
64-
avg by (datname) (
64+
avg by (datname, %(agg)s) (
6565
rate (
66-
pg_stat_activity_max_tx_duration{%(dbNameFilter)s,%(postgresExporterSelector)s}[2m]
66+
pg_stat_activity_max_tx_duration{%(dbNameFilter)s, %(postgresExporterSelector)s}[2m]
6767
)
6868
) > 2 * 60
69-
||| % $._config,
69+
||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) },
7070
'for': '2m',
7171
labels: {
7272
severity: 'warning',
@@ -79,16 +79,16 @@
7979
summary: 'PostgreSQL high number of queries per second.',
8080
},
8181
expr: |||
82-
avg by (datname) (
82+
avg by (datname, %(agg)s) (
8383
irate(
84-
pg_stat_database_xact_commit{%(dbNameFilter)s,%(postgresExporterSelector)s}[5m]
84+
pg_stat_database_xact_commit{%(dbNameFilter)s, %(postgresExporterSelector)s}[5m]
8585
)
8686
+
8787
irate(
88-
pg_stat_database_xact_rollback{%(dbNameFilter)s,%(postgresExporterSelector)s}[5m]
88+
pg_stat_database_xact_rollback{%(dbNameFilter)s, %(postgresExporterSelector)s}[5m]
8989
)
9090
) > 10000
91-
||| % $._config,
91+
||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) },
9292
'for': '5m',
9393
labels: {
9494
severity: 'warning',
@@ -101,20 +101,20 @@
101101
summary: 'PostgreSQL low cache hit rate.',
102102
},
103103
expr: |||
104-
avg by (datname) (
105-
rate(pg_stat_database_blks_hit{%(dbNameFilter)s,%(postgresExporterSelector)s}[5m])
104+
avg by (datname, %(agg)s) (
105+
rate(pg_stat_database_blks_hit{%(dbNameFilter)s, %(postgresExporterSelector)s}[5m])
106106
/
107107
(
108108
rate(
109-
pg_stat_database_blks_hit{%(dbNameFilter)s,%(postgresExporterSelector)s}[5m]
109+
pg_stat_database_blks_hit{%(dbNameFilter)s, %(postgresExporterSelector)s}[5m]
110110
)
111111
+
112112
rate(
113-
pg_stat_database_blks_read{%(dbNameFilter)s,%(postgresExporterSelector)s}[5m]
113+
pg_stat_database_blks_read{%(dbNameFilter)s, %(postgresExporterSelector)s}[5m]
114114
)
115115
)
116116
) < 0.98
117-
||| % $._config,
117+
||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) },
118118
'for': '5m',
119119
labels: {
120120
severity: 'warning',
@@ -157,9 +157,14 @@
157157
summary: 'PostgreSQL has high number of acquired locks.',
158158
},
159159
expr: |||
160-
max by( server, job, datname, namespace) ((pg_locks_count{%(dbNameFilter)s}) /
161-
on(instance, namespace) group_left(server) (pg_settings_max_locks_per_transaction{} * pg_settings_max_connections{})) > 0.20
162-
||| % $._config,
160+
max by(datname, %(agg)s) (
161+
(pg_locks_count{%(dbNameFilter)s})
162+
/
163+
on(%(aggWithoutServer)s) group_left(server) (
164+
pg_settings_max_locks_per_transaction{} * pg_settings_max_connections{}
165+
)
166+
) > 0.20
167+
||| % $._config { agg: std.join(',', $._config.groupLabels + $._config.instanceLabels), aggWithoutServer: std.join(',', std.filter(function(x) x != "server", $._config.groupLabels + $._config.instanceLabels)) },
163168
'for': '5m',
164169
labels: {
165170
severity: 'warning',
@@ -171,7 +176,9 @@
171176
description: '{{ $labels.instance }} replication lag exceeds 1 hour. Check for network issues or load imbalances.',
172177
summary: 'PostgreSQL replication lagging more than 1 hour.',
173178
},
174-
expr: '(pg_replication_lag{} > 3600) and on (instance) (pg_replication_is_replica{} == 1)',
179+
expr: |||
180+
(pg_replication_lag{} > 3600) and on (%(agg)s) (pg_replication_is_replica{} == 1)
181+
||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) },
175182
'for': '5m',
176183
labels: {
177184
severity: 'warning',
@@ -223,12 +230,12 @@
223230
timestamp(
224231
pg_stat_user_tables_n_dead_tup{} >
225232
pg_stat_user_tables_n_live_tup{}
226-
* on(namespace, job, service, instance, server) group_left pg_settings_autovacuum_vacuum_scale_factor{}
227-
+ on(namespace, job, service, instance, server) group_left pg_settings_autovacuum_vacuum_threshold{}
233+
* on(%(agg)s) group_left pg_settings_autovacuum_vacuum_scale_factor{}
234+
+ on(%(agg)s) group_left pg_settings_autovacuum_vacuum_threshold{}
228235
)
229236
< time() - 36000
230237
)
231-
|||,
238+
||| % $._config { agg: std.join(', ', $._config.groupLabels + $._config.instanceLabels) },
232239
'for': '30m',
233240
labels: {
234241
severity: 'critical',

postgres_mixin/config.libsonnet

+4-1
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,9 @@
11
{
22
_config+:: {
33
dbNameFilter: 'datname!~"template.*"',
4-
postgresExporterSelector: '',
4+
postgresExporterSelector: 'job="integrations/postgres_exporter"',
5+
groupLabels: if self.enableMultiCluster then ['job', 'cluster'] else ['job'],
6+
instanceLabels: ['instance', 'server'],
7+
enableMultiCluster: false,
58
},
69
}
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
{
22
grafanaDashboards+:: {
3-
'postgres-overview.json': (import 'postgres-overview.json'),
3+
'postgresql-overview.json': (import 'postgresql-overview.json'),
44
},
55
}

postgres_mixin/dashboards/postgres-overview.json renamed to postgres_mixin/dashboards/postgresql-overview.json

+1-1
Original file line number | Diff line number | Diff line change
@@ -45,7 +45,7 @@
4545
"repeatRowId": null,
4646
"showTitle": true,
4747
"span": 4,
48-
"title": "Postgres Overview",
48+
"title": "PostgreSQL overview",
4949
"titleSize": "h6",
5050
"type": "row"
5151
},

0 commit comments

Comments
 (0)