Skip to content

Commit 20d6bac

Browse files
author
Corin Lawson
committed
Introduce histogram support
Prior to this change, the custom queries were restricted to counters and gauges. This change introduces a new ColumnUsage, namely HISTOGRAM, that expects the column to contain an array of upper inclusive bounds for each observation bucket in the emitted metric. It also expects three more columns to be present with the suffixes: - `_bucket`, containing an array of cumulative counters for the observation buckets; - `_sum`, the total sum of all observed values; and - `_count`, the count of events that have been observed. A flag has been added to the MetricMap struct to easily identify metrics that should emit a histogram and the construction of a histogram metric is aided by the pq.Array function and a new helper dbToUint64 function. Finally, an example of usage is given in queries.yaml. fixes prometheus-community#402 Signed-off-by: Corin Lawson <[email protected]>
1 parent b12c8ab commit 20d6bac

File tree

2 files changed

+171
-6
lines changed

2 files changed

+171
-6
lines changed

cmd/postgres_exporter/postgres_exporter.go

+127-6
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ const (
8080
GAUGE ColumnUsage = iota // Use this column as a gauge
8181
MAPPEDMETRIC ColumnUsage = iota // Use this column with the supplied mapping of text values
8282
DURATION ColumnUsage = iota // This column should be interpreted as a text duration (and converted to milliseconds)
83+
HISTOGRAM ColumnUsage = iota // Use this column as a histogram
8384
)
8485

8586
// UnmarshalYAML implements the yaml.Unmarshaller interface.
@@ -169,6 +170,7 @@ type MetricMapNamespace struct {
169170
// be mapped to by the collector
170171
type MetricMap struct {
171172
discard bool // Should metric be discarded during mapping?
173+
histogram bool // Should metric be treated as a histogram?
172174
vtype prometheus.ValueType // Prometheus valuetype
173175
desc *prometheus.Desc // Prometheus descriptor
174176
conversion func(interface{}) (float64, bool) // Conversion function to turn PG result into float64
@@ -630,6 +632,27 @@ func makeDescMap(pgVersion semver.Version, serverLabels prometheus.Labels, metri
630632
return dbToFloat64(in)
631633
},
632634
}
635+
case HISTOGRAM:
636+
thisMap[columnName] = MetricMap{
637+
histogram: true,
638+
vtype: prometheus.UntypedValue,
639+
desc: prometheus.NewDesc(fmt.Sprintf("%s_%s", namespace, columnName), columnMapping.description, variableLabels, serverLabels),
640+
conversion: func(in interface{}) (float64, bool) {
641+
return dbToFloat64(in)
642+
},
643+
}
644+
thisMap[columnName+"_bucket"] = MetricMap{
645+
histogram: true,
646+
discard: true,
647+
}
648+
thisMap[columnName+"_sum"] = MetricMap{
649+
histogram: true,
650+
discard: true,
651+
}
652+
thisMap[columnName+"_count"] = MetricMap{
653+
histogram: true,
654+
discard: true,
655+
}
633656
case MAPPEDMETRIC:
634657
thisMap[columnName] = MetricMap{
635658
vtype: prometheus.GaugeValue,
@@ -701,6 +724,9 @@ func stringToColumnUsage(s string) (ColumnUsage, error) {
701724
case "GAUGE":
702725
u = GAUGE
703726

727+
case "HISTOGRAM":
728+
u = HISTOGRAM
729+
704730
case "MAPPEDMETRIC":
705731
u = MAPPEDMETRIC
706732

@@ -752,6 +778,46 @@ func dbToFloat64(t interface{}) (float64, bool) {
752778
}
753779
}
754780

781+
// Convert database.sql types to uint64 for Prometheus consumption. Null types are mapped to 0. string and []byte
782+
// types are mapped as 0 and !ok
783+
func dbToUint64(t interface{}) (uint64, bool) {
784+
switch v := t.(type) {
785+
case uint64:
786+
return v, true
787+
case int64:
788+
return uint64(v), true
789+
case float64:
790+
return uint64(v), true
791+
case time.Time:
792+
return uint64(v.Unix()), true
793+
case []byte:
794+
// Try and convert to string and then parse to a uint64
795+
strV := string(v)
796+
result, err := strconv.ParseUint(strV, 10, 64)
797+
if err != nil {
798+
log.Infoln("Could not parse []byte:", err)
799+
return 0, false
800+
}
801+
return result, true
802+
case string:
803+
result, err := strconv.ParseUint(v, 10, 64)
804+
if err != nil {
805+
log.Infoln("Could not parse string:", err)
806+
return 0, false
807+
}
808+
return result, true
809+
case bool:
810+
if v {
811+
return 1, true
812+
}
813+
return 0, true
814+
case nil:
815+
return 0, true
816+
default:
817+
return 0, false
818+
}
819+
}
820+
755821
// Convert database/sql types to string for Prometheus labels. Null types are mapped to empty strings.
756822
func dbToString(t interface{}) (string, bool) {
757823
switch v := t.(type) {
@@ -1284,13 +1350,68 @@ func queryNamespaceMapping(server *Server, namespace string, mapping MetricMapNa
12841350
continue
12851351
}
12861352

1287-
value, ok := dbToFloat64(columnData[idx])
1288-
if !ok {
1289-
nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Unexpected error parsing column: ", namespace, columnName, columnData[idx])))
1290-
continue
1353+
if metricMapping.histogram {
1354+
var keys []float64
1355+
err = pq.Array(&keys).Scan(columnData[idx])
1356+
if err != nil {
1357+
return []prometheus.Metric{}, []error{}, errors.New(fmt.Sprintln("Error retrieving", columnName, "buckets:", namespace, err))
1358+
}
1359+
1360+
var values []int64
1361+
valuesIdx, ok := columnIdx[columnName+"_bucket"]
1362+
if !ok {
1363+
nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Missing column: ", namespace, columnName+"_bucket")))
1364+
continue
1365+
}
1366+
err = pq.Array(&values).Scan(columnData[valuesIdx])
1367+
if err != nil {
1368+
return []prometheus.Metric{}, []error{}, errors.New(fmt.Sprintln("Error retrieving", columnName, "bucket values:", namespace, err))
1369+
}
1370+
1371+
buckets := make(map[float64]uint64, len(keys))
1372+
for i, key := range keys {
1373+
if i >= len(values) {
1374+
break
1375+
}
1376+
buckets[key] = uint64(values[i])
1377+
}
1378+
1379+
idx, ok = columnIdx[columnName+"_sum"]
1380+
if !ok {
1381+
nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Missing column: ", namespace, columnName+"_sum")))
1382+
continue
1383+
}
1384+
sum, ok := dbToFloat64(columnData[idx])
1385+
if !ok {
1386+
nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Unexpected error parsing column: ", namespace, columnName+"_sum", columnData[idx])))
1387+
continue
1388+
}
1389+
1390+
idx, ok = columnIdx[columnName+"_count"]
1391+
if !ok {
1392+
nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Missing column: ", namespace, columnName+"_count")))
1393+
continue
1394+
}
1395+
count, ok := dbToUint64(columnData[idx])
1396+
if !ok {
1397+
nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Unexpected error parsing column: ", namespace, columnName+"_count", columnData[idx])))
1398+
continue
1399+
}
1400+
1401+
metric = prometheus.MustNewConstHistogram(
1402+
metricMapping.desc,
1403+
count, sum, buckets,
1404+
labels...,
1405+
)
1406+
} else {
1407+
value, ok := dbToFloat64(columnData[idx])
1408+
if !ok {
1409+
nonfatalErrors = append(nonfatalErrors, errors.New(fmt.Sprintln("Unexpected error parsing column: ", namespace, columnName, columnData[idx])))
1410+
continue
1411+
}
1412+
// Generate the metric
1413+
metric = prometheus.MustNewConstMetric(metricMapping.desc, metricMapping.vtype, value, labels...)
12911414
}
1292-
// Generate the metric
1293-
metric = prometheus.MustNewConstMetric(metricMapping.desc, metricMapping.vtype, value, labels...)
12941415
} else {
12951416
// Unknown metric. Report as untyped if scan to float64 works, else note an error too.
12961417
metricLabel := fmt.Sprintf("%s_%s", namespace, columnName)

queries.yaml

+44
Original file line numberDiff line numberDiff line change
@@ -203,3 +203,47 @@ pg_stat_statements:
203203
- blk_write_time_seconds:
204204
usage: "COUNTER"
205205
description: "Total time the statement spent writing blocks, in milliseconds (if track_io_timing is enabled, otherwise zero)"
206+
207+
pg_stat_activity:
208+
query: |
209+
WITH
210+
metrics AS (
211+
SELECT
212+
application_name,
213+
SUM(EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change))::bigint)::float AS process_idle_seconds_sum,
214+
COUNT(*) AS process_idle_seconds_count
215+
FROM pg_stat_activity
216+
WHERE state = 'idle'
217+
GROUP BY application_name
218+
),
219+
buckets AS (
220+
SELECT
221+
application_name,
222+
le,
223+
SUM(
224+
CASE WHEN EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change)) <= le
225+
THEN 1
226+
ELSE 0
227+
END
228+
)::bigint AS bucket
229+
FROM
230+
pg_stat_activity,
231+
UNNEST(ARRAY[1, 2, 5, 15, 30, 60, 90, 120, 300]) AS le
232+
GROUP BY application_name, le
233+
ORDER BY application_name, le
234+
)
235+
SELECT
236+
application_name,
237+
process_idle_seconds_sum,
238+
process_idle_seconds_count,
239+
ARRAY_AGG(le) AS process_idle_seconds,
240+
ARRAY_AGG(bucket) AS process_idle_seconds_bucket
241+
FROM metrics JOIN buckets USING (application_name)
242+
GROUP BY 1, 2, 3
243+
metrics:
244+
- application_name:
245+
usage: "LABEL"
246+
description: "Application Name"
247+
- process_idle_seconds:
248+
usage: "HISTOGRAM"
249+
description: "Idle time of server processes"

0 commit comments

Comments
 (0)