Skip to content

Commit 1fed174

Browse files
committed
Add arbitrary log support
1 parent 4885f07 commit 1fed174

File tree

10 files changed

+264
-103
lines changed

10 files changed

+264
-103
lines changed

config/docker-monitor-filelog.json

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
{
2+
"plugin": "syslog",
3+
"pluginConfig": {
4+
"timestamp": "^time=\"(\\S*)\"",
5+
"message": "msg=\"([^\n]*)\"",
6+
"timestampFormat": "2006-01-02T15:04:05.999999999-07:00"
7+
},
8+
"logPath": "/var/log/docker.log",
9+
"lookback": "5m",
10+
"bufferSize": 10,
11+
"source": "docker-monitor",
12+
"conditions": [],
13+
"rules": []
14+
}

config/docker-monitor.json

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{
2+
"plugin": "journald",
3+
"pluginConfig": {
4+
"source": "docker"
5+
},
6+
"logPath": "/var/log/journal",
7+
"lookback": "5m",
8+
"bufferSize": 10,
9+
"source": "docker-monitor",
10+
"conditions": [],
11+
"rules": []
12+
}

config/kernel-monitor-filelog.json

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
{
2+
"plugin": "syslog",
3+
"pluginConfig": {
4+
"timestamp": "^.{15}",
5+
"message": "kernel: \\[.*\\] (.*)",
6+
"timestampFormat": "Jan _2 15:04:05"
7+
},
8+
"logPath": "/var/log/kern.log",
9+
"lookback": "5m",
10+
"bufferSize": 10,
11+
"source": "kernel-monitor",
12+
"conditions": [
13+
{
14+
"type": "KernelDeadlock",
15+
"reason": "KernelHasNoDeadlock",
16+
"message": "kernel has no deadlock"
17+
}
18+
],
19+
"rules": [
20+
{
21+
"type": "temporary",
22+
"reason": "OOMKilling",
23+
"pattern": "Kill process \\d+ (.+) score \\d+ or sacrifice child\\nKilled process \\d+ (.+) total-vm:\\d+kB, anon-rss:\\d+kB, file-rss:\\d+kB"
24+
},
25+
{
26+
"type": "temporary",
27+
"reason": "TaskHung",
28+
"pattern": "task \\S+:\\w+ blocked for more than \\w+ seconds\\."
29+
},
30+
{
31+
"type": "temporary",
32+
"reason": "KernelOops",
33+
"pattern": "BUG: unable to handle kernel NULL pointer dereference at .*"
34+
},
35+
{
36+
"type": "temporary",
37+
"reason": "KernelOops",
38+
"pattern": "divide error: 0000 \\[#\\d+\\] SMP"
39+
},
40+
{
41+
"type": "permanent",
42+
"condition": "KernelDeadlock",
43+
"reason": "AUFSUmountHung",
44+
"pattern": "task umount\\.aufs:\\w+ blocked for more than \\w+ seconds\\."
45+
},
46+
{
47+
"type": "permanent",
48+
"condition": "KernelDeadlock",
49+
"reason": "DockerHung",
50+
"pattern": "task docker:\\w+ blocked for more than \\w+ seconds\\."
51+
}
52+
]
53+
}

config/kernel-monitor.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
{
22
"plugin": "journald",
3+
"pluginConfig": {
4+
"source": "kernel"
5+
},
36
"logPath": "/var/log/journal",
47
"lookback": "5m",
58
"bufferSize": 10,

pkg/kernelmonitor/logwatchers/journald/log_watcher.go

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,13 @@ func (j *journaldWatcher) watchLoop() {
114114
}
115115
}
116116

117-
// defaultJournalLogPath is the default path of journal log.
118-
const defaultJournalLogPath = "/var/log/journal"
117+
const (
118+
// defaultJournalLogPath is the default path of journal log.
119+
defaultJournalLogPath = "/var/log/journal"
120+
121+
// configSourceKey is the key of source configuration in the plugin configuration.
122+
configSourceKey = "source"
123+
)
119124

120125
// getJournal returns a journal client.
121126
func getJournal(cfg types.WatcherConfig) (*sdjournal.Journal, error) {
@@ -140,14 +145,18 @@ func getJournal(cfg types.WatcherConfig) (*sdjournal.Journal, error) {
140145
if err != nil {
141146
return nil, fmt.Errorf("failed to lookback %q: %v", since, err)
142147
}
143-
// TODO(random-liu): Make this configurable to support parsing other logs.
144-
kernelMatch := sdjournal.Match{
145-
Field: sdjournal.SD_JOURNAL_FIELD_TRANSPORT,
146-
Value: "kernel",
148+
// Empty source is not allowed and treated as an error.
149+
source := cfg.PluginConfig[configSourceKey]
150+
if source == "" {
151+
return nil, fmt.Errorf("failed to filter journal log, empty source is not allowed")
152+
}
153+
match := sdjournal.Match{
154+
Field: sdjournal.SD_JOURNAL_FIELD_SYSLOG_IDENTIFIER,
155+
Value: source,
147156
}
148-
err = journal.AddMatch(kernelMatch.String())
157+
err = journal.AddMatch(match.String())
149158
if err != nil {
150-
return nil, fmt.Errorf("failed to add log filter %#v: %v", kernelMatch, err)
159+
return nil, fmt.Errorf("failed to add log filter %#v: %v", match, err)
151160
}
152161
return journal, nil
153162
}

pkg/kernelmonitor/logwatchers/syslog/helpers.go

Lines changed: 63 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -17,86 +17,88 @@ package syslog
1717

1818
import (
1919
"fmt"
20-
"io"
21-
"os"
22-
"strings"
20+
"regexp"
2321
"time"
2422

2523
kerntypes "k8s.io/node-problem-detector/pkg/kernelmonitor/types"
2624

27-
"github.com/google/cadvisor/utils/tail"
25+
"github.com/golang/glog"
2826
)
2927

28+
// translator translates log line into internal log type based on user defined
29+
// regular expression.
30+
type translator struct {
31+
timestampRegexp *regexp.Regexp
32+
messageRegexp *regexp.Regexp
33+
timestampFormat string
34+
}
35+
36+
const (
37+
// NOTE that we support submatch for both timestamp and message regular expressions. When
38+
// there are multiple matches returned by submatch, only **the last** is used.
39+
// timestampKey is the key of timestamp regular expression in the plugin configuration.
40+
timestampKey = "timestamp"
41+
// messageKey is the key of message regular expression in the plugin configuration.
42+
messageKey = "message"
43+
// timestampFormatKey is the key of timestamp format string in the plugin configuration.
44+
timestampFormatKey = "timestampFormat"
45+
)
46+
47+
func newTranslatorOrDie(pluginConfig map[string]string) *translator {
48+
if err := validatePluginConfig(pluginConfig); err != nil {
49+
glog.Errorf("Failed to validate plugin configuration %+v: %v", pluginConfig, err)
50+
}
51+
return &translator{
52+
timestampRegexp: regexp.MustCompile(pluginConfig[timestampKey]),
53+
messageRegexp: regexp.MustCompile(pluginConfig[messageKey]),
54+
timestampFormat: pluginConfig[timestampFormatKey],
55+
}
56+
}
57+
3058
// translate translates the log line into internal type.
31-
func translate(line string) (*kerntypes.KernelLog, error) {
32-
timestamp, message, err := parseLine(line)
59+
func (t *translator) translate(line string) (*kerntypes.KernelLog, error) {
60+
// Parse timestamp.
61+
matches := t.timestampRegexp.FindStringSubmatch(line)
62+
if len(matches) == 0 {
63+
return nil, fmt.Errorf("no timestamp found in line %q with regular expression %v", line, t.timestampRegexp)
64+
}
65+
timestamp, err := time.ParseInLocation(t.timestampFormat, matches[len(matches)-1], time.Local)
3366
if err != nil {
34-
return nil, err
67+
return nil, fmt.Errorf("failed to parse timestamp %q: %v", matches[len(matches)-1], err)
68+
}
69+
// Formalize the timestmap.
70+
timestamp = formalizeTimestamp(timestamp)
71+
// Parse message.
72+
matches = t.messageRegexp.FindStringSubmatch(line)
73+
if len(matches) == 0 {
74+
return nil, fmt.Errorf("no message found in line %q with regular expression %v", line, t.messageRegexp)
3575
}
76+
message := matches[len(matches)-1]
3677
return &kerntypes.KernelLog{
3778
Timestamp: timestamp,
3879
Message: message,
3980
}, nil
4081
}
4182

42-
const (
43-
// timestampLen is the length of timestamp in syslog logging format.
44-
timestampLen = 15
45-
// messagePrefix is the character before real message.
46-
messagePrefix = "]"
47-
)
48-
49-
// parseLine parses one log line into timestamp and message.
50-
func parseLine(line string) (time.Time, string, error) {
51-
// Trim the spaces to make sure timestamp could be found
52-
line = strings.TrimSpace(line)
53-
if len(line) < timestampLen {
54-
return time.Time{}, "", fmt.Errorf("the line is too short: %q", line)
83+
// validatePluginConfig validates whether the plugin configuration.
84+
func validatePluginConfig(cfg map[string]string) error {
85+
if cfg[timestampKey] == "" {
86+
return fmt.Errorf("unexpected empty timestamp regular expression")
5587
}
56-
// Example line: Jan 1 00:00:00 hostname kernel: [0.000000] component: log message
57-
now := time.Now()
58-
// There is no time zone information in kernel log timestamp, apply the current time
59-
// zone.
60-
timestamp, err := time.ParseInLocation(time.Stamp, line[:timestampLen], time.Local)
61-
if err != nil {
62-
return time.Time{}, "", fmt.Errorf("error parsing timestamp in line %q: %v", line, err)
88+
if cfg[messageKey] == "" {
89+
return fmt.Errorf("unexpected empty message regular expression")
6390
}
64-
// There is no year information in kernel log timestamp, apply the current year.
65-
// This could go wrong during looking back phase after kernel monitor is started,
66-
// and the old logs are generated in old year.
67-
timestamp = timestamp.AddDate(now.Year(), 0, 0)
68-
69-
loc := strings.Index(line, messagePrefix)
70-
if loc == -1 {
71-
return timestamp, "", fmt.Errorf("can't find message prefix %q in line %q", messagePrefix, line)
91+
if cfg[timestampFormatKey] == "" {
92+
return fmt.Errorf("unexpected empty timestamp format string")
7293
}
73-
message := strings.Trim(line[loc+1:], " ")
74-
75-
return timestamp, message, nil
94+
return nil
7695
}
7796

78-
// defaultKernelLogPath the default path of syslog kernel log.
79-
const defaultKernelLogPath = "/var/log/kern.log"
80-
81-
// getLogReader returns log reader for syslog log. Note that getLogReader doesn't look back
82-
// to the rolled out logs.
83-
func getLogReader(path string) (io.ReadCloser, error) {
84-
if path == "" {
85-
path = defaultKernelLogPath
86-
}
87-
// To handle log rotation, tail will not report error immediately if
88-
// the file doesn't exist. So we check file existence frist.
89-
// This could go wrong during mid-rotation. It should recover after
90-
// several restart when the log file is created again. The chance
91-
// is slim but we should still fix this in the future.
92-
// TODO(random-liu): Handle log missing during rotation.
93-
_, err := os.Stat(path)
94-
if err != nil {
95-
return nil, fmt.Errorf("failed to stat the file %q: %v", path, err)
96-
}
97-
tail, err := tail.NewTail(path)
98-
if err != nil {
99-
return nil, fmt.Errorf("failed to tail the file %q: %v", path, err)
97+
// formalizeTimestamp formalizes the timestamp. We need this because some log doesn't contain full
98+
// timestamp, e.g. syslog.
99+
func formalizeTimestamp(t time.Time) time.Time {
100+
if t.Year() == 0 {
101+
t = t.AddDate(time.Now().Year(), 0, 0)
100102
}
101-
return tail, nil
103+
return t
102104
}

pkg/kernelmonitor/logwatchers/syslog/helpers_test.go

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,46 +21,71 @@ import (
2121
"time"
2222

2323
"github.com/stretchr/testify/assert"
24+
"github.com/stretchr/testify/require"
2425

2526
kerntypes "k8s.io/node-problem-detector/pkg/kernelmonitor/types"
2627
)
2728

2829
func TestTranslate(t *testing.T) {
2930
year := time.Now().Year()
3031
testCases := []struct {
31-
input string
32-
err bool
33-
log *kerntypes.KernelLog
32+
config map[string]string
33+
input string
34+
err bool
35+
log *kerntypes.KernelLog
3436
}{
3537
{
36-
input: "May 1 12:23:45 hostname kernel: [0.000000] component: log message",
38+
// missing year and timezone
39+
// "timestamp": "^.{15}",
40+
// "message": "kernel \\[.*\\] (.*)",
41+
// "timestampFormat": "Jan _2 15:04:05",
42+
config: getTestPluginConfig(),
43+
input: "May 1 12:23:45 hostname kernel: [0.000000] component: log message",
3744
log: &kerntypes.KernelLog{
3845
Timestamp: time.Date(year, time.May, 1, 12, 23, 45, 0, time.Local),
3946
Message: "component: log message",
4047
},
4148
},
4249
{
4350
// no log message
44-
input: "May 21 12:23:45 hostname kernel: [9.999999]",
51+
config: getTestPluginConfig(),
52+
input: "May 21 12:23:45 hostname kernel: [9.999999] ",
4553
log: &kerntypes.KernelLog{
4654
Timestamp: time.Date(year, time.May, 21, 12, 23, 45, 0, time.Local),
4755
Message: "",
4856
},
4957
},
5058
{
5159
// the right square bracket is missing
52-
input: "May 21 12:23:45 hostname kernel: [9.999999 component: log message",
53-
err: true,
60+
config: getTestPluginConfig(),
61+
input: "May 21 12:23:45 hostname kernel: [9.999999 component: log message",
62+
err: true,
63+
},
64+
{
65+
// contains full timestamp
66+
config: map[string]string{
67+
"timestamp": "^time=\"(\\S*)\"",
68+
"message": "msg=\"([^\n]*)\"",
69+
"timestampFormat": "2006-01-02T15:04:05.999999999-07:00",
70+
},
71+
input: `time="2017-02-01T17:58:34.999999999-08:00" level=error msg="test log line1\n test log line2"`,
72+
log: &kerntypes.KernelLog{
73+
Timestamp: time.Date(2017, 2, 1, 17, 58, 34, 999999999, time.FixedZone("PST", -8*3600)),
74+
Message: `test log line1\n test log line2`,
75+
},
5476
},
5577
}
5678

5779
for c, test := range testCases {
5880
t.Logf("TestCase #%d: %#v", c+1, test)
59-
log, err := translate(test.input)
60-
if (err != nil) != test.err {
61-
t.Errorf("case %d: error assertion failed, got log: %+v, error: %v", c+1, log, err)
62-
continue
81+
trans := newTranslatorOrDie(test.config)
82+
log, err := trans.translate(test.input)
83+
if !test.err {
84+
require.NoError(t, err)
85+
assert.Equal(t, test.log.Timestamp.String(), log.Timestamp.String())
86+
assert.Equal(t, test.log.Message, log.Message)
87+
} else {
88+
require.Error(t, err)
6389
}
64-
assert.Equal(t, test.log, log)
6590
}
6691
}

0 commit comments

Comments
 (0)