Skip to content

Commit 92e67b8

Browse files
authored
Merge pull request #92 from Random-Liu/generalize-log-monitor-code-name
Generalize the kernel monitor code.
2 parents 6e35bcf + dba47bd commit 92e67b8

27 files changed

+276
-254
lines changed

Dockerfile.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,4 @@ RUN test -h /etc/localtime && rm -f /etc/localtime && cp /usr/share/zoneinfo/UTC
2020

2121
ADD ./bin/node-problem-detector /node-problem-detector
2222
ADD config /config
23-
ENTRYPOINT ["/node-problem-detector", "--kernel-monitor=/config/kernel-monitor.json"]
23+
ENTRYPOINT ["/node-problem-detector", "--system-log-monitor=/config/kernel-monitor.json"]

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,12 +49,12 @@ List of supported problem daemons:
4949

5050
| Problem Daemon | NodeCondition | Description |
5151
|----------------|:---------------:|:------------|
52-
| [KernelMonitor](https://github.com/kubernetes/node-problem-detector/tree/master/pkg/kernelmonitor) | KernelDeadlock | A problem daemon monitors kernel log and reports problem according to predefined rules. |
52+
| [KernelMonitor](https://github.com/kubernetes/node-problem-detector/blob/master/config/kernel-monitor.json) | KernelDeadlock | A system log monitor that monitors the kernel log and reports problems according to predefined rules. |
5353

5454
# Usage
5555
## Flags
5656
* `--version`: Print current version of node-problem-detector.
57-
* `--kernel-monitor`: The configuration used by the kernel monitor, e.g.
57+
* `--system-log-monitor`: The configuration used by the system log monitor, e.g.
5858
[config/kernel-monitor.json](https://github.com/kubernetes/node-problem-detector/blob/master/config/kernel-monitor.json).
5959
* `--apiserver-override`: A URI parameter used to customize how node-problem-detector
6060
connects the apiserver. The format is same as the
@@ -112,7 +112,7 @@ spec:
112112
hostPath:
113113
path: /etc/localtime
114114
```
115-
* Edit node-problem-detector.yaml to fit your environment: Set `log` volume to your system log diretory. (Used by KernelMonitor)
115+
* Edit node-problem-detector.yaml to fit your environment: Set `log` volume to your system log directory. (Used by SystemLogMonitor)
116116
* Create the DaemonSet with `kubectl create -f node-problem-detector.yaml`
117117
* If needed, you can use [ConfigMap](http://kubernetes.io/docs/user-guide/configmap/)
118118
to overwrite the `config/`.

cmd/node_problem_detector.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@ import (
2626
"github.com/golang/glog"
2727
"github.com/spf13/pflag"
2828

29-
"k8s.io/node-problem-detector/pkg/kernelmonitor"
3029
"k8s.io/node-problem-detector/pkg/options"
3130
"k8s.io/node-problem-detector/pkg/problemclient"
3231
"k8s.io/node-problem-detector/pkg/problemdetector"
32+
"k8s.io/node-problem-detector/pkg/systemlogmonitor"
3333
"k8s.io/node-problem-detector/pkg/version"
3434
)
3535

@@ -67,9 +67,9 @@ func main() {
6767
os.Exit(0)
6868
}
6969

70-
k := kernelmonitor.NewKernelMonitorOrDie(npdo.KernelMonitorConfigPath)
70+
l := systemlogmonitor.NewLogMonitorOrDie(npdo.SystemLogMonitorConfigPath)
7171
c := problemclient.NewClientOrDie(npdo)
72-
p := problemdetector.NewProblemDetector(k, c)
72+
p := problemdetector.NewProblemDetector(l, c)
7373

7474
// Start http server.
7575
if npdo.ServerPort > 0 {

config/docker-monitor-filelog.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"plugin": "syslog",
2+
"plugin": "filelog",
33
"pluginConfig": {
44
"timestamp": "^time=\"(\\S*)\"",
55
"message": "msg=\"([^\n]*)\"",

config/kernel-monitor-filelog.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"plugin": "syslog",
2+
"plugin": "filelog",
33
"pluginConfig": {
44
"timestamp": "^.{15}",
55
"message": "kernel: \\[.*\\] (.*)",

pkg/kernelmonitor/README.md

Lines changed: 0 additions & 61 deletions
This file was deleted.

pkg/options/options.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ import (
3030
type NodeProblemDetectorOptions struct {
3131
// command line options
3232

33-
// KernelMonitorConfigPath specifies the path to kernel monitor configuration file.
34-
KernelMonitorConfigPath string
33+
// SystemLogMonitorConfigPath specifies the path to system log monitor configuration file.
34+
SystemLogMonitorConfigPath string
3535
// ApiServerOverride is the custom URI used to connect to Kubernetes ApiServer.
3636
ApiServerOverride string
3737
// PrintVersion is the flag determining whether version information is printed.
@@ -55,8 +55,8 @@ func NewNodeProblemDetectorOptions() *NodeProblemDetectorOptions {
5555

5656
// AddFlags adds node problem detector command line options to pflag.
5757
func (npdo *NodeProblemDetectorOptions) AddFlags(fs *pflag.FlagSet) {
58-
fs.StringVar(&npdo.KernelMonitorConfigPath, "kernel-monitor",
59-
"/config/kernel-monitor.json", "The path to the kernel monitor config file")
58+
fs.StringVar(&npdo.SystemLogMonitorConfigPath, "system-log-monitor",
59+
"/config/kernel-monitor.json", "The path to the system log monitor config file")
6060
fs.StringVar(&npdo.ApiServerOverride, "apiserver-override",
6161
"", "Custom URI used to connect to Kubernetes ApiServer")
6262
fs.BoolVar(&npdo.PrintVersion, "version", false, "Print version information and quit")

pkg/problemdetector/problem_detector.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ import (
2424
"k8s.io/kubernetes/pkg/util/clock"
2525

2626
"k8s.io/node-problem-detector/pkg/condition"
27-
"k8s.io/node-problem-detector/pkg/kernelmonitor"
2827
"k8s.io/node-problem-detector/pkg/problemclient"
28+
"k8s.io/node-problem-detector/pkg/systemlogmonitor"
2929
"k8s.io/node-problem-detector/pkg/util"
3030
)
3131

@@ -39,12 +39,12 @@ type problemDetector struct {
3939
client problemclient.Client
4040
conditionManager condition.ConditionManager
4141
// TODO(random-liu): Use slices of problem daemons if multiple monitors are needed in the future
42-
monitor kernelmonitor.KernelMonitor
42+
monitor systemlogmonitor.LogMonitor
4343
}
4444

4545
// NewProblemDetector creates the problem detector. Currently we just directly passed in the problem daemons, but
4646
// in the future we may want to let the problem daemons register themselves.
47-
func NewProblemDetector(monitor kernelmonitor.KernelMonitor, client problemclient.Client) ProblemDetector {
47+
func NewProblemDetector(monitor systemlogmonitor.LogMonitor, client problemclient.Client) ProblemDetector {
4848
return &problemDetector{
4949
client: client,
5050
conditionManager: condition.NewConditionManager(client, clock.RealClock{}),

pkg/systemlogmonitor/README.md

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
# System Log Monitor
2+
3+
*System Log Monitor* is a problem daemon in node problem detector. It monitors
4+
specified system daemon log and detects problems following predefined rules.
5+
6+
The System Log Monitor matches problems according to a predefined list of rules in
7+
the configuration files. (
8+
[`config/kernel-monitor.json`](https://github.com/kubernetes/node-problem-detector/blob/master/config/kernel-monitor.json) as an example).
9+
The rule list is extensible.
10+
11+
## Limitations
12+
13+
* System Log Monitor currently supports only file-based logs and journald, but it is easy
14+
to extend it with [new log watcher](#new-log-watcher)
15+
16+
## Add New NodeConditions
17+
18+
To support new node conditions, you can extend the `conditions` field in
19+
the configuration file with new condition definition:
20+
21+
```json
22+
{
23+
"type": "NodeConditionType",
24+
"reason": "CamelCaseDefaultNodeConditionReason",
25+
"message": "arbitrary default node condition message"
26+
}
27+
```
28+
29+
## Detect New Problems
30+
31+
To detect new problems, you can extend the `rules` field in the configuration file
32+
with new rule definition:
33+
34+
```json
35+
{
36+
"type": "temporary/permanent",
37+
"condition": "NodeConditionOfPermanentIssue",
38+
"reason": "CamelCaseShortReason",
39+
"message": "regexp matching the issue in the log"
40+
}
41+
```
42+
43+
## Log Watchers
44+
45+
System log monitor supports different log management tools with different log
46+
watchers:
47+
* [filelog](https://github.com/kubernetes/node-problem-detector/blob/master/pkg/systemlogmonitor/logwatchers/filelog): Log watcher for
48+
arbitrary file based log.
49+
* [journald](https://github.com/kubernetes/node-problem-detector/blob/master/pkg/systemlogmonitor/logwatchers/journald): Log watcher for
50+
journald.
51+
Set `plugin` in the configuration file to specify the log watcher.
52+
53+
### Plugin Configuration
54+
55+
Log watcher specific configurations are configured in `pluginConfig`.
56+
* **journald**
57+
* source: The [`SYSLOG_IDENTIFIER`](https://www.freedesktop.org/software/systemd/man/systemd.journal-fields.html)
58+
of the log to watch.
59+
* **filelog**:
60+
* timestamp: The regular expression used to match timestamp in the log line.
61+
Submatch is supported, but only the last result will be used as the actual
62+
timestamp.
63+
* message: The regular expression used to match message in the log line.
64+
Submatch is supported, but only the last result will be used as the actual
65+
message.
66+
* timestampFormat: The format of the timestamp. The format string is the time
67+
`2006-01-02T15:04:05Z07:00` in the expected format. (See
68+
[golang timestamp format](https://golang.org/pkg/time/#pkg-constants))
69+
70+
### Change Log Path
71+
72+
Logs on different OS distros may be located in different paths. The `logPath`
73+
field in the configuration file is the log path. You can always configure
74+
`logPath` to match your OS distro.
75+
* filelog: `logPath` is the path of log file, e.g. `/var/log/kern.log` for kernel
76+
log.
77+
* journald: `logPath` is the journal log directory, usually `/var/log/journal`.
78+
79+
### New Log Watcher
80+
81+
System log monitor uses [Log
82+
Watcher](https://github.com/kubernetes/node-problem-detector/blob/master/pkg/systemlogmonitor/logwatchers/types/log_watcher.go) to support different log management tools.
83+
It is easy to implement a new log watcher.

pkg/kernelmonitor/config.go renamed to pkg/systemlogmonitor/config.go

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,26 +14,26 @@ See the License for the specific language governing permissions and
1414
limitations under the License.
1515
*/
1616

17-
package kernelmonitor
17+
package systemlogmonitor
1818

1919
import (
20-
watchertypes "k8s.io/node-problem-detector/pkg/kernelmonitor/logwatchers/types"
21-
kerntypes "k8s.io/node-problem-detector/pkg/kernelmonitor/types"
20+
watchertypes "k8s.io/node-problem-detector/pkg/systemlogmonitor/logwatchers/types"
21+
logtypes "k8s.io/node-problem-detector/pkg/systemlogmonitor/types"
2222
"k8s.io/node-problem-detector/pkg/types"
2323
)
2424

25-
// MonitorConfig is the configuration of kernel monitor.
25+
// MonitorConfig is the configuration of log monitor.
2626
type MonitorConfig struct {
27-
// WatcherConfig is the configuration of kernel log watcher.
27+
// WatcherConfig is the configuration of log watcher.
2828
watchertypes.WatcherConfig
2929
// BufferSize is the size (in lines) of the log buffer.
3030
BufferSize int `json:"bufferSize"`
31-
// Source is the source name of the kernel monitor
31+
// Source is the source name of the log monitor
3232
Source string `json:"source"`
33-
// DefaultConditions are the default states of all the conditions kernel monitor should handle.
33+
// DefaultConditions are the default states of all the conditions log monitor should handle.
3434
DefaultConditions []types.Condition `json:"conditions"`
35-
// Rules are the rules kernel monitor will follow to parse the log file.
36-
Rules []kerntypes.Rule `json:"rules"`
35+
// Rules are the rules log monitor will follow to parse the log file.
36+
Rules []logtypes.Rule `json:"rules"`
3737
}
3838

3939
// applyDefaultConfiguration applies default configurations.

pkg/kernelmonitor/log_buffer.go renamed to pkg/systemlogmonitor/log_buffer.go

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,28 +14,28 @@ See the License for the specific language governing permissions and
1414
limitations under the License.
1515
*/
1616

17-
package kernelmonitor
17+
package systemlogmonitor
1818

1919
import (
2020
"regexp"
2121
"strings"
2222

23-
"k8s.io/node-problem-detector/pkg/kernelmonitor/types"
23+
"k8s.io/node-problem-detector/pkg/systemlogmonitor/types"
2424
)
2525

2626
// LogBuffer buffers the logs and supports match in the log buffer with regular expression.
2727
type LogBuffer interface {
2828
// Push pushes log into the log buffer.
29-
Push(*types.KernelLog)
29+
Push(*types.Log)
3030
// Match with regular expression in the log buffer.
31-
Match(string) []*types.KernelLog
31+
Match(string) []*types.Log
3232
// String returns a concatenated string of the buffered logs.
3333
String() string
3434
}
3535

3636
type logBuffer struct {
3737
// buffer is a simple ring buffer.
38-
buffer []*types.KernelLog
38+
buffer []*types.Log
3939
msg []string
4040
max int
4141
current int
@@ -47,20 +47,20 @@ type logBuffer struct {
4747
// lines of patterns we support.
4848
func NewLogBuffer(maxLines int) *logBuffer {
4949
return &logBuffer{
50-
buffer: make([]*types.KernelLog, maxLines, maxLines),
50+
buffer: make([]*types.Log, maxLines, maxLines),
5151
msg: make([]string, maxLines, maxLines),
5252
max: maxLines,
5353
}
5454
}
5555

56-
func (b *logBuffer) Push(log *types.KernelLog) {
56+
func (b *logBuffer) Push(log *types.Log) {
5757
b.buffer[b.current%b.max] = log
5858
b.msg[b.current%b.max] = log.Message
5959
b.current++
6060
}
6161

6262
// TODO(random-liu): Cache regexp if garbage collection becomes a problem someday.
63-
func (b *logBuffer) Match(expr string) []*types.KernelLog {
63+
func (b *logBuffer) Match(expr string) []*types.Log {
6464
// The expression should be checked outside, and it must match to the end.
6565
reg := regexp.MustCompile(expr + `\z`)
6666
log := b.String()
@@ -72,7 +72,7 @@ func (b *logBuffer) Match(expr string) []*types.KernelLog {
7272
// reverse index
7373
s := len(log) - loc[0] - 1
7474
total := 0
75-
matched := []*types.KernelLog{}
75+
matched := []*types.Log{}
7676
for i := b.tail(); i >= b.current && b.buffer[i%b.max] != nil; i-- {
7777
matched = append(matched, b.buffer[i%b.max])
7878
total += len(b.msg[i%b.max]) + 1 // Add '\n'

0 commit comments

Comments
 (0)