Skip to content

Fix kernel monitor issues #81

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 10, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 8 additions & 10 deletions config/kernel-monitor.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
{
"plugin": "journald",
"logPath": "/var/log/journal",
"lookback": "10m",
"startPattern": "Initializing cgroup subsys cpuset",
"lookback": "5m",
"bufferSize": 10,
"source": "kernel-monitor",
"conditions": [
Expand All @@ -25,12 +24,17 @@
},
{
"type": "temporary",
"reason": "KernelPanic",
"reason": "UnregisterNetDevice",
"pattern": "unregister_netdevice: waiting for \\w+ to become free. Usage count = \\d+"
},
{
"type": "temporary",
"reason": "KernelOops",
"pattern": "BUG: unable to handle kernel NULL pointer dereference at .*"
},
{
"type": "temporary",
"reason": "KernelPanic",
"reason": "KernelOops",
"pattern": "divide error: 0000 \\[#\\d+\\] SMP"
},
{
Expand All @@ -44,12 +48,6 @@
"condition": "KernelDeadlock",
"reason": "DockerHung",
"pattern": "task docker:\\w+ blocked for more than \\w+ seconds\\."
},
{
"type": "permanent",
"condition": "KernelDeadlock",
"reason": "UnregisterNetDeviceIssue",
"pattern": "unregister_netdevice: waiting for \\w+ to become free. Usage count = \\d+"
}
]
}
2 changes: 0 additions & 2 deletions pkg/kernelmonitor/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@ type MonitorConfig struct {
DefaultConditions []types.Condition `json:"conditions"`
// Rules are the rules kernel monitor will follow to parse the log file.
Rules []kerntypes.Rule `json:"rules"`
// StartPattern is the pattern of the start line
StartPattern string `json:"startPattern, omitempty"`
}

// applyDefaultConfiguration applies default configurations.
Expand Down
11 changes: 1 addition & 10 deletions pkg/kernelmonitor/kernel_monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,7 @@ func NewKernelMonitorOrDie(configPath string) KernelMonitor {
glog.Fatalf("Failed to validate matching rules %#v: %v", k.config.Rules, err)
}
glog.Infof("Finish parsing log file: %+v", k.config)
k.watcher, err = logwatchers.GetLogWatcher(k.config.WatcherConfig)
if err != nil {
glog.Fatalf("Failed to create log watcher with watcher config %#v: %v", k.config.WatcherConfig, err)
}
k.watcher = logwatchers.GetLogWatcherOrDie(k.config.WatcherConfig)
k.buffer = NewLogBuffer(k.config.BufferSize)
// A 1000 size channel should be big enough.
k.output = make(chan *types.Status, 1000)
Expand Down Expand Up @@ -117,12 +114,6 @@ func (k *kernelMonitor) parseLog(log *kerntypes.KernelLog) {
// Once there is new log, kernel monitor will push it into the log buffer and try
// to match each rule. If any rule is matched, kernel monitor will report a status.
k.buffer.Push(log)
if matched := k.buffer.Match(k.config.StartPattern); len(matched) != 0 {
// Reset the condition if a start log shows up.
glog.Infof("Found start log %q, re-initialize the status", generateMessage(matched))
k.initializeStatus()
return
}
for _, rule := range k.config.Rules {
matched := k.buffer.Match(rule.Pattern)
if len(matched) == 0 {
Expand Down
11 changes: 5 additions & 6 deletions pkg/kernelmonitor/logwatchers/log_watchers.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ limitations under the License.
package logwatchers

import (
"fmt"

"k8s.io/node-problem-detector/pkg/kernelmonitor/logwatchers/types"

"github.com/golang/glog"
Expand All @@ -32,12 +30,13 @@ func registerLogWatcher(name string, create types.WatcherCreateFunc) {
createFuncs[name] = create
}

// GetLogWatcher get a log watcher based on the passed in configuration.
func GetLogWatcher(config types.WatcherConfig) (types.LogWatcher, error) {
// GetLogWatcherOrDie get a log watcher based on the passed in configuration.
// The function panics when encounts an error.
func GetLogWatcherOrDie(config types.WatcherConfig) types.LogWatcher {
create, ok := createFuncs[config.Plugin]
if !ok {
return nil, fmt.Errorf("no create function found for plugin %q", config.Plugin)
glog.Fatalf("No create function found for plugin %q", config.Plugin)
}
glog.Infof("Use log watcher of plugin %q", config.Plugin)
return create(config), nil
return create(config)
}
2 changes: 1 addition & 1 deletion pkg/kernelmonitor/logwatchers/register_syslog.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,5 @@ const syslogPluginName = "syslog"

func init() {
// Register the syslog plugin.
registerLogWatcher(syslogPluginName, syslog.NewSyslogWatcher)
registerLogWatcher(syslogPluginName, syslog.NewSyslogWatcherOrDie)
}
22 changes: 15 additions & 7 deletions pkg/kernelmonitor/logwatchers/syslog/log_watcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"bufio"
"bytes"
"io"
"syscall"
"time"

utilclock "code.cloudfoundry.org/clock"
Expand All @@ -35,23 +36,30 @@ type syslogWatcher struct {
reader *bufio.Reader
closer io.Closer
logCh chan *kerntypes.KernelLog
uptime time.Time
tomb *util.Tomb
clock utilclock.Clock
}

// NewSyslogWatcher creates a new kernel log watcher.
func NewSyslogWatcher(cfg types.WatcherConfig) types.LogWatcher {
// NewSyslogWatcherOrDie creates a new kernel log watcher. The function panics
// when encounters an error.
func NewSyslogWatcherOrDie(cfg types.WatcherConfig) types.LogWatcher {
var info syscall.Sysinfo_t
if err := syscall.Sysinfo(&info); err != nil {
glog.Fatalf("Failed to get system info: %v", err)
}
return &syslogWatcher{
cfg: cfg,
tomb: util.NewTomb(),
cfg: cfg,
uptime: time.Now().Add(time.Duration(-info.Uptime * int64(time.Second))),
tomb: util.NewTomb(),
// A capacity 1000 buffer should be enough
logCh: make(chan *kerntypes.KernelLog, 1000),
clock: utilclock.NewClock(),
}
}

// Make sure NewSyslogWathcer is types.WatcherCreateFunc.
var _ types.WatcherCreateFunc = NewSyslogWatcher
var _ types.WatcherCreateFunc = NewSyslogWatcherOrDie

// Watch starts the syslog watcher.
func (s *syslogWatcher) Watch() (<-chan *kerntypes.KernelLog, error) {
Expand Down Expand Up @@ -113,8 +121,8 @@ func (s *syslogWatcher) watchLoop() {
glog.Warningf("Unable to parse line: %q, %v", line, err)
continue
}
// If the log is older than look back duration, discard it.
if s.clock.Since(log.Timestamp) > lookback {
// If the log is older than look back duration or system boot time, discard it.
if s.clock.Since(log.Timestamp) > lookback || log.Timestamp.Before(s.uptime) {
continue
}
s.logCh <- log
Expand Down
25 changes: 24 additions & 1 deletion pkg/kernelmonitor/logwatchers/syslog/log_watcher_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ func TestWatch(t *testing.T) {
testCases := []struct {
log string
logs []kerntypes.KernelLog
uptime time.Time
lookback string
}{
{
Expand Down Expand Up @@ -96,6 +97,26 @@ func TestWatch(t *testing.T) {
},
},
},
{
// The start point is at the end of the log file, we look back, but
// system rebooted at in the middle of the log file.
log: `Jan 2 03:04:03 kernel: [0.000000] 1
Jan 2 03:04:04 kernel: [1.000000] 2
Jan 2 03:04:05 kernel: [2.000000] 3
`,
uptime: time.Date(time.Now().Year(), time.January, 2, 3, 4, 4, 0, time.Local),
lookback: "2s",
logs: []kerntypes.KernelLog{
{
Timestamp: now.Add(-time.Second),
Message: "2",
},
{
Timestamp: now,
Message: "3",
},
},
},
}
for c, test := range testCases {
t.Logf("TestCase #%d: %#v", c+1, test)
Expand All @@ -108,11 +129,13 @@ func TestWatch(t *testing.T) {
_, err = f.Write([]byte(test.log))
assert.NoError(t, err)

w := NewSyslogWatcher(types.WatcherConfig{
w := NewSyslogWatcherOrDie(types.WatcherConfig{
Plugin: "syslog",
LogPath: f.Name(),
Lookback: test.lookback,
})
// Set the uptime.
w.(*syslogWatcher).uptime = test.uptime
// Set the fake clock.
w.(*syslogWatcher).clock = fakeClock
logCh, err := w.Watch()
Expand Down