Skip to content

Commit b6c5ddf

Browse files
committed
add loopbacktime to reduce time of journalctl call
1 parent cb8534b commit b6c5ddf

File tree

4 files changed

+20
-15
lines changed

4 files changed

+20
-15
lines changed

cmd/healthchecker/options/options.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ type HealthCheckerOptions struct {
3939
CriCtlPath string
4040
CriSocketPath string
4141
CoolDownTime time.Duration
42+
LoopBackTime time.Duration
4243
HealthCheckTimeout time.Duration
4344
LogPatterns types.LogPatternFlag
4445
}
@@ -56,6 +57,8 @@ func (hco *HealthCheckerOptions) AddFlags(fs *pflag.FlagSet) {
5657
"The path to the cri socket. Used with crictl to specify the socket path.")
5758
fs.DurationVar(&hco.CoolDownTime, "cooldown-time", types.DefaultCoolDownTime,
5859
"The duration to wait for the service to be up before attempting repair.")
60+
fs.DurationVar(&hco.LoopBackTime, "loopback-time", types.DefaultLoopBackTime,
61+
"The duration to loop back, if it is 0, health-check will check from start time.")
5962
fs.DurationVar(&hco.HealthCheckTimeout, "health-check-timeout", types.DefaultHealthCheckTimeout,
6063
"The time to wait before marking the component as unhealthy.")
6164
fs.Var(&hco.LogPatterns, "log-pattern",

config/health-checker-kubelet.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
"--component=kubelet",
2626
"--enable-repair=true",
2727
"--cooldown-time=1m",
28+
"--loopback-time=0",
2829
"--health-check-timeout=10s"
2930
],
3031
"timeout": "3m"

pkg/healthchecker/health_checker.go

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ type healthChecker struct {
4343
crictlPath string
4444
healthCheckTimeout time.Duration
4545
coolDownTime time.Duration
46+
loopBackTime time.Duration
4647
logPatternsToCheck map[string]int
4748
}
4849

@@ -54,6 +55,7 @@ func NewHealthChecker(hco *options.HealthCheckerOptions) (types.HealthChecker, e
5455
crictlPath: hco.CriCtlPath,
5556
healthCheckTimeout: hco.HealthCheckTimeout,
5657
coolDownTime: hco.CoolDownTime,
58+
loopBackTime: hco.LoopBackTime,
5759
systemdService: hco.SystemdService,
5860
logPatternsToCheck: hco.LogPatterns.GetLogPatternCountMap(),
5961
}
@@ -139,11 +141,22 @@ func getHealthCheckFunc(hco *options.HealthCheckerOptions) func() (bool, error)
139141
// CheckHealth checks for the health of the component and tries to repair if enabled.
140142
// Returns true if healthy, false otherwise.
141143
func (hc *healthChecker) CheckHealth() (bool, error) {
144+
var logStartTime string
142145
healthy, err := hc.healthCheckFunc()
143146
if err != nil {
144147
return healthy, err
145148
}
146-
logPatternHealthy, err := logPatternHealthCheck(hc.systemdService, hc.logPatternsToCheck)
149+
uptime, err := hc.uptimeFunc()
150+
if err != nil {
151+
glog.Warningf("Failed to get the uptime: %+v", err)
152+
return true, err
153+
}
154+
if hc.loopBackTime > 0 && uptime > hc.loopBackTime {
155+
logStartTime = time.Now().Add(-hc.loopBackTime).Format(types.LogParsingTimeLayout)
156+
} else {
157+
logStartTime = time.Now().Add(-uptime).Format(types.LogParsingTimeLayout)
158+
}
159+
logPatternHealthy, err := logPatternHealthCheck(hc.systemdService, logStartTime, hc.logPatternsToCheck)
147160
if err != nil {
148161
return logPatternHealthy, err
149162
}
@@ -154,10 +167,6 @@ func (hc *healthChecker) CheckHealth() (bool, error) {
154167
// Attempt repair based on flag.
155168
if hc.enableRepair {
156169
// Repair only if the service has been up for longer than the cool-down period.
157-
uptime, err := hc.uptimeFunc()
158-
if err != nil {
159-
glog.Infof("error in getting uptime for %v: %v\n", hc.component, err)
160-
}
161170
glog.Infof("%v is unhealthy, component uptime: %v\n", hc.component, uptime)
162171
if uptime > hc.coolDownTime {
163172
glog.Infof("%v cooldown period of %v exceeded, repairing", hc.component, hc.coolDownTime)
@@ -182,19 +191,10 @@ func execCommand(timeout time.Duration, command string, args ...string) (string,
182191

183192
// logPatternHealthCheck checks for the provided logPattern occurrences in the service logs.
184193
// Returns true if no patterns are provided or if no pattern occurs logThresholdCount times in the logs since logStartTime, false otherwise.
185-
func logPatternHealthCheck(service string, logPatternsToCheck map[string]int) (bool, error) {
194+
func logPatternHealthCheck(service, logStartTime string, logPatternsToCheck map[string]int) (bool, error) {
186195
if len(logPatternsToCheck) == 0 {
187196
return true, nil
188197
}
189-
uptimeFunc := getUptimeFunc(service)
190-
uptime, err := uptimeFunc()
191-
if err != nil {
192-
return true, err
193-
}
194-
logStartTime := time.Now().Add(-uptime).Format(types.LogParsingTimeLayout)
195-
if err != nil {
196-
return true, err
197-
}
198198
for pattern, count := range logPatternsToCheck {
199199
healthy, err := checkForPattern(service, logStartTime, pattern, count)
200200
if err != nil || !healthy {

pkg/healthchecker/types/types.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
)
2626

2727
const (
28+
DefaultLoopBackTime = 0 * time.Minute
2829
DefaultCoolDownTime = 2 * time.Minute
2930
DefaultHealthCheckTimeout = 10 * time.Second
3031
CmdTimeout = 10 * time.Second

0 commit comments

Comments
 (0)