@@ -43,6 +43,7 @@ type healthChecker struct {
43
43
crictlPath string
44
44
healthCheckTimeout time.Duration
45
45
coolDownTime time.Duration
46
+ loopBackTime time.Duration
46
47
logPatternsToCheck map [string ]int
47
48
}
48
49
@@ -54,6 +55,7 @@ func NewHealthChecker(hco *options.HealthCheckerOptions) (types.HealthChecker, e
54
55
crictlPath : hco .CriCtlPath ,
55
56
healthCheckTimeout : hco .HealthCheckTimeout ,
56
57
coolDownTime : hco .CoolDownTime ,
58
+ loopBackTime : hco .LoopBackTime ,
57
59
systemdService : hco .SystemdService ,
58
60
logPatternsToCheck : hco .LogPatterns .GetLogPatternCountMap (),
59
61
}
@@ -139,11 +141,21 @@ func getHealthCheckFunc(hco *options.HealthCheckerOptions) func() (bool, error)
139
141
// CheckHealth checks for the health of the component and tries to repair if enabled.
140
142
// Returns true if healthy, false otherwise.
141
143
func (hc * healthChecker ) CheckHealth () (bool , error ) {
144
+ var logStartTime string
142
145
healthy , err := hc .healthCheckFunc ()
143
146
if err != nil {
144
147
return healthy , err
145
148
}
146
- logPatternHealthy , err := logPatternHealthCheck (hc .systemdService , hc .logPatternsToCheck )
149
+ uptime , err := hc .uptimeFunc ()
150
+ if err != nil {
151
+ return false , err
152
+ }
153
+ if hc .loopBackTime > 0 && uptime > hc .loopBackTime {
154
+ logStartTime = time .Now ().Add (- hc .loopBackTime ).Format (types .LogParsingTimeLayout )
155
+ } else {
156
+ logStartTime = time .Now ().Add (- uptime ).Format (types .LogParsingTimeLayout )
157
+ }
158
+ logPatternHealthy , err := logPatternHealthCheck (hc .systemdService , logStartTime , hc .logPatternsToCheck )
147
159
if err != nil {
148
160
return logPatternHealthy , err
149
161
}
@@ -154,10 +166,6 @@ func (hc *healthChecker) CheckHealth() (bool, error) {
154
166
// Attempt repair based on flag.
155
167
if hc .enableRepair {
156
168
// repair if the service has been up for the cool down period.
157
- uptime , err := hc .uptimeFunc ()
158
- if err != nil {
159
- glog .Infof ("error in getting uptime for %v: %v\n " , hc .component , err )
160
- }
161
169
glog .Infof ("%v is unhealthy, component uptime: %v\n " , hc .component , uptime )
162
170
if uptime > hc .coolDownTime {
163
171
glog .Infof ("%v cooldown period of %v exceeded, repairing" , hc .component , hc .coolDownTime )
@@ -182,19 +190,10 @@ func execCommand(timeout time.Duration, command string, args ...string) (string,
182
190
183
191
// logPatternHealthCheck checks for the provided logPattern occurrences in the service logs.
184
192
// Returns true if no log patterns are provided or if no pattern occurs its threshold count of times in the service logs since logStartTime, false otherwise.
185
- func logPatternHealthCheck (service string , logPatternsToCheck map [string ]int ) (bool , error ) {
193
+ func logPatternHealthCheck (service , logStartTime string , logPatternsToCheck map [string ]int ) (bool , error ) {
186
194
if len (logPatternsToCheck ) == 0 {
187
195
return true , nil
188
196
}
189
- uptimeFunc := getUptimeFunc (service )
190
- uptime , err := uptimeFunc ()
191
- if err != nil {
192
- return true , err
193
- }
194
- logStartTime := time .Now ().Add (- uptime ).Format (types .LogParsingTimeLayout )
195
- if err != nil {
196
- return true , err
197
- }
198
197
for pattern , count := range logPatternsToCheck {
199
198
healthy , err := checkForPattern (service , logStartTime , pattern , count )
200
199
if err != nil || ! healthy {
0 commit comments