Skip to content

Commit 0876f61

Browse files
authored
Merge pull request kubernetes#83 from Random-Liu/add-npd-endpoint
Add NPD endpoints: /debug/pprof, /healthz, /conditions.
2 parents b66c4df + 20ffe37 commit 0876f61

File tree

6 files changed

+120
-13
lines changed

6 files changed

+120
-13
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ version:
8080

8181
./bin/node-problem-detector: $(PKG_SOURCES)
8282
CGO_ENABLED=$(CGO_ENABLED) GOOS=linux go build -o bin/node-problem-detector \
83-
-ldflags '-w -X $(PKG)/pkg/version.version=$(VERSION)' \
83+
-ldflags '-X $(PKG)/pkg/version.version=$(VERSION)' \
8484
$(BUILD_TAGS) cmd/node_problem_detector.go
8585

8686
Dockerfile: Dockerfile.in

cmd/node_problem_detector.go

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,12 @@ package main
1818

1919
import (
2020
"flag"
21-
"fmt"
21+
"net"
22+
"net/http"
23+
_ "net/http/pprof"
2224
"net/url"
2325
"os"
26+
"strconv"
2427

2528
"github.com/golang/glog"
2629

@@ -35,6 +38,8 @@ var (
3538
apiServerOverride = flag.String("apiserver-override", "", "Custom URI used to connect to Kubernetes ApiServer")
3639
printVersion = flag.Bool("version", false, "Print version information and quit")
3740
hostnameOverride = flag.String("hostname-override", "", "Custom node name used to override hostname")
41+
serverPort = flag.Int("port", 10256, "The port to bind the node problem detector server. Use 0 to disable.")
42+
serverAddress = flag.String("address", "127.0.0.1", "The address to bind the node problem detector server.")
3843
)
3944

4045
func validateCmdParams() {
@@ -67,12 +72,27 @@ func getNodeNameOrDie() string {
6772
// environments.
6873
nodeName, err := os.Hostname()
6974
if err != nil {
70-
panic(fmt.Sprintf("Failed to get host name: %v", err))
75+
glog.Fatalf("Failed to get host name: %v", err)
7176
}
7277

7378
return nodeName
7479
}
7580

81+
func startHTTPServer(p problemdetector.ProblemDetector) {
82+
// Add healthz http request handler. Always return ok now, add more health check
83+
// logic in the future.
84+
http.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) {
85+
w.WriteHeader(http.StatusOK)
86+
w.Write([]byte("ok"))
87+
})
88+
// Add the http handlers in problem detector.
89+
p.RegisterHTTPHandlers()
90+
err := http.ListenAndServe(net.JoinHostPort(*serverAddress, strconv.Itoa(*serverPort)), nil)
91+
if err != nil {
92+
glog.Fatalf("Failed to start server: %v", err)
93+
}
94+
}
95+
7696
func main() {
7797
flag.Parse()
7898
validateCmdParams()
@@ -86,6 +106,12 @@ func main() {
86106

87107
k := kernelmonitor.NewKernelMonitorOrDie(*kernelMonitorConfigPath)
88108
p := problemdetector.NewProblemDetector(k, *apiServerOverride, nodeName)
109+
110+
// Start http server.
111+
if *serverPort > 0 {
112+
startHTTPServer(p)
113+
}
114+
89115
if err := p.Run(); err != nil {
90116
glog.Fatalf("Problem detector failed with error: %v", err)
91117
}

pkg/condition/manager.go

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -54,19 +54,27 @@ type ConditionManager interface {
5454
Start()
5555
// UpdateCondition updates a specific condition.
5656
UpdateCondition(types.Condition)
57+
// GetConditions returns all current conditions.
58+
GetConditions() []types.Condition
5759
}
5860

5961
type conditionManager struct {
62+
// Only 2 fields will be accessed by more than one goroutine at the same time:
63+
// * `updates`: updates will be written by random caller and the sync routine,
64+
// so it needs to be protected by write lock in both `UpdateCondition` and
65+
// `needUpdates`.
66+
// * `conditions`: conditions will only be written in the sync routine, but
67+
// it will be read by random caller and the sync routine. So it needs to be
68+
// protected by write lock in `needUpdates` and read lock in `GetConditions`.
69+
// No lock is needed in `sync`, because it is in the same goroutine with the
70+
// write operation.
71+
sync.RWMutex
6072
clock clock.Clock
6173
latestTry time.Time
6274
resyncNeeded bool
6375
client problemclient.Client
64-
// updatesLock is the lock protecting updates. Only the field `updates`
65-
// will be accessed by random caller and the sync routine, so only it
66-
// needs to be protected.
67-
updatesLock sync.Mutex
68-
updates map[string]types.Condition
69-
conditions map[string]types.Condition
76+
updates map[string]types.Condition
77+
conditions map[string]types.Condition
7078
}
7179

7280
// NewConditionManager creates a condition manager.
@@ -84,13 +92,23 @@ func (c *conditionManager) Start() {
8492
}
8593

8694
func (c *conditionManager) UpdateCondition(condition types.Condition) {
87-
c.updatesLock.Lock()
88-
defer c.updatesLock.Unlock()
95+
c.Lock()
96+
defer c.Unlock()
8997
// New node condition will override the old condition, because we only need the newest
9098
// condition for each condition type.
9199
c.updates[condition.Type] = condition
92100
}
93101

102+
func (c *conditionManager) GetConditions() []types.Condition {
103+
c.RLock()
104+
defer c.RUnlock()
105+
var conditions []types.Condition
106+
for _, condition := range c.conditions {
107+
conditions = append(conditions, condition)
108+
}
109+
return conditions
110+
}
111+
94112
func (c *conditionManager) syncLoop() {
95113
updateCh := c.clock.Tick(updatePeriod)
96114
for {
@@ -105,8 +123,8 @@ func (c *conditionManager) syncLoop() {
105123

106124
// needUpdates checks whether there are recent updates.
107125
func (c *conditionManager) needUpdates() bool {
108-
c.updatesLock.Lock()
109-
defer c.updatesLock.Unlock()
126+
c.Lock()
127+
defer c.Unlock()
110128
needUpdate := false
111129
for t, update := range c.updates {
112130
if !reflect.DeepEqual(c.conditions[t], update) {

pkg/condition/manager_test.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,18 @@ func TestNeedUpdates(t *testing.T) {
8181
}
8282
}
8383

84+
func TestGetConditions(t *testing.T) {
85+
m, _, _ := newTestManager()
86+
assert.Empty(t, m.GetConditions())
87+
testCondition1 := newTestCondition("TestCondition1")
88+
testCondition2 := newTestCondition("TestCondition2")
89+
m.UpdateCondition(testCondition1)
90+
m.UpdateCondition(testCondition2)
91+
assert.True(t, m.needUpdates())
92+
assert.Contains(t, m.GetConditions(), testCondition1)
93+
assert.Contains(t, m.GetConditions(), testCondition2)
94+
}
95+
8496
func TestResync(t *testing.T) {
8597
m, fakeClient, fakeClock := newTestManager()
8698
condition := newTestCondition("TestCondition")

pkg/problemdetector/problem_detector.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ limitations under the License.
1717
package problemdetector
1818

1919
import (
20+
"net/http"
21+
2022
"github.com/golang/glog"
2123

2224
"k8s.io/kubernetes/pkg/util/clock"
@@ -30,6 +32,7 @@ import (
3032
// ProblemDetector collects statuses from all problem daemons, updates the node condition and sends node events.
3133
type ProblemDetector interface {
3234
Run() error
35+
RegisterHTTPHandlers()
3336
}
3437

3538
type problemDetector struct {
@@ -70,3 +73,11 @@ func (p *problemDetector) Run() error {
7073
}
7174
}
7275
}
76+
77+
// RegisterHTTPHandlers registers http handlers of node problem detector.
78+
func (p *problemDetector) RegisterHTTPHandlers() {
79+
// Add the handler to serve condition http request.
80+
http.HandleFunc("/conditions", func(w http.ResponseWriter, r *http.Request) {
81+
util.ReturnHTTPJson(w, p.conditionManager.GetConditions())
82+
})
83+
}

pkg/util/http.go

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
Copyright 2017 The Kubernetes Authors All rights reserved.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package util
18+
19+
import (
20+
"encoding/json"
21+
"net/http"
22+
)
23+
24+
// ReturnHTTPJson generates json http response.
25+
func ReturnHTTPJson(w http.ResponseWriter, object interface{}) {
26+
data, err := json.Marshal(object)
27+
if err != nil {
28+
ReturnHTTPError(w, err)
29+
return
30+
}
31+
w.Header().Set("Content-type", "application/json")
32+
w.WriteHeader(http.StatusOK)
33+
w.Write(data)
34+
}
35+
36+
// ReturnHTTPError writes a 500 Internal Server Error response to w whose body
// is the text of err.
func ReturnHTTPError(w http.ResponseWriter, err error) {
	w.WriteHeader(http.StatusInternalServerError)
	body := []byte(err.Error())
	w.Write(body)
}

0 commit comments

Comments
 (0)