Skip to content

Commit 228f0f5

Browse files
authored
Merge pull request #554 from jeremyje/osversion
Add support for basic system metrics for Windows.
2 parents c7ce65d + d493387 commit 228f0f5

19 files changed

+660
-219
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -228,7 +228,7 @@ make clean windows-binaries
228228
make test
229229
230230
# Run with containerd log monitoring enabled in Command Prompt. (Assumes containerd is installed.)
231-
%CD%\bin\windows_amd64\node-problem-detector.exe --logtostderr --enable-k8s-exporter=false --config.system-log-monitor=%CD%\config\windows-containerd-monitor-filelog.json
231+
%CD%\output\windows_amd64\node-problem-detector.exe --logtostderr --enable-k8s-exporter=false --config.system-log-monitor=%CD%\config\windows-containerd-monitor-filelog.json --config.system-stats-monitor=config\windows-system-stats-monitor.json
232232
233233
# Configure NPD to run as a Windows Service
234234
sc.exe create NodeProblemDetector binpath= "%CD%\node-problem-detector.exe [FLAGS]" start= demand
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
{
2+
"cpu": {
3+
"metricsConfigs": {
4+
"cpu/load_15m": {
5+
"displayName": "cpu/load_15m"
6+
},
7+
"cpu/load_1m": {
8+
"displayName": "cpu/load_1m"
9+
},
10+
"cpu/load_5m": {
11+
"displayName": "cpu/load_5m"
12+
},
13+
"cpu/runnable_task_count": {
14+
"displayName": "cpu/runnable_task_count"
15+
},
16+
"cpu/usage_time": {
17+
"displayName": "cpu/usage_time"
18+
},
19+
"system/cpu_stat": {
20+
"displayName": "system/cpu_stat"
21+
},
22+
"system/interrupts_total": {
23+
"displayName": "system/interrupts_total"
24+
},
25+
"system/processes_total": {
26+
"displayName": "system/processes_total"
27+
},
28+
"system/procs_blocked": {
29+
"displayName": "system/procs_blocked"
30+
},
31+
"system/procs_running": {
32+
"displayName": "system/procs_running"
33+
}
34+
}
35+
},
36+
"disk": {
37+
"includeAllAttachedBlk": false,
38+
"includeRootBlk": false,
39+
"lsblkTimeout": "60s",
40+
"metricsConfigs": {
41+
"disk/avg_queue_len": {
42+
"displayName": "disk/avg_queue_len"
43+
},
44+
"disk/bytes_used": {
45+
"displayName": "disk/bytes_used"
46+
},
47+
"disk/io_time": {
48+
"displayName": "disk/io_time"
49+
},
50+
"disk/merged_operation_count": {
51+
"displayName": "disk/merged_operation_count"
52+
},
53+
"disk/operation_bytes_count": {
54+
"displayName": "disk/operation_bytes_count"
55+
},
56+
"disk/operation_count": {
57+
"displayName": "disk/operation_count"
58+
},
59+
"disk/operation_time": {
60+
"displayName": "disk/operation_time"
61+
},
62+
"disk/weighted_io": {
63+
"displayName": "disk/weighted_io"
64+
}
65+
}
66+
},
67+
"host": {
68+
"metricsConfigs": {
69+
"host/uptime": {
70+
"displayName": "host/uptime"
71+
}
72+
}
73+
},
74+
"invokeInterval": "60s",
75+
"memory": {
76+
"metricsConfigs": {
77+
"memory/anonymous_used": {
78+
"displayName": "memory/anonymous_used"
79+
},
80+
"memory/bytes_used": {
81+
"displayName": "memory/bytes_used"
82+
},
83+
"memory/dirty_used": {
84+
"displayName": "memory/dirty_used"
85+
},
86+
"memory/page_cache_used": {
87+
"displayName": "memory/page_cache_used"
88+
},
89+
"memory/unevictable_used": {
90+
"displayName": "memory/unevictable_used"
91+
}
92+
}
93+
}
94+
}

pkg/systemstatsmonitor/README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,3 +116,14 @@ Below metrics are collected from `net` component:
116116
* `net/tx_compressed`: Cumulative count of compressed packets transmitted by the device driver.
117117

118118
All of the above have `interface_name` label for the net interface.
119+
120+
## Windows Support
121+
122+
NPD has preliminary support for the system stats monitor on Windows. The following modules are supported:
123+
124+
* CPU - Idle, System, and User metrics.
125+
* Memory - Used and available.
126+
* Disk - Space used and free.
127+
* Uptime - within kernel version and product name.
128+
129+
All the data is currently retrieved from the `github.com/shirou/gopsutil` library. Any data that is parsed directly from `/proc` on Linux is not supported on Windows. There will be later integration to use WMI (Windows Management Instrumentation) to gather node metrics.

pkg/systemstatsmonitor/cpu_collector.go

Lines changed: 0 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,8 @@ limitations under the License.
1717
package systemstatsmonitor
1818

1919
import (
20-
"fmt"
21-
2220
"github.com/golang/glog"
23-
"github.com/prometheus/procfs"
2421
"github.com/shirou/gopsutil/cpu"
25-
"github.com/shirou/gopsutil/load"
2622

2723
ssmtypes "k8s.io/node-problem-detector/pkg/systemstatsmonitor/types"
2824
"k8s.io/node-problem-detector/pkg/util/metrics"
@@ -174,24 +170,6 @@ func NewCPUCollectorOrDie(cpuConfig *ssmtypes.CPUStatsConfig) *cpuCollector {
174170
return &cc
175171
}
176172

177-
func (cc *cpuCollector) recordLoad() {
178-
if cc.mRunnableTaskCount == nil {
179-
return
180-
}
181-
182-
loadAvg, err := load.Avg()
183-
if err != nil {
184-
glog.Errorf("Failed to retrieve average CPU load: %v", err)
185-
return
186-
}
187-
188-
cc.mRunnableTaskCount.Record(map[string]string{}, loadAvg.Load1)
189-
190-
cc.mCpuLoad1m.Record(map[string]string{}, loadAvg.Load1)
191-
cc.mCpuLoad5m.Record(map[string]string{}, loadAvg.Load5)
192-
cc.mCpuLoad15m.Record(map[string]string{}, loadAvg.Load15)
193-
}
194-
195173
func (cc *cpuCollector) recordUsage() {
196174
if cc.mUsageTime == nil {
197175
return
@@ -236,46 +214,6 @@ func (cc *cpuCollector) recordUsage() {
236214
cc.lastUsageTime["guest_nice"] = clockTick * timersStat.GuestNice
237215
}
238216

239-
func (cc *cpuCollector) recordSystemStats() {
240-
fs, err := procfs.NewFS("/proc")
241-
stats, err := fs.Stat()
242-
if err != nil {
243-
glog.Errorf("Failed to retrieve cpu/process stats: %v", err)
244-
return
245-
}
246-
247-
cc.mSystemProcessesTotal.Record(map[string]string{}, int64(stats.ProcessCreated))
248-
cc.mSystemProcsRunning.Record(map[string]string{}, int64(stats.ProcessesRunning))
249-
cc.mSystemProcsBlocked.Record(map[string]string{}, int64(stats.ProcessesBlocked))
250-
cc.mSystemInterruptsTotal.Record(map[string]string{}, int64(stats.IRQTotal))
251-
252-
for i, c := range stats.CPU {
253-
tags := map[string]string{}
254-
tags[cpuLabel] = fmt.Sprintf("cpu%d", i)
255-
256-
tags[stageLabel] = "user"
257-
cc.mSystemCPUStat.Record(tags, c.User)
258-
tags[stageLabel] = "nice"
259-
cc.mSystemCPUStat.Record(tags, c.Nice)
260-
tags[stageLabel] = "system"
261-
cc.mSystemCPUStat.Record(tags, c.System)
262-
tags[stageLabel] = "idle"
263-
cc.mSystemCPUStat.Record(tags, c.Idle)
264-
tags[stageLabel] = "iowait"
265-
cc.mSystemCPUStat.Record(tags, c.Iowait)
266-
tags[stageLabel] = "iRQ"
267-
cc.mSystemCPUStat.Record(tags, c.IRQ)
268-
tags[stageLabel] = "softIRQ"
269-
cc.mSystemCPUStat.Record(tags, c.SoftIRQ)
270-
tags[stageLabel] = "steal"
271-
cc.mSystemCPUStat.Record(tags, c.Steal)
272-
tags[stageLabel] = "guest"
273-
cc.mSystemCPUStat.Record(tags, c.Guest)
274-
tags[stageLabel] = "guestNice"
275-
cc.mSystemCPUStat.Record(tags, c.GuestNice)
276-
}
277-
}
278-
279217
func (cc *cpuCollector) collect() {
280218
if cc == nil {
281219
return
Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
/*
2+
Copyright 2020 The Kubernetes Authors All rights reserved.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package systemstatsmonitor
18+
19+
import (
20+
"fmt"
21+
22+
"github.com/golang/glog"
23+
"github.com/prometheus/procfs"
24+
"github.com/shirou/gopsutil/load"
25+
)
26+
27+
func (cc *cpuCollector) recordLoad() {
28+
if cc.mRunnableTaskCount == nil {
29+
return
30+
}
31+
32+
loadAvg, err := load.Avg()
33+
if err != nil {
34+
glog.Errorf("Failed to retrieve average CPU load: %v", err)
35+
return
36+
}
37+
38+
cc.mRunnableTaskCount.Record(map[string]string{}, loadAvg.Load1)
39+
40+
cc.mCpuLoad1m.Record(map[string]string{}, loadAvg.Load1)
41+
cc.mCpuLoad5m.Record(map[string]string{}, loadAvg.Load5)
42+
cc.mCpuLoad15m.Record(map[string]string{}, loadAvg.Load15)
43+
}
44+
45+
func (cc *cpuCollector) recordSystemStats() {
46+
fs, err := procfs.NewFS("/proc")
47+
stats, err := fs.Stat()
48+
if err != nil {
49+
glog.Errorf("Failed to retrieve cpu/process stats: %v", err)
50+
return
51+
}
52+
53+
cc.mSystemProcessesTotal.Record(map[string]string{}, int64(stats.ProcessCreated))
54+
cc.mSystemProcsRunning.Record(map[string]string{}, int64(stats.ProcessesRunning))
55+
cc.mSystemProcsBlocked.Record(map[string]string{}, int64(stats.ProcessesBlocked))
56+
cc.mSystemInterruptsTotal.Record(map[string]string{}, int64(stats.IRQTotal))
57+
58+
for i, c := range stats.CPU {
59+
tags := map[string]string{}
60+
tags[cpuLabel] = fmt.Sprintf("cpu%d", i)
61+
62+
tags[stageLabel] = "user"
63+
cc.mSystemCPUStat.Record(tags, c.User)
64+
tags[stageLabel] = "nice"
65+
cc.mSystemCPUStat.Record(tags, c.Nice)
66+
tags[stageLabel] = "system"
67+
cc.mSystemCPUStat.Record(tags, c.System)
68+
tags[stageLabel] = "idle"
69+
cc.mSystemCPUStat.Record(tags, c.Idle)
70+
tags[stageLabel] = "iowait"
71+
cc.mSystemCPUStat.Record(tags, c.Iowait)
72+
tags[stageLabel] = "iRQ"
73+
cc.mSystemCPUStat.Record(tags, c.IRQ)
74+
tags[stageLabel] = "softIRQ"
75+
cc.mSystemCPUStat.Record(tags, c.SoftIRQ)
76+
tags[stageLabel] = "steal"
77+
cc.mSystemCPUStat.Record(tags, c.Steal)
78+
tags[stageLabel] = "guest"
79+
cc.mSystemCPUStat.Record(tags, c.Guest)
80+
tags[stageLabel] = "guestNice"
81+
cc.mSystemCPUStat.Record(tags, c.GuestNice)
82+
}
83+
}
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/*
2+
Copyright 2019 The Kubernetes Authors All rights reserved.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package systemstatsmonitor
18+
19+
import (
20+
"encoding/json"
21+
"testing"
22+
23+
ssmtypes "k8s.io/node-problem-detector/pkg/systemstatsmonitor/types"
24+
)
25+
26+
const (
27+
fakeCPUConfig = `
28+
{
29+
"metricsConfigs": {
30+
"cpu/load_15m": {
31+
"displayName": "cpu/load_15m"
32+
},
33+
"cpu/load_1m": {
34+
"displayName": "cpu/load_1m"
35+
},
36+
"cpu/load_5m": {
37+
"displayName": "cpu/load_5m"
38+
},
39+
"cpu/runnable_task_count": {
40+
"displayName": "cpu/runnable_task_count"
41+
},
42+
"cpu/usage_time": {
43+
"displayName": "cpu/usage_time"
44+
},
45+
"system/cpu_stat": {
46+
"displayName": "system/cpu_stat"
47+
},
48+
"system/interrupts_total": {
49+
"displayName": "system/interrupts_total"
50+
},
51+
"system/processes_total": {
52+
"displayName": "system/processes_total"
53+
},
54+
"system/procs_blocked": {
55+
"displayName": "system/procs_blocked"
56+
},
57+
"system/procs_running": {
58+
"displayName": "system/procs_running"
59+
}
60+
}
61+
}
62+
`
63+
)
64+
65+
func TestCpuCollector(t *testing.T) {
66+
cfg := &ssmtypes.CPUStatsConfig{}
67+
if err := json.Unmarshal([]byte(fakeCPUConfig), cfg); err != nil {
68+
t.Fatalf("cannot load cpu config: %s", err)
69+
}
70+
mc := NewCPUCollectorOrDie(cfg)
71+
mc.collect()
72+
}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
/*
2+
Copyright 2020 The Kubernetes Authors All rights reserved.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package systemstatsmonitor
18+
19+
// recordLoad is intentionally a no-op in this build variant: it records no
// load metrics and returns immediately.
// NOTE(review): presumably this is the Windows counterpart of the
// load.Avg-based implementation — confirm from the file name / build tags.
func (cc *cpuCollector) recordLoad() {
20+
// not supported
21+
}
22+
23+
// recordSystemStats is intentionally a no-op in this build variant: it records
// no system-wide process, interrupt, or per-CPU stage metrics.
// NOTE(review): presumably this is the Windows counterpart of the
// /proc-based implementation — confirm from the file name / build tags.
func (cc *cpuCollector) recordSystemStats() {
24+
// not supported
25+
}

0 commit comments

Comments
 (0)