Skip to content

Commit 62a1e9a

Browse files
committed
log/sockstatlog: add delay before writing logs to disk
Split apart polling of sockstats and logging them to disk. Add a 3 second delay before writing logs to disk to prevent an infinite upload loop when uploading stats to logcatcher. Fixes tailscale#7719 Signed-off-by: Will Norris <[email protected]>
1 parent 985535a commit 62a1e9a

File tree

2 files changed

+69
-17
lines changed

2 files changed

+69
-17
lines changed

log/sockstatlog/logger.go

Lines changed: 61 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
// Copyright (c) Tailscale Inc & AUTHORS
22
// SPDX-License-Identifier: BSD-3-Clause
33

4-
// Package sockstatlog provides a logger for capturing and storing network socket stats.
4+
// Package sockstatlog provides a logger for capturing network socket stats for debugging.
5+
// Stats are collected at a frequency of 10 Hz and logged to disk.
6+
// Stats are only uploaded to the log server on demand.
57
package sockstatlog
68

79
import (
@@ -25,19 +27,29 @@ import (
2527
"tailscale.com/util/mak"
2628
)
2729

28-
// pollPeriod specifies how often to poll for socket stats.
29-
const pollPeriod = time.Second / 10
30+
// pollInterval specifies how often to poll for socket stats.
31+
const pollInterval = time.Second / 10
32+
33+
// logInterval specifies how often to log sockstat events to disk.
34+
// This delay is added to prevent an infinite loop when logs are uploaded,
35+
// which itself creates additional sockstat events.
36+
const logInterval = 3 * time.Second
3037

3138
// Logger logs statistics about network sockets.
3239
type Logger struct {
40+
// enabled identifies whether the logger is enabled.
3341
enabled atomic.Bool
3442

3543
ctx context.Context
3644
cancelFn context.CancelFunc
3745

38-
ticker *time.Ticker
39-
logf logger.Logf
46+
// eventCh is used to pass events from the poller to the logger.
47+
eventCh chan event
48+
49+
logf logger.Logf
4050

51+
// logger is the underlying logtail logger than manages log files on disk
52+
// and uploading to the log server.
4153
logger *logtail.Logger
4254
filch *filch.Filch
4355
tr http.RoundTripper
@@ -73,6 +85,7 @@ func SockstatLogID(logID logid.PublicID) logid.PrivateID {
7385
// NewLogger returns a new Logger that will store stats in logdir.
7486
// On platforms that do not support sockstat logging, a nil Logger will be returned.
7587
// The returned Logger is not yet enabled, and must be shut down with Shutdown when it is no longer needed.
88+
// Logs will be uploaded to the log server using a new log ID derived from the provided backend logID.
7689
func NewLogger(logdir string, logf logger.Logf, logID logid.PublicID) (*Logger, error) {
7790
if !sockstats.IsAvailable {
7891
return nil, nil
@@ -88,10 +101,9 @@ func NewLogger(logdir string, logf logger.Logf, logID logid.PublicID) (*Logger,
88101
}
89102

90103
logger := &Logger{
91-
ticker: time.NewTicker(pollPeriod),
92-
logf: logf,
93-
filch: filch,
94-
tr: logpolicy.NewLogtailTransport(logtail.DefaultHost),
104+
logf: logf,
105+
filch: filch,
106+
tr: logpolicy.NewLogtailTransport(logtail.DefaultHost),
95107
}
96108
logger.logger = logtail.NewLogger(logtail.Config{
97109
BaseURL: logpolicy.LogURL(),
@@ -124,8 +136,14 @@ func (l *Logger) SetLoggingEnabled(v bool) {
124136
old := l.enabled.Load()
125137
if old != v && l.enabled.CompareAndSwap(old, v) {
126138
if v {
139+
if l.eventCh == nil {
140+
// eventCh should be large enough for the number of events that will occur within logInterval.
141+
// Add an extra second's worth of events to ensure we don't drop any.
142+
l.eventCh = make(chan event, (logInterval+time.Second)/pollInterval)
143+
}
127144
l.ctx, l.cancelFn = context.WithCancel(context.Background())
128145
go l.poll()
146+
go l.logEvents()
129147
} else {
130148
l.cancelFn()
131149
}
@@ -137,19 +155,21 @@ func (l *Logger) Write(p []byte) (int, error) {
137155
}
138156

139157
// poll fetches the current socket stats at the configured time interval,
140-
// calculates the delta since the last poll, and logs any non-zero values.
158+
// calculates the delta since the last poll,
159+
// and writes any non-zero values to the logger event channel.
141160
// This method does not return.
142161
func (l *Logger) poll() {
143162
// last is the last set of socket stats we saw.
144163
var lastStats *sockstats.SockStats
145164
var lastTime time.Time
146165

147-
enc := json.NewEncoder(l)
166+
ticker := time.NewTicker(pollInterval)
148167
for {
149168
select {
150169
case <-l.ctx.Done():
170+
ticker.Stop()
151171
return
152-
case t := <-l.ticker.C:
172+
case t := <-ticker.C:
153173
stats := sockstats.Get()
154174
if lastStats != nil {
155175
diffstats := delta(lastStats, stats)
@@ -162,9 +182,7 @@ func (l *Logger) poll() {
162182
if stats.CurrentInterfaceCellular {
163183
e.IsCellularInterface = 1
164184
}
165-
if err := enc.Encode(e); err != nil {
166-
l.logf("sockstatlog: error encoding log: %v", err)
167-
}
185+
l.eventCh <- e
168186
}
169187
}
170188
lastTime = t
@@ -173,6 +191,34 @@ func (l *Logger) poll() {
173191
}
174192
}
175193

194+
// logEvents reads events from the event channel at logInterval and logs them to disk.
195+
// This method does not return.
196+
func (l *Logger) logEvents() {
197+
enc := json.NewEncoder(l)
198+
flush := func() {
199+
for {
200+
select {
201+
case e := <-l.eventCh:
202+
if err := enc.Encode(e); err != nil {
203+
l.logf("sockstatlog: error encoding log: %v", err)
204+
}
205+
default:
206+
return
207+
}
208+
}
209+
}
210+
ticker := time.NewTicker(logInterval)
211+
for {
212+
select {
213+
case <-l.ctx.Done():
214+
ticker.Stop()
215+
return
216+
case <-ticker.C:
217+
flush()
218+
}
219+
}
220+
}
221+
176222
func (l *Logger) LogID() string {
177223
if l.logger == nil {
178224
return ""
@@ -189,7 +235,6 @@ func (l *Logger) Shutdown() {
189235
if l.cancelFn != nil {
190236
l.cancelFn()
191237
}
192-
l.ticker.Stop()
193238
l.filch.Close()
194239
l.logger.Shutdown(context.Background())
195240

logtail/logtail.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,7 @@ func NewLogger(cfg Config, logf tslogger.Logf) *Logger {
150150
timeNow: cfg.TimeNow,
151151
bo: backoff.NewBackoff("logtail", stdLogf, 30*time.Second),
152152
metricsDelta: cfg.MetricsDelta,
153+
sockstatsLabel: sockstats.LabelLogtailLogger,
153154

154155
procID: procID,
155156
includeProcSequence: cfg.IncludeProcSequence,
@@ -192,6 +193,7 @@ type Logger struct {
192193
metricsDelta func() string // or nil
193194
privateID logid.PrivateID
194195
httpDoCalls atomic.Int32
196+
sockstatsLabel sockstats.Label
195197

196198
procID uint32
197199
includeProcSequence bool
@@ -220,6 +222,11 @@ func (l *Logger) SetLinkMonitor(lm *monitor.Mon) {
220222
l.linkMonitor = lm
221223
}
222224

225+
// SetSockstatsLabel sets the label used in sockstat logs to identify network traffic from this logger.
226+
func (l *Logger) SetSockstatsLabel(label sockstats.Label) {
227+
l.sockstatsLabel = label
228+
}
229+
223230
// PrivateID returns the logger's private log ID.
224231
//
225232
// It exists for internal use only.
@@ -428,7 +435,7 @@ func (l *Logger) awaitInternetUp(ctx context.Context) {
428435
// origlen of -1 indicates that the body is not compressed.
429436
func (l *Logger) upload(ctx context.Context, body []byte, origlen int) (uploaded bool, err error) {
430437
const maxUploadTime = 45 * time.Second
431-
ctx = sockstats.WithSockStats(ctx, sockstats.LabelLogtailLogger)
438+
ctx = sockstats.WithSockStats(ctx, l.sockstatsLabel)
432439
ctx, cancel := context.WithTimeout(ctx, maxUploadTime)
433440
defer cancel()
434441

0 commit comments

Comments
 (0)