Skip to content

Commit b8156a0

Browse files
authored
Add OTLP message limit config and logic (#6333)
Signed-off-by: SungJin1212 <[email protected]>
1 parent ee62ab2 commit b8156a0

File tree

7 files changed

+233
-41
lines changed

7 files changed

+233
-41
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
* [FEATURE] Ruler: Minimize chances of missed rule group evaluations that can occur due to OOM kills, bad underlying nodes, or due to an unhealthy ruler that appears in the ring as healthy. This feature is enabled via `-ruler.enable-ha-evaluation` flag. #6129
1616
* [FEATURE] Store Gateway: Add an in-memory chunk cache. #6245
1717
* [FEATURE] Chunk Cache: Support multi level cache and add metrics. #6249
18+
* [ENHANCEMENT] OTLP: Add `-distributor.otlp-max-recv-msg-size` flag to limit OTLP request size in bytes. #6333
1819
* [ENHANCEMENT] S3 Bucket Client: Add a list objects version configs to configure list api object version. #6280
1920
* [ENHANCEMENT] OpenStack Swift: Add application credential configs for Openstack swift object storage backend. #6255
2021
* [ENHANCEMENT] Query Frontend: Add new query stats metrics `cortex_query_samples_scanned_total` and `cortex_query_peak_samples` to track scannedSamples and peakSample per user. #6228

docs/configuration/config-file-reference.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2562,6 +2562,10 @@ ha_tracker:
25622562
# CLI flag: -distributor.max-recv-msg-size
25632563
[max_recv_msg_size: <int> | default = 104857600]
25642564
2565+
# Maximum OTLP request size in bytes that the Distributor can accept.
2566+
# CLI flag: -distributor.otlp-max-recv-msg-size
2567+
[otlp_max_recv_msg_size: <int> | default = 104857600]
2568+
25652569
# Timeout for downstream ingesters.
25662570
# CLI flag: -distributor.remote-timeout
25672571
[remote_timeout: <duration> | default = 2s]

pkg/api/api.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,7 +278,7 @@ func (a *API) RegisterDistributor(d *distributor.Distributor, pushConfig distrib
278278
distributorpb.RegisterDistributorServer(a.server.GRPC, d)
279279

280280
a.RegisterRoute("/api/v1/push", push.Handler(pushConfig.MaxRecvMsgSize, a.sourceIPs, a.cfg.wrapDistributorPush(d)), true, "POST")
281-
a.RegisterRoute("/api/v1/otlp/v1/metrics", push.OTLPHandler(overrides, pushConfig.OTLPConfig, a.sourceIPs, a.cfg.wrapDistributorPush(d)), true, "POST")
281+
a.RegisterRoute("/api/v1/otlp/v1/metrics", push.OTLPHandler(pushConfig.OTLPMaxRecvMsgSize, overrides, pushConfig.OTLPConfig, a.sourceIPs, a.cfg.wrapDistributorPush(d)), true, "POST")
282282

283283
a.indexPage.AddLink(SectionAdminEndpoints, "/distributor/ring", "Distributor Ring Status")
284284
a.indexPage.AddLink(SectionAdminEndpoints, "/distributor/all_user_stats", "Usage Statistics")

pkg/distributor/distributor.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -135,9 +135,10 @@ type Config struct {
135135

136136
HATrackerConfig HATrackerConfig `yaml:"ha_tracker"`
137137

138-
MaxRecvMsgSize int `yaml:"max_recv_msg_size"`
139-
RemoteTimeout time.Duration `yaml:"remote_timeout"`
140-
ExtraQueryDelay time.Duration `yaml:"extra_queue_delay"`
138+
MaxRecvMsgSize int `yaml:"max_recv_msg_size"`
139+
OTLPMaxRecvMsgSize int `yaml:"otlp_max_recv_msg_size"`
140+
RemoteTimeout time.Duration `yaml:"remote_timeout"`
141+
ExtraQueryDelay time.Duration `yaml:"extra_queue_delay"`
141142

142143
ShardingStrategy string `yaml:"sharding_strategy"`
143144
ShardByAllLabels bool `yaml:"shard_by_all_labels"`
@@ -186,6 +187,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
186187
cfg.DistributorRing.RegisterFlags(f)
187188

188189
f.IntVar(&cfg.MaxRecvMsgSize, "distributor.max-recv-msg-size", 100<<20, "remote_write API max receive message size (bytes).")
190+
f.IntVar(&cfg.OTLPMaxRecvMsgSize, "distributor.otlp-max-recv-msg-size", 100<<20, "Maximum OTLP request size in bytes that the Distributor can accept.")
189191
f.DurationVar(&cfg.RemoteTimeout, "distributor.remote-timeout", 2*time.Second, "Timeout for downstream ingesters.")
190192
f.DurationVar(&cfg.ExtraQueryDelay, "distributor.extra-query-delay", 0, "Time to wait before sending more than the minimum successful query requests.")
191193
f.BoolVar(&cfg.ShardByAllLabels, "distributor.shard-by-all-labels", false, "Distribute samples based on all labels, as opposed to solely by user and metric name.")

pkg/util/http.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package util
22

33
import (
44
"bytes"
5+
"compress/gzip"
56
"context"
67
"encoding/json"
78
"flag"
@@ -143,6 +144,7 @@ type CompressionType int
143144
// Compression formats selectable when parsing a request body.
const (
144145
// NoCompression is the zero value of CompressionType.
NoCompression CompressionType = iota
145146
// RawSnappy is decoded through decompressFromBuffer in decompressFromReader.
RawSnappy
147+
// Gzip is decoded with compress/gzip in decompressFromReader.
Gzip
146148
)
147149

148150
// ParseProtoReader parses a compressed proto from an io.Reader.
@@ -215,6 +217,13 @@ func decompressFromReader(reader io.Reader, expectedSize, maxSize int, compressi
215217
return nil, err
216218
}
217219
body, err = decompressFromBuffer(&buf, maxSize, RawSnappy, sp)
220+
case Gzip:
221+
reader, err = gzip.NewReader(reader)
222+
if err != nil {
223+
return nil, err
224+
}
225+
_, err = buf.ReadFrom(reader)
226+
body = buf.Bytes()
218227
}
219228
return body, err
220229
}

pkg/util/push/otlp.go

Lines changed: 83 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,24 @@
11
package push
22

33
import (
4+
"bytes"
5+
"compress/gzip"
46
"context"
7+
"fmt"
8+
"io"
59
"net/http"
610

711
"github.com/go-kit/log"
812
"github.com/go-kit/log/level"
913
"github.com/prometheus/prometheus/model/labels"
1014
"github.com/prometheus/prometheus/prompb"
11-
"github.com/prometheus/prometheus/storage/remote"
1215
"github.com/prometheus/prometheus/storage/remote/otlptranslator/prometheusremotewrite"
1316
"github.com/prometheus/prometheus/util/annotations"
1417
"github.com/weaveworks/common/httpgrpc"
1518
"github.com/weaveworks/common/middleware"
1619
"go.opentelemetry.io/collector/pdata/pcommon"
1720
"go.opentelemetry.io/collector/pdata/pmetric"
21+
"go.opentelemetry.io/collector/pdata/pmetric/pmetricotlp"
1822

1923
"github.com/cortexproject/cortex/pkg/cortexpb"
2024
"github.com/cortexproject/cortex/pkg/distributor"
@@ -24,8 +28,13 @@ import (
2428
"github.com/cortexproject/cortex/pkg/util/validation"
2529
)
2630

31+
// Content-Type header values recognised by decodeOTLPWriteRequest; any other
// value is rejected as an unsupported content type.
const (
32+
// pbContentType selects the protobuf decoding path.
pbContentType = "application/x-protobuf"
33+
// jsonContentType selects the JSON decoding path.
jsonContentType = "application/json"
34+
)
35+
2736
// OTLPHandler is a http.Handler which accepts OTLP metrics.
28-
func OTLPHandler(overrides *validation.Overrides, cfg distributor.OTLPConfig, sourceIPs *middleware.SourceIPExtractor, push Func) http.Handler {
37+
func OTLPHandler(maxRecvMsgSize int, overrides *validation.Overrides, cfg distributor.OTLPConfig, sourceIPs *middleware.SourceIPExtractor, push Func) http.Handler {
2938
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
3039
ctx := r.Context()
3140
logger := util_log.WithContext(ctx, util_log.Logger)
@@ -42,7 +51,7 @@ func OTLPHandler(overrides *validation.Overrides, cfg distributor.OTLPConfig, so
4251
return
4352
}
4453

45-
req, err := remote.DecodeOTLPWriteRequest(r)
54+
req, err := decodeOTLPWriteRequest(ctx, r, maxRecvMsgSize)
4655
if err != nil {
4756
level.Error(logger).Log("err", err.Error())
4857
http.Error(w, err.Error(), http.StatusBadRequest)
@@ -90,6 +99,64 @@ func OTLPHandler(overrides *validation.Overrides, cfg distributor.OTLPConfig, so
9099
})
91100
}
92101

102+
func decodeOTLPWriteRequest(ctx context.Context, r *http.Request, maxSize int) (pmetricotlp.ExportRequest, error) {
103+
expectedSize := int(r.ContentLength)
104+
if expectedSize > maxSize {
105+
return pmetricotlp.NewExportRequest(), fmt.Errorf("received message larger than max (%d vs %d)", expectedSize, maxSize)
106+
}
107+
108+
contentType := r.Header.Get("Content-Type")
109+
contentEncoding := r.Header.Get("Content-Encoding")
110+
111+
var compressionType util.CompressionType
112+
switch contentEncoding {
113+
case "gzip":
114+
compressionType = util.Gzip
115+
case "":
116+
compressionType = util.NoCompression
117+
default:
118+
return pmetricotlp.NewExportRequest(), fmt.Errorf("unsupported compression: %s, Supported compression types are \"gzip\" or '' (no compression)", contentEncoding)
119+
}
120+
121+
var decoderFunc func(reader io.Reader) (pmetricotlp.ExportRequest, error)
122+
switch contentType {
123+
case pbContentType:
124+
decoderFunc = func(reader io.Reader) (pmetricotlp.ExportRequest, error) {
125+
req := pmetricotlp.NewExportRequest()
126+
otlpReqProto := otlpProtoMessage{req: &req}
127+
return req, util.ParseProtoReader(ctx, reader, expectedSize, maxSize, otlpReqProto, compressionType)
128+
}
129+
case jsonContentType:
130+
decoderFunc = func(reader io.Reader) (pmetricotlp.ExportRequest, error) {
131+
req := pmetricotlp.NewExportRequest()
132+
133+
reader = io.LimitReader(reader, int64(maxSize)+1)
134+
if compressionType == util.Gzip {
135+
var err error
136+
reader, err = gzip.NewReader(reader)
137+
if err != nil {
138+
return req, err
139+
}
140+
}
141+
142+
var buf bytes.Buffer
143+
if expectedSize > 0 {
144+
buf.Grow(expectedSize + bytes.MinRead) // extra space guarantees no reallocation
145+
}
146+
_, err := buf.ReadFrom(reader)
147+
if err != nil {
148+
return req, err
149+
}
150+
151+
return req, req.UnmarshalJSON(buf.Bytes())
152+
}
153+
default:
154+
return pmetricotlp.NewExportRequest(), fmt.Errorf("unsupported content type: %s, supported: [%s, %s]", contentType, jsonContentType, pbContentType)
155+
}
156+
157+
return decoderFunc(r.Body)
158+
}
159+
93160
func convertToPromTS(ctx context.Context, pmetrics pmetric.Metrics, cfg distributor.OTLPConfig, overrides *validation.Overrides, userID string, logger log.Logger) ([]prompb.TimeSeries, error) {
94161
promConverter := prometheusremotewrite.NewPrometheusConverter()
95162
settings := prometheusremotewrite.Settings{
@@ -223,3 +290,16 @@ func joinAttributeMaps(from, to pcommon.Map) {
223290
return true
224291
})
225292
}
293+
294+
// otlpProtoMessage implements proto.Message and proto.Unmarshaler. It wraps a
// pmetricotlp.ExportRequest so that decodeOTLPWriteRequest can pass it to
// util.ParseProtoReader.
295+
type otlpProtoMessage struct {
296+
// req is the export request that Unmarshal decodes into.
req *pmetricotlp.ExportRequest
297+
}
298+
299+
// ProtoMessage does nothing.
func (otlpProtoMessage) ProtoMessage() {}
300+
301+
// Reset does nothing; the wrapped request is left untouched.
func (otlpProtoMessage) Reset() {}
302+
303+
// String always returns the empty string.
func (otlpProtoMessage) String() string { return "" }
304+
305+
// Unmarshal decodes data into the wrapped request via its UnmarshalProto method.
func (o otlpProtoMessage) Unmarshal(data []byte) error { return o.req.UnmarshalProto(data) }

0 commit comments

Comments
 (0)