Skip to content

Commit 68012ec

Browse files
authored
Update thanos to latest main to pull lazy posting improvements (#6411)
* update thanos to latest main to pull lazy posting improvements

  Signed-off-by: Ben Ye <[email protected]>

* fix lint

  Signed-off-by: Ben Ye <[email protected]>

---------

Signed-off-by: Ben Ye <[email protected]>
1 parent 1177a67 commit 68012ec

File tree

12 files changed

+155
-40
lines changed

12 files changed

+155
-40
lines changed

Diff for: docs/blocks-storage/querier.md

+8
Original file line numberDiff line numberDiff line change
@@ -1415,6 +1415,14 @@ blocks_storage:
14151415
# CLI flag: -blocks-storage.bucket-store.lazy-expanded-postings-enabled
14161416
[lazy_expanded_postings_enabled: <boolean> | default = false]
14171417

1418+
# Mark posting group as lazy if it fetches more keys than R * max series the
1419+
# query should fetch. With R set to 100, a posting group which fetches 100K
1420+
# keys will be marked as lazy if the current query only fetches 1000 series.
1421+
# This config is only valid if lazy expanded posting is enabled. 0 disables
1422+
# the limit.
1423+
# CLI flag: -blocks-storage.bucket-store.lazy-expanded-posting-group-max-key-series-ratio
1424+
[lazy_expanded_posting_group_max_key_series_ratio: <float> | default = 100]
1425+
14181426
# Controls how many series to fetch per batch in Store Gateway. Default
14191427
# value is 10000.
14201428
# CLI flag: -blocks-storage.bucket-store.series-batch-size

Diff for: docs/blocks-storage/store-gateway.md

+8
Original file line numberDiff line numberDiff line change
@@ -1519,6 +1519,14 @@ blocks_storage:
15191519
# CLI flag: -blocks-storage.bucket-store.lazy-expanded-postings-enabled
15201520
[lazy_expanded_postings_enabled: <boolean> | default = false]
15211521

1522+
# Mark posting group as lazy if it fetches more keys than R * max series the
1523+
# query should fetch. With R set to 100, a posting group which fetches 100K
1524+
# keys will be marked as lazy if the current query only fetches 1000 series.
1525+
# This config is only valid if lazy expanded posting is enabled. 0 disables
1526+
# the limit.
1527+
# CLI flag: -blocks-storage.bucket-store.lazy-expanded-posting-group-max-key-series-ratio
1528+
[lazy_expanded_posting_group_max_key_series_ratio: <float> | default = 100]
1529+
15221530
# Controls how many series to fetch per batch in Store Gateway. Default
15231531
# value is 10000.
15241532
# CLI flag: -blocks-storage.bucket-store.series-batch-size

Diff for: docs/configuration/config-file-reference.md

+8
Original file line numberDiff line numberDiff line change
@@ -1949,6 +1949,14 @@ bucket_store:
19491949
# CLI flag: -blocks-storage.bucket-store.lazy-expanded-postings-enabled
19501950
[lazy_expanded_postings_enabled: <boolean> | default = false]
19511951

1952+
# Mark posting group as lazy if it fetches more keys than R * max series the
1953+
# query should fetch. With R set to 100, a posting group which fetches 100K
1954+
# keys will be marked as lazy if the current query only fetches 1000 series.
1955+
# This config is only valid if lazy expanded posting is enabled. 0 disables
1956+
# the limit.
1957+
# CLI flag: -blocks-storage.bucket-store.lazy-expanded-posting-group-max-key-series-ratio
1958+
[lazy_expanded_posting_group_max_key_series_ratio: <float> | default = 100]
1959+
19521960
# Controls how many series to fetch per batch in Store Gateway. Default value
19531961
# is 10000.
19541962
# CLI flag: -blocks-storage.bucket-store.series-batch-size

Diff for: go.mod

+1-1
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ require (
5353
github.com/stretchr/testify v1.10.0
5454
github.com/thanos-io/objstore v0.0.0-20241111205755-d1dd89d41f97
5555
github.com/thanos-io/promql-engine v0.0.0-20241203103240-2f49f80c7c68
56-
github.com/thanos-io/thanos v0.37.2-0.20241205123958-d0d93dbf3efc
56+
github.com/thanos-io/thanos v0.37.2-0.20241210071311-51c7dcd8c278
5757
github.com/uber/jaeger-client-go v2.30.0+incompatible
5858
github.com/weaveworks/common v0.0.0-20230728070032-dd9e68f319d5
5959
go.etcd.io/etcd/api/v3 v3.5.17

Diff for: go.sum

+2-2
Original file line numberDiff line numberDiff line change
@@ -1665,8 +1665,8 @@ github.com/thanos-io/objstore v0.0.0-20241111205755-d1dd89d41f97 h1:VjG0mwhN1Dkn
16651665
github.com/thanos-io/objstore v0.0.0-20241111205755-d1dd89d41f97/go.mod h1:vyzFrBXgP+fGNG2FopEGWOO/zrIuoy7zt3LpLeezRsw=
16661666
github.com/thanos-io/promql-engine v0.0.0-20241203103240-2f49f80c7c68 h1:cChM/FbpXeYmrSmXO1/MmmSlONviLVxWAWCB0/g4JrY=
16671667
github.com/thanos-io/promql-engine v0.0.0-20241203103240-2f49f80c7c68/go.mod h1:wx0JlRZtsB2S10JYUgeg5GqLfMxw31SzArP+28yyE00=
1668-
github.com/thanos-io/thanos v0.37.2-0.20241205123958-d0d93dbf3efc h1:LMpGIErJWqv+9FmCHAcl9t+6VL8gn6lptIKDgglbNnU=
1669-
github.com/thanos-io/thanos v0.37.2-0.20241205123958-d0d93dbf3efc/go.mod h1:5Ni7Uc1Bc8UCGOYmZ/2f/LVvDkZKNDdqDJZqjDFG+rI=
1668+
github.com/thanos-io/thanos v0.37.2-0.20241210071311-51c7dcd8c278 h1:5MYGbe7gYtPE/DYReOxrevi++3+mgwz5ud9ji/lwXrg=
1669+
github.com/thanos-io/thanos v0.37.2-0.20241210071311-51c7dcd8c278/go.mod h1:5Ni7Uc1Bc8UCGOYmZ/2f/LVvDkZKNDdqDJZqjDFG+rI=
16701670
github.com/tjhop/slog-gokit v0.1.2 h1:pmQI4SvU9h4gA0vIQsdhJQSqQg4mOmsPykG2/PM3j1I=
16711671
github.com/tjhop/slog-gokit v0.1.2/go.mod h1:8fhlcp8C8ELbg3GCyKv06tgt4B5sDq2P1r2DQAu1HuM=
16721672
github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM=

Diff for: pkg/storage/tsdb/config.go

+11-3
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,10 @@ var (
5252
errEmptyBlockranges = errors.New("empty block ranges for TSDB")
5353
errUnSupportedWALCompressionType = errors.New("unsupported WAL compression type, valid types are (zstd, snappy and '')")
5454

55-
ErrInvalidBucketIndexBlockDiscoveryStrategy = errors.New("bucket index block discovery strategy can only be enabled when bucket index is enabled")
56-
ErrBlockDiscoveryStrategy = errors.New("invalid block discovery strategy")
57-
ErrInvalidTokenBucketBytesLimiterMode = errors.New("invalid token bucket bytes limiter mode")
55+
ErrInvalidBucketIndexBlockDiscoveryStrategy = errors.New("bucket index block discovery strategy can only be enabled when bucket index is enabled")
56+
ErrBlockDiscoveryStrategy = errors.New("invalid block discovery strategy")
57+
ErrInvalidTokenBucketBytesLimiterMode = errors.New("invalid token bucket bytes limiter mode")
58+
ErrInvalidLazyExpandedPostingGroupMaxKeySeriesRatio = errors.New("lazy expanded posting group max key series ratio needs to be equal or greater than 0")
5859
)
5960

6061
// BlocksStorageConfig holds the config information for the blocks storage.
@@ -291,6 +292,9 @@ type BucketStoreConfig struct {
291292
// Controls whether lazy expanded posting optimization is enabled or not.
292293
LazyExpandedPostingsEnabled bool `yaml:"lazy_expanded_postings_enabled"`
293294

295+
// Controls when a posting group is marked as lazy: a group is marked lazy if it fetches more keys than this ratio times the max series the query should fetch. 0 disables the limit.
296+
LazyExpandedPostingGroupMaxKeySeriesRatio float64 `yaml:"lazy_expanded_posting_group_max_key_series_ratio"`
297+
294298
// Controls the partitioner, used to aggregate multiple GET object API requests.
295299
// The config option is hidden until experimental.
296300
PartitionerMaxGapBytes uint64 `yaml:"partitioner_max_gap_bytes" doc:"hidden"`
@@ -356,6 +360,7 @@ func (cfg *BucketStoreConfig) RegisterFlags(f *flag.FlagSet) {
356360
f.Uint64Var(&cfg.EstimatedMaxSeriesSizeBytes, "blocks-storage.bucket-store.estimated-max-series-size-bytes", store.EstimatedMaxSeriesSize, "Estimated max series size in bytes. Setting a large value might result in over fetching data while a small value might result in data refetch. Default value is 64KB.")
357361
f.Uint64Var(&cfg.EstimatedMaxChunkSizeBytes, "blocks-storage.bucket-store.estimated-max-chunk-size-bytes", store.EstimatedMaxChunkSize, "Estimated max chunk size in bytes. Setting a large value might result in over fetching data while a small value might result in data refetch. Default value is 16KiB.")
358362
f.BoolVar(&cfg.LazyExpandedPostingsEnabled, "blocks-storage.bucket-store.lazy-expanded-postings-enabled", false, "If true, Store Gateway will estimate postings size and try to lazily expand postings if it downloads less data than expanding all postings.")
363+
f.Float64Var(&cfg.LazyExpandedPostingGroupMaxKeySeriesRatio, "blocks-storage.bucket-store.lazy-expanded-posting-group-max-key-series-ratio", 100, "Mark posting group as lazy if it fetches more keys than R * max series the query should fetch. With R set to 100, a posting group which fetches 100K keys will be marked as lazy if the current query only fetches 1000 series. This config is only valid if lazy expanded posting is enabled. 0 disables the limit.")
359364
f.IntVar(&cfg.SeriesBatchSize, "blocks-storage.bucket-store.series-batch-size", store.SeriesBatchSize, "Controls how many series to fetch per batch in Store Gateway. Default value is 10000.")
360365
f.StringVar(&cfg.BlockDiscoveryStrategy, "blocks-storage.bucket-store.block-discovery-strategy", string(ConcurrentDiscovery), "One of "+strings.Join(supportedBlockDiscoveryStrategies, ", ")+". When set to concurrent, stores will concurrently issue one call per directory to discover active blocks in the bucket. The recursive strategy iterates through all objects in the bucket, recursively traversing into each directory. This avoids N+1 calls at the expense of having slower bucket iterations. bucket_index strategy can be used in Compactor only and utilizes the existing bucket index to fetch block IDs to sync. This avoids iterating the bucket but can be impacted by delays of cleaner creating bucket index.")
361366
f.StringVar(&cfg.TokenBucketBytesLimiter.Mode, "blocks-storage.bucket-store.token-bucket-bytes-limiter.mode", string(TokenBucketBytesLimiterDisabled), fmt.Sprintf("Token bucket bytes limiter mode. Supported values are: %s", strings.Join(supportedTokenBucketBytesLimiterModes, ", ")))
@@ -390,6 +395,9 @@ func (cfg *BucketStoreConfig) Validate() error {
390395
if !util.StringsContain(supportedTokenBucketBytesLimiterModes, cfg.TokenBucketBytesLimiter.Mode) {
391396
return ErrInvalidTokenBucketBytesLimiterMode
392397
}
398+
if cfg.LazyExpandedPostingGroupMaxKeySeriesRatio < 0 {
399+
return ErrInvalidLazyExpandedPostingGroupMaxKeySeriesRatio
400+
}
393401
return nil
394402
}
395403

Diff for: pkg/storegateway/bucket_store_metrics.go

+7
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ type BucketStoreMetrics struct {
4646
chunkFetchDurationSum *prometheus.Desc
4747

4848
lazyExpandedPostingsCount *prometheus.Desc
49+
lazyExpandedPostingGroups *prometheus.Desc
4950
lazyExpandedPostingSizeBytes *prometheus.Desc
5051
lazyExpandedPostingSeriesOverfetchedSizeBytes *prometheus.Desc
5152

@@ -209,6 +210,10 @@ func NewBucketStoreMetrics() *BucketStoreMetrics {
209210
"cortex_bucket_store_lazy_expanded_postings_total",
210211
"Total number of lazy expanded postings when fetching block series.",
211212
nil, nil),
213+
lazyExpandedPostingGroups: prometheus.NewDesc(
214+
"cortex_bucket_store_lazy_expanded_posting_groups_total",
215+
"Total number of posting groups that are marked as lazy and corresponding reason.",
216+
[]string{"reason"}, nil),
212217
lazyExpandedPostingSizeBytes: prometheus.NewDesc(
213218
"cortex_bucket_store_lazy_expanded_posting_size_bytes_total",
214219
"Total number of lazy posting group size in bytes.",
@@ -269,6 +274,7 @@ func (m *BucketStoreMetrics) Describe(out chan<- *prometheus.Desc) {
269274
out <- m.indexHeaderLazyLoadDuration
270275

271276
out <- m.lazyExpandedPostingsCount
277+
out <- m.lazyExpandedPostingGroups
272278
out <- m.lazyExpandedPostingSizeBytes
273279
out <- m.lazyExpandedPostingSeriesOverfetchedSizeBytes
274280
}
@@ -319,6 +325,7 @@ func (m *BucketStoreMetrics) Collect(out chan<- prometheus.Metric) {
319325
data.SendSumOfHistograms(out, m.indexHeaderLazyLoadDuration, "thanos_bucket_store_indexheader_lazy_load_duration_seconds")
320326

321327
data.SendSumOfCounters(out, m.lazyExpandedPostingsCount, "thanos_bucket_store_lazy_expanded_postings_total")
328+
data.SendSumOfCountersWithLabels(out, m.lazyExpandedPostingGroups, "thanos_bucket_store_lazy_expanded_posting_groups_total", "reason")
322329
data.SendSumOfCounters(out, m.lazyExpandedPostingSizeBytes, "thanos_bucket_store_lazy_expanded_posting_size_bytes_total")
323330
data.SendSumOfCounters(out, m.lazyExpandedPostingSeriesOverfetchedSizeBytes, "thanos_bucket_store_lazy_expanded_posting_series_overfetched_size_bytes_total")
324331
}

Diff for: pkg/storegateway/bucket_store_metrics_test.go

+12
Original file line numberDiff line numberDiff line change
@@ -543,6 +543,10 @@ func TestBucketStoreMetrics(t *testing.T) {
543543
# HELP cortex_bucket_store_indexheader_lazy_unload_total Total number of index-header lazy unload operations.
544544
# TYPE cortex_bucket_store_indexheader_lazy_unload_total counter
545545
cortex_bucket_store_indexheader_lazy_unload_total 1.396178e+06
546+
# HELP cortex_bucket_store_lazy_expanded_posting_groups_total Total number of posting groups that are marked as lazy and corresponding reason.
547+
# TYPE cortex_bucket_store_lazy_expanded_posting_groups_total counter
548+
cortex_bucket_store_lazy_expanded_posting_groups_total{reason="keys_limit"} 202671
549+
cortex_bucket_store_lazy_expanded_posting_groups_total{reason="postings_size"} 225190
546550
# HELP cortex_bucket_store_lazy_expanded_posting_series_overfetched_size_bytes_total Total number of series size in bytes overfetched due to posting lazy expansion.
547551
# TYPE cortex_bucket_store_lazy_expanded_posting_series_overfetched_size_bytes_total counter
548552
cortex_bucket_store_lazy_expanded_posting_series_overfetched_size_bytes_total 180152
@@ -687,6 +691,8 @@ func populateMockedBucketStoreMetrics(base float64) *prometheus.Registry {
687691
m.lazyExpandedPostingsCount.Add(6 * base)
688692
m.lazyExpandedPostingSizeBytes.Add(7 * base)
689693
m.lazyExpandedPostingSeriesOverfetchedSizeBytes.Add(8 * base)
694+
m.lazyExpandedPostingGroups.WithLabelValues("keys_limit").Add(9 * base)
695+
m.lazyExpandedPostingGroups.WithLabelValues("postings_size").Add(10 * base)
690696

691697
return reg
692698
}
@@ -733,6 +739,7 @@ type mockedBucketStoreMetrics struct {
733739
indexHeaderLazyLoadDuration prometheus.Histogram
734740

735741
lazyExpandedPostingsCount prometheus.Counter
742+
lazyExpandedPostingGroups *prometheus.CounterVec
736743
lazyExpandedPostingSizeBytes prometheus.Counter
737744
lazyExpandedPostingSeriesOverfetchedSizeBytes prometheus.Counter
738745
}
@@ -917,6 +924,11 @@ func newMockedBucketStoreMetrics(reg prometheus.Registerer) *mockedBucketStoreMe
917924
Help: "Total number of times when lazy expanded posting optimization applies.",
918925
})
919926

927+
m.lazyExpandedPostingGroups = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{
928+
Name: "thanos_bucket_store_lazy_expanded_posting_groups_total",
929+
Help: "Total number of posting groups that are marked as lazy and corresponding reason.",
930+
}, []string{"reason"})
931+
920932
m.lazyExpandedPostingSizeBytes = promauto.With(reg).NewCounter(prometheus.CounterOpts{
921933
Name: "thanos_bucket_store_lazy_expanded_posting_size_bytes_total",
922934
Help: "Total number of lazy posting group size in bytes.",

Diff for: pkg/storegateway/bucket_stores.go

+1
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,7 @@ func (u *BucketStores) getOrCreateStore(userID string) (*store.BucketStore, erro
625625
return u.cfg.BucketStore.EstimatedMaxSeriesSizeBytes
626626
}),
627627
store.WithLazyExpandedPostings(u.cfg.BucketStore.LazyExpandedPostingsEnabled),
628+
store.WithPostingGroupMaxKeySeriesRatio(u.cfg.BucketStore.LazyExpandedPostingGroupMaxKeySeriesRatio),
628629
store.WithDontResort(true), // Cortex doesn't need to resort series in store gateway.
629630
}
630631
if u.logLevel.String() == "debug" {

0 commit comments

Comments
 (0)