Skip to content

Commit b48f93b

Browse files
authored
Extend ShuffleSharding on READONLY ingesters (#6517)
* Filter readOnly ingesters when sharding Signed-off-by: Daniel Deluiggi <[email protected]> * Extend shard on READONLY Signed-off-by: Daniel Deluiggi <[email protected]> * Remove old code Signed-off-by: Daniel Deluiggi <[email protected]> * Fix test Signed-off-by: Daniel Deluiggi <[email protected]> * update changelog Signed-off-by: Daniel Deluiggi <[email protected]> --------- Signed-off-by: Daniel Deluiggi <[email protected]>
1 parent 4b32f29 commit b48f93b

File tree

8 files changed

+235
-8
lines changed

8 files changed

+235
-8
lines changed

Diff for: CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
## master / unreleased
44

55
* [FEATURE] Querier/Ruler: Add `query_partial_data` and `rules_partial_data` limits to allow queries/rules to be evaluated with data from a single zone, if other zones are not available. #6526
6+
* [BUGFIX] Ingester: Avoid error or early throttling when READONLY ingesters are present in the ring #6517
67

78
## 1.19.0 in progress
89

Diff for: pkg/ingester/ingester.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -3122,7 +3122,7 @@ func (i *Ingester) flushHandler(w http.ResponseWriter, r *http.Request) {
31223122
w.WriteHeader(http.StatusNoContent)
31233123
}
31243124

3125-
// ModeHandler Change mode of ingester. It will also update set unregisterOnShutdown to true if READONLY mode
3125+
// ModeHandler Change mode of ingester.
31263126
func (i *Ingester) ModeHandler(w http.ResponseWriter, r *http.Request) {
31273127
err := r.ParseForm()
31283128
if err != nil {

Diff for: pkg/ring/lifecycler.go

+6
Original file line numberDiff line numberDiff line change
@@ -1005,6 +1005,12 @@ func (i *Lifecycler) changeState(ctx context.Context, state InstanceState) error
10051005

10061006
level.Info(i.logger).Log("msg", "changing instance state from", "old_state", currState, "new_state", state, "ring", i.RingName)
10071007
i.setState(state)
1008+
1009+
//The instances is rejoining the ring. It should reset its registered time.
1010+
if currState == READONLY && state == ACTIVE {
1011+
registeredAt := time.Now()
1012+
i.setRegisteredAt(registeredAt)
1013+
}
10081014
return i.updateConsul(ctx)
10091015
}
10101016

Diff for: pkg/ring/lifecycler_test.go

+80
Original file line numberDiff line numberDiff line change
@@ -827,6 +827,86 @@ func TestTokenFileOnDisk(t *testing.T) {
827827
}
828828
}
829829

830+
func TestRegisteredAtOnBackToActive(t *testing.T) {
831+
ringStore, closer := consul.NewInMemoryClient(GetCodec(), log.NewNopLogger(), nil)
832+
t.Cleanup(func() { assert.NoError(t, closer.Close()) })
833+
834+
var ringConfig Config
835+
flagext.DefaultValues(&ringConfig)
836+
ringConfig.KVStore.Mock = ringStore
837+
838+
r, err := New(ringConfig, "ingester", ringKey, log.NewNopLogger(), nil)
839+
require.NoError(t, err)
840+
require.NoError(t, services.StartAndAwaitRunning(context.Background(), r))
841+
defer services.StopAndAwaitTerminated(context.Background(), r) //nolint:errcheck
842+
843+
tokenDir := t.TempDir()
844+
845+
lifecyclerConfig := testLifecyclerConfig(ringConfig, "ing1")
846+
lifecyclerConfig.NumTokens = 512
847+
lifecyclerConfig.TokensFilePath = tokenDir + "/tokens"
848+
849+
// Start first ingester.
850+
l1, err := NewLifecycler(lifecyclerConfig, &noopFlushTransferer{}, "ingester", ringKey, true, true, log.NewNopLogger(), nil)
851+
require.NoError(t, err)
852+
require.NoError(t, services.StartAndAwaitRunning(context.Background(), l1))
853+
854+
// Check this ingester joined, is active.
855+
test.Poll(t, 1000*time.Millisecond, true, func() interface{} {
856+
d, err := r.KVClient.Get(context.Background(), ringKey)
857+
require.NoError(t, err)
858+
859+
desc, ok := d.(*Desc)
860+
return ok &&
861+
len(desc.Ingesters) == 1 &&
862+
desc.Ingesters["ing1"].State == ACTIVE
863+
})
864+
865+
//Get original registeredTime
866+
d, err := r.KVClient.Get(context.Background(), ringKey)
867+
require.NoError(t, err)
868+
desc, ok := d.(*Desc)
869+
require.True(t, ok)
870+
originalRegisterTime := desc.Ingesters["ing1"].RegisteredTimestamp
871+
872+
// Change state from ACTIVE to READONLY
873+
err = l1.ChangeState(context.Background(), READONLY)
874+
require.NoError(t, err)
875+
test.Poll(t, 1000*time.Millisecond, true, func() interface{} {
876+
d, err := r.KVClient.Get(context.Background(), ringKey)
877+
require.NoError(t, err)
878+
879+
desc, ok := d.(*Desc)
880+
return ok &&
881+
desc.Ingesters["ing1"].State == READONLY
882+
})
883+
884+
//Guarantee 1s diff for RegisteredTimestamp
885+
time.Sleep(1 * time.Second)
886+
887+
// Change state from READONLY to ACTIVE
888+
err = l1.ChangeState(context.Background(), ACTIVE)
889+
require.NoError(t, err)
890+
test.Poll(t, 1000*time.Millisecond, true, func() interface{} {
891+
d, err := r.KVClient.Get(context.Background(), ringKey)
892+
require.NoError(t, err)
893+
894+
desc, ok := d.(*Desc)
895+
return ok &&
896+
desc.Ingesters["ing1"].State == ACTIVE
897+
})
898+
899+
d, err = r.KVClient.Get(context.Background(), ringKey)
900+
require.NoError(t, err)
901+
902+
desc, ok = d.(*Desc)
903+
require.True(t, ok)
904+
ing := desc.Ingesters["ing1"]
905+
require.True(t, ing.RegisteredTimestamp > originalRegisterTime)
906+
907+
require.NoError(t, services.StopAndAwaitTerminated(context.Background(), l1))
908+
}
909+
830910
func TestTokenFileOnDisk_WithoutAutoJoinOnStartup(t *testing.T) {
831911
ringStore, closer := consul.NewInMemoryClient(GetCodec(), log.NewNopLogger(), nil)
832912
t.Cleanup(func() { assert.NoError(t, closer.Close()) })

Diff for: pkg/ring/model.go

+14-5
Original file line numberDiff line numberDiff line change
@@ -543,10 +543,11 @@ type CompareResult int
543543
const (
544544
Equal CompareResult = iota // Both rings contain same exact instances.
545545
EqualButStatesAndTimestamps // Both rings contain the same instances with the same data except states and timestamps (may differ).
546+
EqualButReadOnly // Both rings contain the same instances but Write ring can change due to ReadOnly update
546547
Different // Rings have different set of instances, or their information don't match.
547548
)
548549

549-
// RingCompare compares this ring against another one and returns one of Equal, EqualButStatesAndTimestamps or Different.
550+
// RingCompare compares this ring against another one and returns one of Equal, EqualButStatesAndTimestamps, EqualButReadOnly or Different.
550551
func (d *Desc) RingCompare(o *Desc) CompareResult {
551552
if d == nil {
552553
if o == nil || len(o.Ingesters) == 0 {
@@ -566,6 +567,7 @@ func (d *Desc) RingCompare(o *Desc) CompareResult {
566567
}
567568

568569
equalStatesAndTimestamps := true
570+
equalReadOnly := true
569571

570572
for name, ing := range d.Ingesters {
571573
oing, ok := o.Ingesters[name]
@@ -600,14 +602,21 @@ func (d *Desc) RingCompare(o *Desc) CompareResult {
600602
}
601603

602604
if ing.State != oing.State {
603-
equalStatesAndTimestamps = false
605+
if ing.State == READONLY || oing.State == READONLY {
606+
equalReadOnly = false
607+
} else {
608+
equalStatesAndTimestamps = false
609+
}
604610
}
605611
}
606612

607-
if equalStatesAndTimestamps {
608-
return Equal
613+
if !equalReadOnly {
614+
return EqualButReadOnly
615+
}
616+
if !equalStatesAndTimestamps {
617+
return EqualButStatesAndTimestamps
609618
}
610-
return EqualButStatesAndTimestamps
619+
return Equal
611620
}
612621

613622
func GetOrCreateRingDesc(d interface{}) *Desc {

Diff for: pkg/ring/model_test.go

+20
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,21 @@ func TestDesc_RingsCompare(t *testing.T) {
395395
r2: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1"}}},
396396
expected: Equal,
397397
},
398+
"same number of instances, from active to readOnly": {
399+
r1: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1", State: ACTIVE}}},
400+
r2: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1", State: READONLY}}},
401+
expected: EqualButReadOnly,
402+
},
403+
"same number of instances, from readOnly to active": {
404+
r1: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1", State: READONLY}}},
405+
r2: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1", State: ACTIVE}}},
406+
expected: EqualButReadOnly,
407+
},
408+
"same number of instances, prioritize readOnly than timestamp changes": {
409+
r1: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1", State: ACTIVE, Timestamp: 123456}}},
410+
r2: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1", State: READONLY, Timestamp: 789012}}},
411+
expected: EqualButReadOnly,
412+
},
398413
"same single instance, different timestamp": {
399414
r1: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1", Timestamp: 123456}}},
400415
r2: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1", Timestamp: 789012}}},
@@ -440,6 +455,11 @@ func TestDesc_RingsCompare(t *testing.T) {
440455
r2: &Desc{Ingesters: map[string]InstanceDesc{"ing2": {Addr: "addr1", Tokens: []uint32{1, 2, 3}}}},
441456
expected: Different,
442457
},
458+
"same number of instances, prioritize diff than ReadOnly": {
459+
r1: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1", Zone: "one", State: ACTIVE}}},
460+
r2: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1", Zone: "two", State: READONLY}}},
461+
expected: Different,
462+
},
443463
}
444464

445465
for testName, testData := range tests {

Diff for: pkg/ring/ring.go

+8-2
Original file line numberDiff line numberDiff line change
@@ -333,12 +333,16 @@ func (r *Ring) updateRingState(ringDesc *Desc) {
333333
}
334334

335335
rc := prevRing.RingCompare(ringDesc)
336-
if rc == Equal || rc == EqualButStatesAndTimestamps {
336+
if rc == Equal || rc == EqualButStatesAndTimestamps || rc == EqualButReadOnly {
337337
// No need to update tokens or zones. Only states and timestamps
338338
// have changed. (If Equal, nothing has changed, but that doesn't happen
339339
// when watching the ring for updates).
340340
r.mtx.Lock()
341341
r.ringDesc = ringDesc
342+
if rc == EqualButReadOnly && r.shuffledSubringCache != nil {
343+
// Invalidate all cached subrings.
344+
r.shuffledSubringCache = make(map[subringCacheKey]*Ring)
345+
}
342346
r.updateRingMetrics(rc)
343347
r.mtx.Unlock()
344348
return
@@ -852,7 +856,9 @@ func (r *Ring) shuffleShard(identifier string, size int, lookbackPeriod time.Dur
852856

853857
// If the lookback is enabled and this instance has been registered within the lookback period
854858
// then we should include it in the subring but continuing selecting instances.
855-
if lookbackPeriod > 0 && instance.RegisteredTimestamp >= lookbackUntil {
859+
// If an instance is in READONLY we should always extend. The write path will filter it out when GetRing.
860+
// The read path should extend to get new ingester used on write
861+
if (lookbackPeriod > 0 && instance.RegisteredTimestamp >= lookbackUntil) || instance.State == READONLY {
856862
continue
857863
}
858864

Diff for: pkg/ring/ring_test.go

+105
Original file line numberDiff line numberDiff line change
@@ -2523,6 +2523,111 @@ func TestRing_ShuffleShardWithLookback(t *testing.T) {
25232523
}
25242524
}
25252525

2526+
func TestRing_ShuffleShardWithReadOnlyIngesters(t *testing.T) {
2527+
g := NewRandomTokenGenerator()
2528+
2529+
const (
2530+
userID = "user-1"
2531+
)
2532+
2533+
tests := map[string]struct {
2534+
ringInstances map[string]InstanceDesc
2535+
ringReplicationFactor int
2536+
shardSize int
2537+
expectedSize int
2538+
op Operation
2539+
expectedToBePresent []string
2540+
}{
2541+
"single zone, shard size = 1, default scenario": {
2542+
ringInstances: map[string]InstanceDesc{
2543+
"instance-1": {Addr: "127.0.0.1", Zone: "zone-a", State: ACTIVE, Tokens: g.GenerateTokens(NewDesc(), "instance-1", "zone-a", 128, true)},
2544+
"instance-2": {Addr: "127.0.0.2", Zone: "zone-a", State: ACTIVE, Tokens: g.GenerateTokens(NewDesc(), "instance-2", "zone-a", 128, true)},
2545+
},
2546+
ringReplicationFactor: 1,
2547+
shardSize: 1,
2548+
expectedSize: 1,
2549+
},
2550+
"single zone, shard size = 1, not filter ReadOnly": {
2551+
ringInstances: map[string]InstanceDesc{
2552+
"instance-1": {Addr: "127.0.0.1", Zone: "zone-a", State: ACTIVE, Tokens: g.GenerateTokens(NewDesc(), "instance-1", "zone-a", 128, true)},
2553+
"instance-2": {Addr: "127.0.0.2", Zone: "zone-a", State: READONLY, Tokens: g.GenerateTokens(NewDesc(), "instance-2", "zone-a", 128, true)},
2554+
},
2555+
ringReplicationFactor: 1,
2556+
shardSize: 2,
2557+
expectedSize: 2,
2558+
},
2559+
"single zone, shard size = 4, do not filter other states": {
2560+
ringInstances: map[string]InstanceDesc{
2561+
"instance-1": {Addr: "127.0.0.1", Zone: "zone-a", State: ACTIVE, Tokens: g.GenerateTokens(NewDesc(), "instance-1", "zone-a", 128, true)},
2562+
"instance-2": {Addr: "127.0.0.2", Zone: "zone-a", State: JOINING, Tokens: g.GenerateTokens(NewDesc(), "instance-2", "zone-a", 128, true)},
2563+
"instance-3": {Addr: "127.0.0.3", Zone: "zone-a", State: LEAVING, Tokens: g.GenerateTokens(NewDesc(), "instance-3", "zone-a", 128, true)},
2564+
"instance-4": {Addr: "127.0.0.4", Zone: "zone-a", State: PENDING, Tokens: g.GenerateTokens(NewDesc(), "instance-4", "zone-a", 128, true)},
2565+
},
2566+
ringReplicationFactor: 1,
2567+
shardSize: 4,
2568+
expectedSize: 4,
2569+
},
2570+
"single zone, shard size = 4, extend on readOnly": {
2571+
ringInstances: map[string]InstanceDesc{
2572+
"instance-1": {Addr: "127.0.0.1", Zone: "zone-a", State: ACTIVE, Tokens: []uint32{2}},
2573+
"instance-2": {Addr: "127.0.0.2", Zone: "zone-a", State: ACTIVE, Tokens: []uint32{4}},
2574+
"instance-3": {Addr: "127.0.0.3", Zone: "zone-a", State: ACTIVE, Tokens: []uint32{6}},
2575+
"instance-4": {Addr: "127.0.0.4", Zone: "zone-a", State: READONLY, Tokens: []uint32{1, 3, 5}},
2576+
},
2577+
ringReplicationFactor: 1,
2578+
shardSize: 2,
2579+
expectedSize: 3,
2580+
expectedToBePresent: []string{"instance-4"},
2581+
},
2582+
"rf = 3, shard size = 4, extend readOnly from different zones": {
2583+
ringInstances: map[string]InstanceDesc{
2584+
"instance-1": {Addr: "127.0.0.1", Zone: "zone-a", State: ACTIVE, Tokens: []uint32{2}},
2585+
"instance-2": {Addr: "127.0.0.2", Zone: "zone-b", State: ACTIVE, Tokens: []uint32{12}},
2586+
"instance-3": {Addr: "127.0.0.3", Zone: "zone-c", State: ACTIVE, Tokens: []uint32{22}},
2587+
"instance-4": {Addr: "127.0.0.4", Zone: "zone-a", State: ACTIVE, Tokens: []uint32{4}},
2588+
"instance-5": {Addr: "127.0.0.5", Zone: "zone-b", State: ACTIVE, Tokens: []uint32{14}},
2589+
"instance-6": {Addr: "127.0.0.6", Zone: "zone-c", State: ACTIVE, Tokens: []uint32{24}},
2590+
"instance-7": {Addr: "127.0.0.7", Zone: "zone-a", State: READONLY, Tokens: []uint32{1, 3}},
2591+
"instance-8": {Addr: "127.0.0.8", Zone: "zone-b", State: READONLY, Tokens: []uint32{11, 13}},
2592+
"instance-9": {Addr: "127.0.0.9", Zone: "zone-c", State: READONLY, Tokens: []uint32{21, 23}},
2593+
},
2594+
ringReplicationFactor: 3,
2595+
shardSize: 6,
2596+
expectedSize: 9,
2597+
expectedToBePresent: []string{"instance-7", "instance-8", "instance-9"},
2598+
},
2599+
}
2600+
2601+
for testName, testData := range tests {
2602+
t.Run(testName, func(t *testing.T) {
2603+
// Init the ring.
2604+
ringDesc := &Desc{Ingesters: testData.ringInstances}
2605+
for id, instance := range ringDesc.Ingesters {
2606+
ringDesc.Ingesters[id] = instance
2607+
}
2608+
2609+
ring := Ring{
2610+
cfg: Config{
2611+
ReplicationFactor: testData.ringReplicationFactor,
2612+
},
2613+
ringDesc: ringDesc,
2614+
ringTokens: ringDesc.GetTokens(),
2615+
ringTokensByZone: ringDesc.getTokensByZone(),
2616+
ringInstanceByToken: ringDesc.getTokensInfo(),
2617+
ringZones: getZones(ringDesc.getTokensByZone()),
2618+
strategy: NewDefaultReplicationStrategy(),
2619+
KVClient: &MockClient{},
2620+
}
2621+
2622+
shardRing := ring.ShuffleShard(userID, testData.shardSize)
2623+
assert.Equal(t, testData.expectedSize, shardRing.InstancesCount())
2624+
for _, expectedInstance := range testData.expectedToBePresent {
2625+
assert.True(t, shardRing.HasInstance(expectedInstance))
2626+
}
2627+
})
2628+
}
2629+
}
2630+
25262631
func TestRing_ShuffleShardWithLookback_CorrectnessWithFuzzy(t *testing.T) {
25272632
// The goal of this test is NOT to ensure that the minimum required number of instances
25282633
// are returned at any given time, BUT at least all required instances are returned.

0 commit comments

Comments
 (0)