@@ -31,6 +31,10 @@ const (
31
31
// GetBufferSize is the suggested size of buffers passed to Ring.Get(). It's based on
32
32
// a typical replication factor 3, plus extra room for a JOINING + LEAVING instance.
33
33
GetBufferSize = 5
34
+
35
+ // GetZoneSize is the suggested size of the zone map passed to Ring.Get(). It's based on
36
+ // a typical replication factor 3.
37
+ GetZoneSize = 3
34
38
)
35
39
36
40
// ReadRing represents the read interface to the ring.
@@ -39,7 +43,7 @@ type ReadRing interface {
39
43
// Get returns n (or more) instances which form the replicas for the given key.
40
44
// bufDescs, bufHosts and bufZones are buffers to be overwritten for the return value
41
45
// to avoid memory allocation; can be nil, or created with ring.MakeBuffersForGet().
42
- Get (key uint32 , op Operation , bufDescs []InstanceDesc , bufHosts , bufZones [] string ) (ReplicationSet , error )
46
+ Get (key uint32 , op Operation , bufDescs []InstanceDesc , bufHosts [] string , bufZones map [ string ] int ) (ReplicationSet , error )
43
47
44
48
// GetAllHealthy returns all healthy instances in the ring, for the given operation.
45
49
// This function doesn't check if the quorum is honored, so doesn't fail if the number
@@ -340,24 +344,30 @@ func (r *Ring) updateRingState(ringDesc *Desc) {
340
344
}
341
345
342
346
// Get returns n (or more) instances which form the replicas for the given key.
343
- func (r * Ring ) Get (key uint32 , op Operation , bufDescs []InstanceDesc , bufHosts , bufZones []string ) (ReplicationSet , error ) {
347
+ // This implementation guarantees:
348
+ // - Stability: given the same ring, two invocations return the same set for the same operation.
349
+ // - Consistency: adding/removing 1 instance from the ring returns a set with no more than 1 difference for the same operation.
350
+ func (r * Ring ) Get (key uint32 , op Operation , bufDescs []InstanceDesc , bufHosts []string , bufZones map [string ]int ) (ReplicationSet , error ) {
344
351
r .mtx .RLock ()
345
352
defer r .mtx .RUnlock ()
346
353
if r .ringDesc == nil || len (r .ringTokens ) == 0 {
347
354
return ReplicationSet {}, ErrEmptyRing
348
355
}
349
356
350
357
var (
351
- replicationFactor = r .cfg .ReplicationFactor
352
- instances = bufDescs [:0 ]
353
- start = searchToken (r .ringTokens , key )
354
- iterations = 0
358
+ replicationFactor = r .cfg .ReplicationFactor
359
+ instances = bufDescs [:0 ]
360
+ start = searchToken (r .ringTokens , key )
361
+ iterations = 0
362
+ maxInstancePerZone = replicationFactor / len (r .ringZones )
363
+ zonesWithExtraInstance = replicationFactor % len (r .ringZones )
355
364
356
365
// We use a slice instead of a map because it's faster to search within a
357
366
// slice than lookup a map for a very low number of items.
358
- distinctHosts = bufHosts [:0 ]
359
- distinctZones = bufZones [: 0 ]
367
+ distinctHosts = bufHosts [:0 ]
368
+ numOfInstanceByZone = resetZoneMap ( bufZones )
360
369
)
370
+
361
371
for i := start ; len (distinctHosts ) < replicationFactor && iterations < len (r .ringTokens ); i ++ {
362
372
iterations ++
363
373
// Wrap i around in the ring.
@@ -370,14 +380,20 @@ func (r *Ring) Get(key uint32, op Operation, bufDescs []InstanceDesc, bufHosts,
370
380
return ReplicationSet {}, ErrInconsistentTokensInfo
371
381
}
372
382
373
- // We want n *distinct* instances && distinct zones .
383
+ // We want n *distinct* instances.
374
384
if util .StringsContain (distinctHosts , info .InstanceID ) {
375
385
continue
376
386
}
377
387
378
388
// Ignore if the instances don't have a zone set.
379
389
if r .cfg .ZoneAwarenessEnabled && info .Zone != "" {
380
- if util .StringsContain (distinctZones , info .Zone ) {
390
+ maxNumOfInstance := maxInstancePerZone
391
+ // If we still have room for zones with extra instance, increase the instance threshold by 1
392
+ if zonesWithExtraInstance > 0 {
393
+ maxNumOfInstance ++
394
+ }
395
+
396
+ if numOfInstanceByZone [info .Zone ] >= maxNumOfInstance {
381
397
continue
382
398
}
383
399
}
@@ -392,11 +408,14 @@ func (r *Ring) Get(key uint32, op Operation, bufDescs []InstanceDesc, bufHosts,
392
408
} else if r .cfg .ZoneAwarenessEnabled && info .Zone != "" {
393
409
// We should only add the zone if we are not going to extend,
394
410
// as we want to extend the instance in the same AZ.
395
- distinctZones = append (distinctZones , info .Zone )
396
-
397
- if len (distinctZones ) == len (r .ringZones ) {
398
- // reset the zones to repeatedly get hosts from distinct zones
399
- distinctZones = distinctZones [:0 ]
411
+ if numOfInstance , ok := numOfInstanceByZone [info .Zone ]; ! ok {
412
+ numOfInstanceByZone [info .Zone ] = 1
413
+ } else if numOfInstance < maxInstancePerZone {
414
+ numOfInstanceByZone [info .Zone ]++
415
+ } else {
416
+ // This zone will have an extra instance
417
+ numOfInstanceByZone [info .Zone ]++
418
+ zonesWithExtraInstance --
400
419
}
401
420
}
402
421
0 commit comments