Skip to content

Commit d4f14fa

Browse files
authored
Merge pull request #770 from l1b0k/fix/eni_detach
fix: eni may detached in race condition
2 parents 40ebe7b + f3d1696 commit d4f14fa

File tree

2 files changed

+57
-3
lines changed

2 files changed

+57
-3
lines changed

Diff for: pkg/eni/local.go

+25-3
Original file line numberDiff line numberDiff line change
@@ -824,8 +824,7 @@ func (l *Local) Dispose(n int) int {
824824

825825
// 1. check if can dispose the eni
826826
if n >= max(len(l.ipv4), len(l.ipv6)) {
827-
eniType := strings.ToLower(l.eniType)
828-
if eniType != "trunk" && eniType != "erdma" && !l.eni.Trunk && len(l.ipv4.InUse()) == 0 && len(l.ipv6.InUse()) == 0 {
827+
if l.canDispose() {
829828
log.Info("dispose eni")
830829
l.status = statusDeleting
831830
return max(len(l.ipv4), len(l.ipv6))
@@ -902,7 +901,11 @@ func (l *Local) factoryDisposeWorker(ctx context.Context) {
902901

903902
if l.status == statusDeleting {
904903
// remove the eni
905-
904+
if !l.canDispose() {
905+
l.cond.Wait()
906+
// this can't happen
907+
continue
908+
}
906909
l.cond.L.Unlock()
907910

908911
err := l.rateLimitEni.Wait(ctx)
@@ -1082,6 +1085,25 @@ func (l *Local) commit(ctx context.Context, respCh chan *AllocResp, ipv4, ipv6 *
10821085
}
10831086
}
10841087

1088+
// canDispose will check current eni status , and make sure no pending jobs ,and no pod is using this eni
1089+
func (l *Local) canDispose() bool {
1090+
if l.eni == nil {
1091+
return true
1092+
}
1093+
eniType := strings.ToLower(l.eniType)
1094+
switch eniType {
1095+
case "trunk", "erdma":
1096+
return false
1097+
}
1098+
if l.eni.Trunk {
1099+
return false
1100+
}
1101+
return len(l.ipv4.InUse()) == 0 &&
1102+
len(l.ipv6.InUse()) == 0 &&
1103+
l.allocatingV4.Len() == 0 &&
1104+
l.allocatingV6.Len() == 0
1105+
}
1106+
10851107
// syncIPLocked will mark ip as invalid , if not found in remote
10861108
func syncIPLocked(lo Set, remote []netip.Addr) {
10871109
s := sets.New[netip.Addr](remote...)

Diff for: pkg/eni/local_test.go

+32
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,25 @@ func TestLocal_AllocWorker_ParentCancelContext(t *testing.T) {
186186
assert.False(t, ok)
187187
}
188188

189+
func TestLocal_AllocWorker_UpdateCache(t *testing.T) {
190+
local := NewLocalTest(&daemon.ENI{ID: "eni-1"}, nil, &types.PoolConfig{
191+
EnableIPv4: true,
192+
}, "")
193+
cni := &daemon.CNI{PodID: "pod-1"}
194+
195+
ctx, cancel := context.WithCancel(context.Background())
196+
defer cancel()
197+
respCh := make(chan *AllocResp)
198+
199+
req := NewLocalIPRequest()
200+
req.NoCache = true
201+
go local.allocWorker(ctx, cni, req, respCh)
202+
req.cancel()
203+
result, ok := <-respCh
204+
assert.True(t, ok)
205+
assert.NotNil(t, result)
206+
}
207+
189208
func TestLocal_Dispose(t *testing.T) {
190209
local := NewLocalTest(&daemon.ENI{ID: "eni-1"}, nil, &types.PoolConfig{}, "")
191210
local.status = statusInUse
@@ -229,6 +248,19 @@ func TestLocal_Allocate_NoCache(t *testing.T) {
229248
assert.Equal(t, 1, len(resp))
230249
}
231250

251+
func TestLocal_DisposeFailWhenAllocatingIsNotEmpty(t *testing.T) {
252+
local := NewLocalTest(&daemon.ENI{ID: "eni-1"}, nil, &types.PoolConfig{}, "")
253+
local.status = statusInUse
254+
local.ipv4.Add(NewValidIP(netip.MustParseAddr("192.0.2.1"), true))
255+
local.ipv6.Add(NewValidIP(netip.MustParseAddr("fd00:46dd:e::1"), false))
256+
257+
local.allocatingV4 = append(local.allocatingV4, NewLocalIPRequest())
258+
n := local.Dispose(1)
259+
260+
assert.Equal(t, 1, n)
261+
assert.Equal(t, statusInUse, local.status)
262+
}
263+
232264
func TestLocal_Allocate_NoCache_AllocSuccess(t *testing.T) {
233265
local := NewLocalTest(&daemon.ENI{ID: "eni-1"}, nil, &types.PoolConfig{
234266
MaxIPPerENI: 10, EnableIPv4: true, EnableIPv6: true}, "")

0 commit comments

Comments
 (0)