Skip to content

Commit ac3daf6

Browse files
committed
Fixing bug of not properly cleaning up after ALL interfaces when SOME of them failed during create.
The root cause is that in some cases we have returned earlier during interface creation, than starting the asynch thread. But if some other threads have already progressed to the point of forking, they were basically prematurely reaped by the early termination of the whole process. Nasty. Resolution is getting rid of ALL early return cases. In case of an early failure we simply push the result in to the syncher object, even if the error happened in the main thread. Only Syncher can return from the main thread, no sooner and no later than all results are available: be them pushed from the main thread, or from a fork.
1 parent 97abcdd commit ac3daf6

File tree

4 files changed

+38
-19
lines changed

4 files changed

+38
-19
lines changed

pkg/cnidel/cnidel.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -63,13 +63,13 @@ func DelegateInterfaceSetup(netConf *datastructs.NetConf, danmClient danmclients
6363
}
6464
rawConfig, err := getCniPluginConfig(netConf, netInfo, ipamOptions, ep)
6565
if err != nil {
66-
freeDelegatedIps(danmClient, netInfo, ep.Spec.Iface.Address, ep.Spec.Iface.AddressIPv6)
66+
FreeDelegatedIps(danmClient, netInfo, ep.Spec.Iface.Address, ep.Spec.Iface.AddressIPv6)
6767
return nil, err
6868
}
6969
cniType := netInfo.Spec.NetworkType
7070
cniResult,err := execCniPlugin(cniType, CniAddOp, netInfo, rawConfig, ep)
7171
if err != nil {
72-
freeDelegatedIps(danmClient, netInfo, ep.Spec.Iface.Address, ep.Spec.Iface.AddressIPv6)
72+
FreeDelegatedIps(danmClient, netInfo, ep.Spec.Iface.Address, ep.Spec.Iface.AddressIPv6)
7373
return nil, errors.New("Error delegating ADD to CNI plugin:" + cniType + " because:" + err.Error())
7474
}
7575
if cniResult != nil {
@@ -187,19 +187,19 @@ func setEpIfaceAddress(cniResult *current.Result, epIface *danmtypes.DanmEpIface
187187
func DelegateInterfaceDelete(netConf *datastructs.NetConf, danmClient danmclientset.Interface, netInfo *danmtypes.DanmNet, ep *danmtypes.DanmEp) error {
188188
rawConfig, err := getCniPluginConfig(netConf, netInfo, datastructs.IpamConfig{Type: ipamType}, ep)
189189
if err != nil {
190-
freeDelegatedIps(danmClient, netInfo, ep.Spec.Iface.Address, ep.Spec.Iface.AddressIPv6)
190+
FreeDelegatedIps(danmClient, netInfo, ep.Spec.Iface.Address, ep.Spec.Iface.AddressIPv6)
191191
return err
192192
}
193193
cniType := netInfo.Spec.NetworkType
194194
_, err = execCniPlugin(cniType, CniDelOp, netInfo, rawConfig, ep)
195195
if err != nil {
196-
freeDelegatedIps(danmClient, netInfo, ep.Spec.Iface.Address, ep.Spec.Iface.AddressIPv6)
196+
FreeDelegatedIps(danmClient, netInfo, ep.Spec.Iface.Address, ep.Spec.Iface.AddressIPv6)
197197
return errors.New("Error delegating DEL to CNI plugin:" + cniType + " because:" + err.Error())
198198
}
199-
return freeDelegatedIps(danmClient, netInfo, ep.Spec.Iface.Address, ep.Spec.Iface.AddressIPv6)
199+
return FreeDelegatedIps(danmClient, netInfo, ep.Spec.Iface.Address, ep.Spec.Iface.AddressIPv6)
200200
}
201201

202-
func freeDelegatedIps(danmClient danmclientset.Interface, netInfo *danmtypes.DanmNet, ip4, ip6 string) error {
202+
func FreeDelegatedIps(danmClient danmclientset.Interface, netInfo *danmtypes.DanmNet, ip4, ip6 string) error {
203203
err4 := freeDelegatedIp(danmClient, netInfo, ip4)
204204
err6 := freeDelegatedIp(danmClient, netInfo, ip6)
205205
if err4 != nil {

pkg/danmep/danmep.go

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,14 @@ import (
66
"log"
77
"runtime"
88
"strconv"
9+
"time"
910
"github.com/containernetworking/plugins/pkg/ns"
1011
"github.com/containernetworking/plugins/pkg/utils/sysctl"
1112
meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1213
danmtypes "github.com/nokia/danm/crd/apis/danm/v1"
1314
danmclientset "github.com/nokia/danm/crd/client/clientset/versioned"
1415
)
1516

16-
const (
17-
defaultNamespace = "default"
18-
dockerApiVersion = "1.22"
19-
)
20-
2117
type sysctlFunction func(danmtypes.DanmEp) bool
2218
type sysctlObject struct {
2319
sysctlName string
@@ -181,3 +177,22 @@ func isIPv6NotNeeded(ep danmtypes.DanmEp) bool {
181177
}
182178
return false
183179
}
180+
181+
func PutDanmEp(danmClient danmclientset.Interface, ep danmtypes.DanmEp) error {
182+
_, err := danmClient.DanmV1().DanmEps(ep.Namespace).Create(&ep)
183+
if err != nil {
184+
return errors.New("DanmEp object could not be PUT to K8s API server due to error:" + err.Error())
185+
}
186+
//We block the thread until DanmEp is really created in the API server, just in case
187+
//We achieve this by not returning until Get for the same resource is successful
188+
//Otherwise garbage collection could leak during CNI ADD if another thread finished unsuccessfully,
189+
//simply because the DanmEp directing interface deletion does not yet exist
190+
for i := 0; i < 100; i++ {
191+
tempEp, err := danmClient.DanmV1().DanmEps(ep.Namespace).Get(ep.ObjectMeta.Name, meta_v1.GetOptions{})
192+
if err == nil && tempEp.ObjectMeta.Name == ep.ObjectMeta.Name {
193+
return nil
194+
}
195+
time.Sleep(10 * time.Millisecond)
196+
}
197+
return errors.New("DanmEp creation was supposedly successful, but the object hasn't really appeared within 1 sec")
198+
}

pkg/metacni/metacni.go

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -236,20 +236,22 @@ func setupNetworking(args *cniArgs) (*current.Result, error) {
236236
defParam := datastructs.Interface{SequenceId: 0, Ip: "dynamic",}
237237
err = createIface(args, danmClient, args.defaultNetwork, defParam, syncher, allocatedDevices)
238238
if err != nil {
239-
return nil, err
239+
syncher.PushResult(args.defaultNetwork.ObjectMeta.Name, err, nil)
240240
}
241241
}
242242
for nicID, nicParams := range args.interfaces {
243243
nicParams.SequenceId = nicID
244244
nicParams.DefaultIfaceName = defaultIfName
245245
netInfo, err := netcontrol.GetNetworkFromInterface(danmClient, nicParams, args.pod.ObjectMeta.Namespace)
246246
if err != nil {
247-
return nil, errors.New("failed to get network object for Pod:" + args.pod.ObjectMeta.Name +
248-
"'s connection no.:" + strconv.Itoa(nicID) + " due to:" + err.Error())
247+
syncher.PushResult("", errors.New("failed to get network object for Pod:" + args.pod.ObjectMeta.Name +
248+
"'s connection no.:" + strconv.Itoa(nicID) + " due to:" + err.Error()), nil)
249+
continue
249250
}
250251
err = createIface(args, danmClient, netInfo, nicParams, syncher, allocatedDevices)
251252
if err != nil {
252-
return nil, err
253+
syncher.PushResult(netInfo.ObjectMeta.Name, err, nil)
254+
continue
253255
}
254256
}
255257
err = syncher.GetAggregatedResult()
@@ -367,13 +369,15 @@ func createDelegatedInterface(syncher *syncher.Syncher, danmClient danmclientset
367369
syncher.PushResult(netInfo.ObjectMeta.Name, errors.New("CNI delegation failed due to error:" + err.Error()), nil)
368370
return
369371
}
370-
_, err = danmClient.DanmV1().DanmEps(ep.Namespace).Create(&ep)
372+
err = danmep.PutDanmEp(danmClient, ep)
371373
if err != nil {
374+
cnidel.FreeDelegatedIps(danmClient, netInfo, ep.Spec.Iface.Address, ep.Spec.Iface.AddressIPv6)
372375
syncher.PushResult(netInfo.ObjectMeta.Name, errors.New("DanmEp object could not be PUT to K8s due to error:" + err.Error()), nil)
373376
return
374377
}
375378
err = danmep.SetDanmEpSysctls(ep)
376379
if err != nil {
380+
cnidel.FreeDelegatedIps(danmClient, netInfo, ep.Spec.Iface.Address, ep.Spec.Iface.AddressIPv6)
377381
syncher.PushResult(netInfo.ObjectMeta.Name, errors.New("Sysctls could not be set due to error:" + err.Error()), nil)
378382
return
379383
}
@@ -400,7 +404,7 @@ func createDanmInterface(syncher *syncher.Syncher, danmClient danmclientset.Inte
400404
syncher.PushResult(netInfo.ObjectMeta.Name, errors.New("DanmEp object could not be created due to error:" + err.Error()), nil)
401405
return
402406
}
403-
_, err = danmClient.DanmV1().DanmEps(ep.Namespace).Create(&ep)
407+
err = danmep.PutDanmEp(danmClient, ep)
404408
if err != nil {
405409
ipam.GarbageCollectIps(danmClient, netInfo, ip4, ip6)
406410
syncher.PushResult(netInfo.ObjectMeta.Name, errors.New("EP could not be PUT into K8s due to error:" + err.Error()), nil)

pkg/syncher/syncher.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,13 @@ func NewSyncher(numOfResults int) *Syncher {
2828
}
2929

3030
func (synch *Syncher) PushResult(cniName string, opRes error, cniRes *current.Result) {
31+
synch.mux.Lock()
32+
defer synch.mux.Unlock()
3133
cniOpResult := cniOpResult {
3234
CniName: cniName,
3335
OpResult: opRes,
3436
CniResult: cniRes,
3537
}
36-
synch.mux.Lock()
37-
defer synch.mux.Unlock()
3838
synch.CniResults = append(synch.CniResults, cniOpResult)
3939
}
4040

0 commit comments

Comments
 (0)