-
Notifications
You must be signed in to change notification settings - Fork 89
Fix InferenceModel deletion logic #393
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
a807b40
119cee0
f307bc5
02aa4c9
4d90fbb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,8 +18,8 @@ package controller | |
|
||
import ( | ||
"context" | ||
"fmt" | ||
|
||
"github.com/go-logr/logr" | ||
"k8s.io/apimachinery/pkg/api/errors" | ||
"k8s.io/apimachinery/pkg/runtime" | ||
"k8s.io/apimachinery/pkg/types" | ||
|
@@ -34,6 +34,10 @@ import ( | |
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" | ||
) | ||
|
||
const ( | ||
modelNameKey = "spec.modelName" | ||
) | ||
|
||
type InferenceModelReconciler struct { | ||
client.Client | ||
Scheme *runtime.Scheme | ||
|
@@ -43,44 +47,103 @@ type InferenceModelReconciler struct { | |
} | ||
|
||
func (c *InferenceModelReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { | ||
logger := log.FromContext(ctx) | ||
loggerDefault := logger.V(logutil.DEFAULT) | ||
loggerDefault.Info("Reconciling InferenceModel", "name", req.NamespacedName) | ||
if req.Namespace != c.PoolNamespacedName.Namespace { | ||
return ctrl.Result{}, nil | ||
} | ||
logger := log.FromContext(ctx).V(logutil.DEFAULT).WithValues("inferenceModel", req.Name) | ||
ctx = ctrl.LoggerInto(ctx, logger) | ||
|
||
logger.Info("Reconciling InferenceModel") | ||
|
||
infModel := &v1alpha2.InferenceModel{} | ||
notFound := false | ||
if err := c.Get(ctx, req.NamespacedName, infModel); err != nil { | ||
if errors.IsNotFound(err) { | ||
loggerDefault.Info("InferenceModel not found. Removing from datastore since object must be deleted", "name", req.NamespacedName) | ||
c.Datastore.ModelDelete(infModel.Spec.ModelName) | ||
return ctrl.Result{}, nil | ||
if !errors.IsNotFound(err) { | ||
logger.Error(err, "Unable to get InferenceModel") | ||
return ctrl.Result{}, err | ||
} | ||
loggerDefault.Error(err, "Unable to get InferenceModel", "name", req.NamespacedName) | ||
notFound = true | ||
} | ||
|
||
if notFound || !infModel.DeletionTimestamp.IsZero() || infModel.Spec.PoolRef.Name != c.PoolNamespacedName.Name { | ||
// InferenceModel object got deleted or changed the referenced pool. | ||
err := c.handleModelDeleted(ctx, req.NamespacedName) | ||
return ctrl.Result{}, err | ||
} else if !infModel.DeletionTimestamp.IsZero() { | ||
loggerDefault.Info("InferenceModel is marked for deletion. Removing from datastore", "name", req.NamespacedName) | ||
c.Datastore.ModelDelete(infModel.Spec.ModelName) | ||
return ctrl.Result{}, nil | ||
} | ||
|
||
c.updateDatastore(logger, infModel) | ||
// Add or update if the InferenceModel instance has a creation timestamp older than the existing entry of the model. | ||
logger = logger.WithValues("poolRef", infModel.Spec.PoolRef).WithValues("modelName", infModel.Spec.ModelName) | ||
if !c.Datastore.ModelSetIfOlder(infModel) { | ||
logger.Info("Skipping InferenceModel, existing instance has older creation timestamp") | ||
|
||
} | ||
logger.Info("Added/Updated InferenceModel") | ||
|
||
return ctrl.Result{}, nil | ||
} | ||
|
||
func (c *InferenceModelReconciler) updateDatastore(logger logr.Logger, infModel *v1alpha2.InferenceModel) { | ||
loggerDefault := logger.V(logutil.DEFAULT) | ||
func (c *InferenceModelReconciler) handleModelDeleted(ctx context.Context, req types.NamespacedName) error { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like this func here is what causes the need for the dual dictionary. I could see a high amount of adapter churn, but that will just augment the existing infModel, I'm not sure there will be a high amount of infModel churn. What do you think about paying the Points in favor:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sounds good, I debated this as well, I wanted to address modelName updates where we would need to do a lookup in the cache by object name every time to check if a modelName change actually happened, but I believe we should make the modelName immutable and so we don't need to handle this case here. I didn't use sync.Map because in |
||
logger := log.FromContext(ctx) | ||
|
||
// We will lookup the modelName associated with this object to search for | ||
// other instance referencing the same ModelName if exist to store the oldest in | ||
// its place. This ensures that the InferenceModel with the oldest creation | ||
// timestamp is active. | ||
existing, exists := c.Datastore.ModelGetByObjName(req) | ||
if !exists { | ||
// No entry exists in the first place, nothing to do. | ||
return nil | ||
} | ||
// Delete the internal object, it may be replaced with another version below. | ||
c.Datastore.ModelDelete(req) | ||
logger.Info("InferenceModel removed from datastore", "poolRef", existing.Spec.PoolRef, "modelName", existing.Spec.ModelName) | ||
|
||
if infModel.Spec.PoolRef.Name == c.PoolNamespacedName.Name { | ||
loggerDefault.Info("Updating datastore", "poolRef", infModel.Spec.PoolRef, "serverPoolName", c.PoolNamespacedName) | ||
loggerDefault.Info("Adding/Updating InferenceModel", "modelName", infModel.Spec.ModelName) | ||
c.Datastore.ModelSet(infModel) | ||
return | ||
// List all InferenceModels with a matching ModelName. | ||
var models v1alpha2.InferenceModelList | ||
if err := c.List(ctx, &models, client.MatchingFields{modelNameKey: existing.Spec.ModelName}, client.InNamespace(c.PoolNamespacedName.Namespace)); err != nil { | ||
return fmt.Errorf("listing models that match the modelName %s: %w", existing.Spec.ModelName, err) | ||
} | ||
if len(models.Items) == 0 { | ||
// No other instances of InferenceModels with this ModelName exists. | ||
return nil | ||
} | ||
|
||
var oldest *v1alpha2.InferenceModel | ||
for i := range models.Items { | ||
m := &models.Items[i] | ||
if m.Spec.ModelName != existing.Spec.ModelName || // The index should filter those out, but just in case! | ||
m.Spec.PoolRef.Name != c.PoolNamespacedName.Name || // We don't care about other pools, we could setup an index on this too! | ||
m.Name == existing.Name { // We don't care about the same object, it could be in the list if it was only marked for deletion, but not yet deleted. | ||
continue | ||
} | ||
if oldest == nil || m.ObjectMeta.CreationTimestamp.Before(&oldest.ObjectMeta.CreationTimestamp) { | ||
oldest = m | ||
} | ||
} | ||
loggerDefault.Info("Removing/Not adding InferenceModel", "modelName", infModel.Spec.ModelName) | ||
// If we get here. The model is not relevant to this pool, remove. | ||
c.Datastore.ModelDelete(infModel.Spec.ModelName) | ||
if oldest != nil && c.Datastore.ModelSetIfOlder(oldest) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should short ciruit ealier in the Reconcile, stop processing if there is already a cached model, and the new one is diff. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note that the code you are commenting on is related to handling the fallback to another older version of the InferenceModel when this model is deleted, so it is not related to model update. But to answer your question, we need to check if the cached one is older or not, it may not be because of the order of events being processed by the reconciler, and this is handled at lint 76, . |
||
logger.Info("InferenceModel replaced.", | ||
"poolRef", oldest.Spec.PoolRef, | ||
"modelName", oldest.Spec.ModelName, | ||
"newInferenceModel", types.NamespacedName{Name: oldest.Name, Namespace: oldest.Namespace}) | ||
} | ||
|
||
return nil | ||
} | ||
|
||
func indexInferenceModelsByModelName(obj client.Object) []string { | ||
m, ok := obj.(*v1alpha2.InferenceModel) | ||
if !ok { | ||
return nil | ||
} | ||
return []string{m.Spec.ModelName} | ||
} | ||
|
||
func (c *InferenceModelReconciler) SetupWithManager(mgr ctrl.Manager) error { | ||
func (c *InferenceModelReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager) error { | ||
// Create an index on ModelName for InferenceModel objects. | ||
indexer := mgr.GetFieldIndexer() | ||
if err := indexer.IndexField(ctx, &v1alpha2.InferenceModel{}, modelNameKey, indexInferenceModelsByModelName); err != nil { | ||
return fmt.Errorf("setting index on ModelName for InferenceModel: %w", err) | ||
} | ||
return ctrl.NewControllerManagedBy(mgr). | ||
For(&v1alpha2.InferenceModel{}). | ||
WithEventFilter(predicate.Funcs{ | ||
|
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
modelName is currently
immutablemutable, but we plan to make itmutableimmutable, and so here I didn't address the case where the infModel changes modelName here.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there a use case about mutating modelName
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should make it immutable to guarantee that it doesn't change, otherwise the epp would behave in an unpredictable way.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the intended statement was:
modelName is currently mutable, but we plan to make it immutable
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ops, yes :)
I opened #408 to track the work to make it immutable.