Skip to content

Commit d66b732

Browse files
authored
InferencePool config proposal for API review (#162)
* InferencePool Config API * Addressing comments * Addressing comments round 2 * Addressing comments round 3 * Make ExtensionConfig required * Cleanup * add inline * integration test fixes and making extensionRef inlined * rename to extensionRef
1 parent 3e95d77 commit d66b732

13 files changed

+482
-20
lines changed

Makefile

+4
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,10 @@ vet: ## Run go vet against code.
105105
test: manifests generate fmt vet envtest ## Run tests.
106106
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test $$(go list ./... | grep -v /e2e) -coverprofile cover.out
107107

108+
.PHONY: test-integration
109+
test-integration: manifests generate fmt vet envtest ## Run integration tests.
110+
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test ./test/integration -coverprofile cover.out
111+
108112
.PHONY: test-e2e
109113
test-e2e: ## Run end-to-end tests against an existing Kubernetes cluster with at least 3 available GPUs.
110114
go test ./test/e2e/ -v -ginkgo.v

api/v1alpha1/inferencepool_types.go

+82
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,90 @@ type InferencePoolSpec struct {
5959
// +kubebuilder:validation:Maximum=65535
6060
// +kubebuilder:validation:Required
6161
TargetPortNumber int32 `json:"targetPortNumber"`
62+
63+
// EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint
64+
// picker service that picks endpoints for the requests routed to this pool.
65+
EndpointPickerConfig `json:",inline"`
66+
}
67+
68+
// EndpointPickerConfig specifies the configuration needed by the proxy to discover and connect to the endpoint picker extension.
69+
// This type is intended to be a union of mutually exclusive configuration options that we may add in the future.
70+
type EndpointPickerConfig struct {
71+
// ExtensionRef configures an endpoint picker as an extension service.
72+
//
73+
// +kubebuilder:validation:Required
74+
ExtensionRef *Extension `json:"extensionRef,omitempty"`
6275
}
6376

77+
// Extension specifies how to configure an extension that runs the endpoint picker.
78+
type Extension struct {
79+
// ExtensionReference is a reference to a service extension.
80+
ExtensionReference `json:",inline"`
81+
82+
// ExtensionConnection configures the connection between the gateway and the extension.
83+
ExtensionConnection `json:",inline"`
84+
}
85+
86+
// ExtensionReference is a reference to the extension deployment.
87+
type ExtensionReference struct {
88+
// Group is the group of the referent.
89+
// When unspecified or empty string, core API group is inferred.
90+
//
91+
// +optional
92+
// +kubebuilder:default=""
93+
Group *string `json:"group,omitempty"`
94+
95+
// Kind is the Kubernetes resource kind of the referent. For example
96+
// "Service".
97+
//
98+
// Defaults to "Service" when not specified.
99+
//
100+
// ExternalName services can refer to CNAME DNS records that may live
101+
// outside of the cluster and as such are difficult to reason about in
102+
// terms of conformance. They also may not be safe to forward to (see
103+
// CVE-2021-25740 for more information). Implementations MUST NOT
104+
// support ExternalName Services.
105+
//
106+
// +optional
107+
// +kubebuilder:default=Service
108+
Kind *string `json:"kind,omitempty"`
109+
110+
// Name is the name of the referent.
111+
//
112+
// +kubebuilder:validation:Required
113+
Name string `json:"name"`
114+
115+
// The port number on the pods running the extension. When unspecified, implementations SHOULD infer a
116+
// default value of 9002 when the Kind is Service.
117+
//
118+
// +kubebuilder:validation:Minimum=1
119+
// +kubebuilder:validation:Maximum=65535
120+
// +optional
121+
TargetPortNumber *int32 `json:"targetPortNumber,omitempty"`
122+
}
123+
124+
// ExtensionConnection encapsulates options that configure the connection to the extension.
125+
type ExtensionConnection struct {
126+
// Configures how the gateway handles the case when the extension is not responsive.
127+
// Defaults to FailClose.
128+
//
129+
// +optional
130+
// +kubebuilder:default="FailClose"
131+
FailureMode *ExtensionFailureMode `json:"failureMode"`
132+
}
133+
134+
// ExtensionFailureMode defines the options for how the gateway handles the case when the extension is not
135+
// responsive.
136+
// +kubebuilder:validation:Enum=FailOpen;FailClose
137+
type ExtensionFailureMode string
138+
139+
const (
140+
// FailOpen specifies that the proxy should not drop the request and forward the request to an endpoint of its picking.
141+
FailOpen ExtensionFailureMode = "FailOpen"
142+
// FailClose specifies that the proxy should drop the request.
143+
FailClose ExtensionFailureMode = "FailClose"
144+
)
145+
64146
// LabelKey was originally copied from: https://github.com/kubernetes-sigs/gateway-api/blob/99a3934c6bc1ce0874f3a4c5f20cafd8977ffcb4/apis/v1/shared_types.go#L694-L731
65147
// Duplicated as to not take an unexpected dependency on gw's API.
66148
//

api/v1alpha1/zz_generated.deepcopy.go

+88
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

client-go/applyconfiguration/api/v1alpha1/endpointpickerconfig.go

+38
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

client-go/applyconfiguration/api/v1alpha1/extension.go

+75
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

client-go/applyconfiguration/api/v1alpha1/extensionconnection.go

+42
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)