Skip to content

Commit cce8c0b

Browse files
authored
Merge branch 'kubernetes-sigs:main' into inferencepool-ref
2 parents 85c9311 + 8b9aef6 commit cce8c0b

File tree

8 files changed

+65
-30
lines changed

8 files changed

+65
-30
lines changed

cmd/epp/main.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
"go.uber.org/zap/zapcore"
3131
"google.golang.org/grpc"
3232
healthPb "google.golang.org/grpc/health/grpc_health_v1"
33+
"k8s.io/apimachinery/pkg/types"
3334
"k8s.io/client-go/rest"
3435
"k8s.io/component-base/metrics/legacyregistry"
3536
ctrl "sigs.k8s.io/controller-runtime"
@@ -140,7 +141,11 @@ func run() error {
140141
return err
141142
}
142143

143-
mgr, err := runserver.NewDefaultManager(*poolNamespace, *poolName, cfg)
144+
poolNamespacedName := types.NamespacedName{
145+
Namespace: *poolNamespace,
146+
Name: *poolName,
147+
}
148+
mgr, err := runserver.NewDefaultManager(poolNamespacedName, cfg)
144149
if err != nil {
145150
setupLog.Error(err, "Failed to create controller manager")
146151
return err

config/manifests/inferencepool-resources.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
apiVersion: inference.networking.x-k8s.io/v1alpha2
55
kind: InferencePool
66
metadata:
7-
labels:
87
name: vllm-llama3-8b-instruct
98
spec:
109
targetPortNumber: 8000
@@ -54,6 +53,8 @@ spec:
5453
args:
5554
- -poolName
5655
- "vllm-llama3-8b-instruct"
56+
- "-poolNamespace"
57+
- "default"
5758
- -v
5859
- "4"
5960
- --zap-encoder

pkg/epp/controller/inferencepool_reconciler.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ import (
2121
"reflect"
2222

2323
"k8s.io/apimachinery/pkg/api/errors"
24-
"k8s.io/apimachinery/pkg/types"
2524
"k8s.io/client-go/tools/record"
2625
ctrl "sigs.k8s.io/controller-runtime"
2726
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -36,9 +35,8 @@ import (
3635
// will have the proper controller that will create/manage objects on behalf of the server pool.
3736
type InferencePoolReconciler struct {
3837
client.Client
39-
Record record.EventRecorder
40-
PoolNamespacedName types.NamespacedName
41-
Datastore datastore.Datastore
38+
Record record.EventRecorder
39+
Datastore datastore.Datastore
4240
}
4341

4442
func (c *InferencePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {

pkg/epp/controller/inferencepool_reconciler_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ func TestInferencePoolReconciler(t *testing.T) {
9696

9797
pmf := backendmetrics.NewPodMetricsFactory(&backendmetrics.FakePodMetricsClient{}, time.Second)
9898
datastore := datastore.NewDatastore(ctx, pmf)
99-
inferencePoolReconciler := &InferencePoolReconciler{PoolNamespacedName: namespacedName, Client: fakeClient, Datastore: datastore}
99+
inferencePoolReconciler := &InferencePoolReconciler{Client: fakeClient, Datastore: datastore}
100100

101101
// Step 1: Inception, only ready pods matching pool1 are added to the store.
102102
if _, err := inferencePoolReconciler.Reconcile(ctx, req); err != nil {

pkg/epp/server/controller_manager.go

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
corev1 "k8s.io/api/core/v1"
2323
"k8s.io/apimachinery/pkg/fields"
2424
"k8s.io/apimachinery/pkg/runtime"
25+
"k8s.io/apimachinery/pkg/types"
2526
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
2627
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
2728
"k8s.io/client-go/rest"
@@ -39,29 +40,29 @@ func init() {
3940
utilruntime.Must(v1alpha2.Install(scheme))
4041
}
4142

42-
// DefaultManagerOptions returns the default options used to create the manager.
43-
func DefaultManagerOptions(namespace, name string) ctrl.Options {
43+
// defaultManagerOptions returns the default options used to create the manager.
44+
func defaultManagerOptions(namespacedName types.NamespacedName) ctrl.Options {
4445
return ctrl.Options{
4546
Scheme: scheme,
4647
Cache: cache.Options{
4748
ByObject: map[client.Object]cache.ByObject{
4849
&corev1.Pod{}: {
4950
Namespaces: map[string]cache.Config{
50-
namespace: {},
51+
namespacedName.Namespace: {},
5152
},
5253
},
5354
&v1alpha2.InferencePool{}: {
5455
Namespaces: map[string]cache.Config{
55-
namespace: {
56+
namespacedName.Namespace: {
5657
FieldSelector: fields.SelectorFromSet(fields.Set{
57-
"metadata.name": name,
58+
"metadata.name": namespacedName.Name,
5859
}),
5960
},
6061
},
6162
},
6263
&v1alpha2.InferenceModel{}: {
6364
Namespaces: map[string]cache.Config{
64-
namespace: {},
65+
namespacedName.Namespace: {},
6566
},
6667
},
6768
},
@@ -70,8 +71,8 @@ func DefaultManagerOptions(namespace, name string) ctrl.Options {
7071
}
7172

7273
// NewDefaultManager creates a new controller manager with default configuration.
73-
func NewDefaultManager(namespace, name string, restConfig *rest.Config) (ctrl.Manager, error) {
74-
manager, err := ctrl.NewManager(restConfig, DefaultManagerOptions(namespace, name))
74+
func NewDefaultManager(namespacedName types.NamespacedName, restConfig *rest.Config) (ctrl.Manager, error) {
75+
manager, err := ctrl.NewManager(restConfig, defaultManagerOptions(namespacedName))
7576
if err != nil {
7677
return nil, fmt.Errorf("failed to create controller manager: %v", err)
7778
}

pkg/epp/server/runserver.go

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -87,11 +87,7 @@ func (r *ExtProcServerRunner) SetupWithManager(ctx context.Context, mgr ctrl.Man
8787
if err := (&controller.InferencePoolReconciler{
8888
Datastore: r.Datastore,
8989
Client: mgr.GetClient(),
90-
PoolNamespacedName: types.NamespacedName{
91-
Name: r.PoolName,
92-
Namespace: r.PoolNamespace,
93-
},
94-
Record: mgr.GetEventRecorderFor("InferencePool"),
90+
Record: mgr.GetEventRecorderFor("InferencePool"),
9591
}).SetupWithManager(mgr); err != nil {
9692
return fmt.Errorf("failed setting up InferencePoolReconciler: %w", err)
9793
}

site-src/guides/index.md

Lines changed: 39 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -119,9 +119,9 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
119119

120120
5. Given that the default connection timeout may be insufficient for most inference workloads, it is recommended to configure a timeout appropriate for your intended use case.
121121

122-
```bash
123-
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gke/gcp-backend-policy.yaml
124-
```
122+
```bash
123+
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gke/gcp-backend-policy.yaml
124+
```
125125

126126
=== "Istio"
127127

@@ -269,10 +269,10 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
269269

270270
### Cleanup
271271

272-
The following cleanup assumes you would like to clean ALL resources that were created in this quickstart guide.
272+
The following instructions assume you would like to clean up ALL resources that were created in this quickstart guide.
273273
Please be careful not to delete resources you'd like to keep.
274274

275-
1. Uninstall the Inference Pool
275+
1. Uninstall the InferencePool, InferenceModel, and model server resources
276276

277277
```bash
278278
kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencepool-resources.yaml --ignore-not-found
@@ -282,7 +282,7 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
282282
kubectl delete secret hf-token --ignore-not-found
283283
```
284284

285-
1. Uninstall the Gateway
285+
1. Uninstall the Gateway API resources
286286

287287
```bash
288288
kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/gke/gateway.yaml --ignore-not-found
@@ -296,8 +296,40 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
296296
kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/gateway/kgateway/httproute.yaml --ignore-not-found
297297
```
298298

299-
1. Uninstall the CRDs
299+
1. Uninstall the Gateway API Inference Extension CRDs
300300

301301
```bash
302302
kubectl delete -k https://github.com/kubernetes-sigs/gateway-api-inference-extension/config/crd --ignore-not-found
303303
```
304+
305+
1. Choose one of the following options to clean up the Inference Gateway.
306+
307+
=== "GKE"
308+
309+
**TODO**
310+
311+
=== "Istio"
312+
313+
**TODO**
314+
315+
=== "Kgateway"
316+
317+
The following instructions assume you would like to clean up ALL Kgateway resources that were created in this quickstart guide.
318+
319+
1. Uninstall Kgateway
320+
321+
```bash
322+
helm uninstall kgateway -n kgateway-system
323+
```
324+
325+
1. Uninstall the Kgateway CRDs.
326+
327+
```bash
328+
helm uninstall kgateway-crds -n kgateway-system
329+
```
330+
331+
1. Remove the Kgateway namespace.
332+
333+
```bash
334+
kubectl delete ns kgateway-system
335+
```

site-src/implementations/gateways.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,23 +5,25 @@ This project has several implementations that are planned or in progress:
55
* [Envoy AI Gateway][1]
66
* [Kgateway][2]
77
* [Google Kubernetes Engine][3]
8+
* [Istio][4]
89

910
[1]:#envoy-ai-gateway
1011
[2]:#kgateway
1112
[3]:#google-kubernetes-engine
13+
[4]:#istio
1214

1315
## Envoy AI Gateway
1416

1517
[Envoy AI Gateway][aigw-home] is an open source project built on top of
16-
[Envoy][envoy-org] and [Envoy Gateway][aigw-gateway] to handle request traffic
18+
[Envoy][envoy-org] and [Envoy Gateway][envoy-gateway] to handle request traffic
1719
from application clients to GenAI services. The features and capabilities are outlined [here][aigw-capabilities]. Use the [quickstart][aigw-quickstart] to get Envoy AI Gateway running with Gateway API in a few simple steps.
1820

1921
Progress towards supporting this project is tracked with a [GitHub
2022
Issue](https://github.com/envoyproxy/ai-gateway/issues/423).
2123

22-
[aigw-home]:https://gateway.envoyproxy.io/
24+
[aigw-home]:https://aigateway.envoyproxy.io/
2325
[envoy-org]:https://github.com/envoyproxy
24-
[aigw-gateway]: https://gateway.envoyproxy.io/
26+
[envoy-gateway]: https://gateway.envoyproxy.io/
2527
[aigw-capabilities]:https://aigateway.envoyproxy.io/docs/capabilities/
2628
[aigw-quickstart]:https://aigateway.envoyproxy.io/docs/capabilities/gateway-api-inference-extension
2729

0 commit comments

Comments
 (0)