1
+ {{ include "gateway-api-inference-extension.validations.inferencepool.common" $ }}
1
2
apiVersion : inference.networking.x-k8s.io/v1alpha2
2
3
kind : InferencePool
3
4
metadata :
@@ -8,85 +9,10 @@ metadata:
8
9
spec :
9
10
targetPortNumber : {{ .Values.inferencePool.targetPortNumber }}
10
11
selector :
11
- {{- range $key, $value := .Values.inferencePool.selector }}
12
- {{ $key }}: {{ quote $value }}
13
- {{- end }}
12
+ {{- if .Values.inferencePool.modelServers.matchLabels }}
13
+ {{- range $key, $value := .Values.inferencePool.modelServers.matchLabels }}
14
+ {{ $key }}: {{ quote $value }}
15
+ {{- end }}
16
+ {{- end }}
14
17
extensionRef :
15
18
name : {{ include "gateway-api-inference-extension.name" . }}
16
- ---
17
- apiVersion : apps/v1
18
- kind : Deployment
19
- metadata :
20
- name : {{ include "gateway-api-inference-extension.name" . }}
21
- namespace : {{ .Release.Namespace }}
22
- labels :
23
- {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
24
- spec :
25
- replicas : {{ .Values.inferenceExtension.replicas | default 1 }}
26
- selector :
27
- matchLabels :
28
- {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 6 }}
29
- template :
30
- metadata :
31
- labels :
32
- {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }}
33
- spec :
34
- serviceAccountName : {{ include "gateway-api-inference-extension.name" . }}
35
- containers :
36
- - name : epp
37
- image : {{ .Values.inferenceExtension.image.hub }}/{{ .Values.inferenceExtension.image.name }}:{{ .Values.inferenceExtension.image.tag }}
38
- imagePullPolicy : {{ .Values.inferenceExtension.image.pullPolicy | default "Always" }}
39
- args :
40
- - -poolName
41
- - {{ .Values.inferencePool.name }}
42
- - -poolNamespace
43
- - {{ .Release.Namespace }}
44
- - -v
45
- - " 3"
46
- - -grpcPort
47
- - " 9002"
48
- - -grpcHealthPort
49
- - " 9003"
50
- - -metricsPort
51
- - " 9090"
52
- env :
53
- - name : USE_STREAMING
54
- value : " true"
55
- ports :
56
- - name : grpc
57
- containerPort : 9002
58
- - name : grpc-health
59
- containerPort : 9003
60
- - name : metrics
61
- containerPort : 9090
62
- livenessProbe :
63
- grpc :
64
- port : 9003
65
- service : inference-extension
66
- initialDelaySeconds : 5
67
- periodSeconds : 10
68
- readinessProbe :
69
- grpc :
70
- port : 9003
71
- service : inference-extension
72
- initialDelaySeconds : 5
73
- periodSeconds : 10
74
- ---
75
- apiVersion : v1
76
- kind : Service
77
- metadata :
78
- name : {{ include "gateway-api-inference-extension.name" . }}
79
- namespace : {{ .Release.Namespace }}
80
- labels :
81
- {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
82
- spec :
83
- selector :
84
- {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 4 }}
85
- ports :
86
- - name : grpc-ext-proc
87
- protocol : TCP
88
- port : {{ .Values.inferenceExtension.extProcPort | default 9002 }}
89
- - name : http-metrics
90
- protocol : TCP
91
- port : {{ .Values.inferenceExtension.metricsPort | default 9090 }}
92
- type : ClusterIP
0 commit comments