diff --git a/pkg/ext-proc/main.go b/pkg/ext-proc/main.go index 968d09f5a..fb21850b0 100644 --- a/pkg/ext-proc/main.go +++ b/pkg/ext-proc/main.go @@ -71,7 +71,13 @@ var ( "refreshPrometheusMetricsInterval", runserver.DefaultRefreshPrometheusMetricsInterval, "interval to flush prometheus metrics") - logVerbosity = flag.Int("v", logging.DEFAULT, "number for the log level verbosity") + logVerbosity = flag.Int("v", logging.DEFAULT, "number for the log level verbosity") + secureServing = flag.Bool( + "secureServing", runserver.DefaultSecureServing, "Enables secure serving. Defaults to true.") + certPath = flag.String( + "certPath", "", "The path to the certificate for secure serving. The certificate and private key files "+ + "are assumed to be named tls.crt and tls.key, respectively. If not set, and secureServing is enabled, "+ + "then a self-signed certificate is used.") scheme = runtime.NewScheme() ) @@ -133,6 +139,8 @@ func run() error { RefreshMetricsInterval: *refreshMetricsInterval, RefreshPrometheusMetricsInterval: *refreshPrometheusMetricsInterval, Datastore: datastore, + SecureServing: *secureServing, + CertPath: *certPath, } if err := serverRunner.SetupWithManager(mgr); err != nil { klog.ErrorS(err, "Failed to setup ext-proc server") diff --git a/pkg/ext-proc/server/runserver.go b/pkg/ext-proc/server/runserver.go index 2d92e4126..ed260b046 100644 --- a/pkg/ext-proc/server/runserver.go +++ b/pkg/ext-proc/server/runserver.go @@ -2,11 +2,19 @@ package server import ( "context" + "crypto/rand" + "crypto/rsa" + "crypto/tls" + "crypto/x509" + "crypto/x509/pkix" + "encoding/pem" "fmt" + "math/big" "time" extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3" "google.golang.org/grpc" + "google.golang.org/grpc/credentials" "k8s.io/apimachinery/pkg/types" klog "k8s.io/klog/v2" ctrl "sigs.k8s.io/controller-runtime" @@ -27,6 +35,8 @@ type ExtProcServerRunner struct { RefreshMetricsInterval time.Duration RefreshPrometheusMetricsInterval time.Duration 
Datastore *backend.K8sDatastore + SecureServing bool + CertPath string } // Default values for CLI flags in main @@ -38,6 +48,7 @@ const ( DefaultRefreshPodsInterval = 10 * time.Second // default for --refreshPodsInterval DefaultRefreshMetricsInterval = 50 * time.Millisecond // default for --refreshMetricsInterval DefaultRefreshPrometheusMetricsInterval = 5 * time.Second // default for --refreshPrometheusMetricsInterval + DefaultSecureServing = true // default for --secureServing ) func NewDefaultExtProcServerRunner() *ExtProcServerRunner { @@ -49,6 +60,7 @@ func NewDefaultExtProcServerRunner() *ExtProcServerRunner { RefreshPodsInterval: DefaultRefreshPodsInterval, RefreshMetricsInterval: DefaultRefreshMetricsInterval, RefreshPrometheusMetricsInterval: DefaultRefreshPrometheusMetricsInterval, + SecureServing: DefaultSecureServing, // Datastore can be assigned later. } } @@ -107,8 +119,29 @@ func (r *ExtProcServerRunner) AsRunnable( return err } - // Init the server. - srv := grpc.NewServer() + var srv *grpc.Server + if r.SecureServing { + var cert tls.Certificate + var err error + if r.CertPath != "" { + cert, err = tls.LoadX509KeyPair(r.CertPath+"/tls.crt", r.CertPath+"/tls.key") + } else { + // Create tls based credential. + cert, err = createSelfSignedTLSCertificate() + } + if err != nil { + klog.ErrorS(err, "Failed to create self signed certificate") + return err + } + + creds := credentials.NewTLS(&tls.Config{ + Certificates: []tls.Certificate{cert}, + }) + // Init the server. 
+ srv = grpc.NewServer(grpc.Creds(creds)) + } else { + srv = grpc.NewServer() + } extProcPb.RegisterExternalProcessorServer( srv, handlers.NewServer(pp, scheduling.NewScheduler(pp), r.TargetEndpointKey, r.Datastore), @@ -118,3 +151,48 @@ func (r *ExtProcServerRunner) AsRunnable( return runnable.GRPCServer("ext-proc", srv, r.GrpcPort).Start(ctx) })) } + +func createSelfSignedTLSCertificate() (tls.Certificate, error) { + serialNumberLimit := new(big.Int).Lsh(big.NewInt(1), 128) + serialNumber, err := rand.Int(rand.Reader, serialNumberLimit) + if err != nil { + klog.ErrorS(err, "Failed to create serial number for self-signed cert") + return tls.Certificate{}, err + } + now := time.Now() + notBefore := now.UTC() + template := x509.Certificate{ + SerialNumber: serialNumber, + Subject: pkix.Name{ + Organization: []string{"Inference Ext"}, + }, + NotBefore: notBefore, + NotAfter: now.Add(time.Hour * 24 * 365 * 10).UTC(), // 10 years + KeyUsage: x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth}, + BasicConstraintsValid: true, + } + + priv, err := rsa.GenerateKey(rand.Reader, 4096) + if err != nil { + klog.ErrorS(err, "Failed to generate key for self-signed cert") + return tls.Certificate{}, err + } + + derBytes, err := x509.CreateCertificate(rand.Reader, &template, &template, &priv.PublicKey, priv) + if err != nil { + klog.ErrorS(err, "Failed to create self-signed certificate") + return tls.Certificate{}, err + } + + certBytes := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: derBytes}) + + privBytes, err := x509.MarshalPKCS8PrivateKey(priv) + if err != nil { + klog.ErrorS(err, "Failed to marshal private key for self-signed certificate") + return tls.Certificate{}, err + } + keyBytes := pem.EncodeToMemory(&pem.Block{Type: "PRIVATE KEY", Bytes: privBytes}) + + return tls.X509KeyPair(certBytes, keyBytes) +} diff --git a/pkg/manifests/gateway/patch_policy.yaml 
b/pkg/manifests/gateway/patch_policy.yaml index 4a556b446..ae4fb6d8a 100644 --- a/pkg/manifests/gateway/patch_policy.yaml +++ b/pkg/manifests/gateway/patch_policy.yaml @@ -35,6 +35,20 @@ spec: max_pending_requests: 40000 max_requests: 40000 + # This ensures that envoy accepts untrusted certificates. We tried to explicitly + # set TrustChainVerification to ACCEPT_UNTRUSTED, but that actually didn't work + # and what worked is setting the common_tls_context to empty. + - type: "type.googleapis.com/envoy.config.cluster.v3.Cluster" name: "envoyextensionpolicy/default/ext-proc-policy/extproc/0" operation: op: add path: "/transport_socket" value: name: "envoy.transport_sockets.tls" typed_config: "@type": "type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext" common_tls_context: {} + - type: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration" name: default/inference-gateway/llm-gw operation: diff --git a/test/integration/hermetic_test.go b/test/integration/hermetic_test.go index ff018f286..5226b148d 100644 --- a/test/integration/hermetic_test.go +++ b/test/integration/hermetic_test.go @@ -479,6 +479,7 @@ func BeforeSuit() { // Adjust from defaults serverRunner.PoolName = "vllm-llama2-7b-pool" serverRunner.Datastore = backend.NewK8sDataStore() + serverRunner.SecureServing = false if err := serverRunner.SetupWithManager(mgr); err != nil { log.Fatalf("Failed to start server runner: %v", err) diff --git a/test/testdata/envoy.yaml b/test/testdata/envoy.yaml index 700eb24c9..ffb8add78 100644 --- a/test/testdata/envoy.yaml +++ b/test/testdata/envoy.yaml @@ -169,6 +169,15 @@ data: max_pending_requests: 40000 max_requests: 40000 max_retries: 1024 + # This ensures that envoy accepts untrusted certificates. We tried to explicitly + # set TrustChainVerification to ACCEPT_UNTRUSTED, but that actually didn't work + # and what worked is setting the common_tls_context to empty.
+ transport_socket: + name: "envoy.transport_sockets.tls" + typed_config: + "@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext + common_tls_context: + validation_context: typed_extension_protocol_options: envoy.extensions.upstreams.http.v3.HttpProtocolOptions: "@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions @@ -219,7 +228,7 @@ spec: - "--service-node" - "$(ENVOY_POD_NAME)" - "--log-level" - - "debug" + - "trace" - "--cpuset-threads" - "--drain-strategy" - "immediate"