Skip to content

Add TLS support with self-signed certificate. #335

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion pkg/ext-proc/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,13 @@ var (
"refreshPrometheusMetricsInterval",
runserver.DefaultRefreshPrometheusMetricsInterval,
"interval to flush prometheus metrics")
logVerbosity = flag.Int("v", logging.DEFAULT, "number for the log level verbosity")
logVerbosity = flag.Int("v", logging.DEFAULT, "number for the log level verbosity")
secureServing = flag.Bool(
"secureServing", runserver.DefaultSecureServing, "Enables secure serving. Defaults to true.")
certPath = flag.String(
"certPath", "", "The path to the certificate for secure serving. The certificate and private key files "+
"are assumed to be named tls.crt and tls.key, respectively. If not set, and secureServing is enabled, "+
"then a self-signed certificate is used.")

scheme = runtime.NewScheme()
)
Expand Down Expand Up @@ -133,6 +139,8 @@ func run() error {
RefreshMetricsInterval: *refreshMetricsInterval,
RefreshPrometheusMetricsInterval: *refreshPrometheusMetricsInterval,
Datastore: datastore,
SecureServing: *secureServing,
CertPath: *certPath,
}
if err := serverRunner.SetupWithManager(mgr); err != nil {
klog.ErrorS(err, "Failed to setup ext-proc server")
Expand Down
82 changes: 80 additions & 2 deletions pkg/ext-proc/server/runserver.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,19 @@ package server

import (
"context"
"crypto/rand"
"crypto/rsa"
"crypto/tls"
"crypto/x509"
"crypto/x509/pkix"
"encoding/pem"
"fmt"
"math/big"
"time"

extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials"
"k8s.io/apimachinery/pkg/types"
klog "k8s.io/klog/v2"
ctrl "sigs.k8s.io/controller-runtime"
Expand All @@ -27,6 +35,8 @@ type ExtProcServerRunner struct {
RefreshMetricsInterval time.Duration
RefreshPrometheusMetricsInterval time.Duration
Datastore *backend.K8sDatastore
SecureServing bool
CertPath string
}

// Default values for CLI flags in main
Expand All @@ -38,6 +48,7 @@ const (
DefaultRefreshPodsInterval = 10 * time.Second // default for --refreshPodsInterval
DefaultRefreshMetricsInterval = 50 * time.Millisecond // default for --refreshMetricsInterval
DefaultRefreshPrometheusMetricsInterval = 5 * time.Second // default for --refreshPrometheusMetricsInterval
DefaultSecureServing = true // default for --secureServing
)

func NewDefaultExtProcServerRunner() *ExtProcServerRunner {
Expand All @@ -49,6 +60,7 @@ func NewDefaultExtProcServerRunner() *ExtProcServerRunner {
RefreshPodsInterval: DefaultRefreshPodsInterval,
RefreshMetricsInterval: DefaultRefreshMetricsInterval,
RefreshPrometheusMetricsInterval: DefaultRefreshPrometheusMetricsInterval,
SecureServing: DefaultSecureServing,
// Datastore can be assigned later.
}
}
Expand Down Expand Up @@ -107,8 +119,29 @@ func (r *ExtProcServerRunner) AsRunnable(
return err
}

// Init the server.
srv := grpc.NewServer()
var srv *grpc.Server
if r.SecureServing {
var cert tls.Certificate
var err error
if r.CertPath != "" {
cert, err = tls.LoadX509KeyPair(r.CertPath+"/tls.crt", r.CertPath+"/tls.key")
} else {
// Create tls based credential.
cert, err = createSelfSignedTLSCertificate()
}
if err != nil {
klog.ErrorS(err, "Failed to create self signed certificate")
return err
}

creds := credentials.NewTLS(&tls.Config{
Certificates: []tls.Certificate{cert},
})
// Init the server.
srv = grpc.NewServer(grpc.Creds(creds))
} else {
srv = grpc.NewServer()
}
extProcPb.RegisterExternalProcessorServer(
srv,
handlers.NewServer(pp, scheduling.NewScheduler(pp), r.TargetEndpointKey, r.Datastore),
Expand All @@ -118,3 +151,48 @@ func (r *ExtProcServerRunner) AsRunnable(
return runnable.GRPCServer("ext-proc", srv, r.GrpcPort).Start(ctx)
}))
}

// createSelfSignedTLSCertificate builds an in-memory, self-signed TLS server
// certificate backed by a freshly generated 4096-bit RSA key.
//
// The certificate:
//   - has a random serial number below 2^128,
//   - names the organization "Inference Ext",
//   - is valid from the moment of creation for 10 * 365 days,
//   - allows key encipherment and digital signatures, and
//   - is restricted to server authentication.
//
// On failure the zero tls.Certificate is returned together with an error that
// is wrapped with the step that failed. The error is intentionally not logged
// here: the caller logs whatever this function returns, so logging in both
// places would report every failure twice.
func createSelfSignedTLSCertificate() (tls.Certificate, error) {
	// Serial numbers are drawn uniformly from [0, 2^128).
	serialNumberLimit := new(big.Int).Lsh(big.NewInt(1), 128)
	serialNumber, err := rand.Int(rand.Reader, serialNumberLimit)
	if err != nil {
		return tls.Certificate{}, fmt.Errorf("creating serial number for self-signed cert: %w", err)
	}

	now := time.Now()
	template := x509.Certificate{
		SerialNumber: serialNumber,
		Subject: pkix.Name{
			Organization: []string{"Inference Ext"},
		},
		NotBefore:             now.UTC(),
		NotAfter:              now.Add(time.Hour * 24 * 365 * 10).UTC(), // 10 years
		KeyUsage:              x509.KeyUsageKeyEncipherment | x509.KeyUsageDigitalSignature,
		ExtKeyUsage:           []x509.ExtKeyUsage{x509.ExtKeyUsageServerAuth},
		BasicConstraintsValid: true,
	}

	priv, err := rsa.GenerateKey(rand.Reader, 4096)
	if err != nil {
		return tls.Certificate{}, fmt.Errorf("generating key for self-signed cert: %w", err)
	}

	// Passing the template as its own parent makes the certificate self-signed.
	derBytes, err := x509.CreateCertificate(rand.Reader, &template, &template, &priv.PublicKey, priv)
	if err != nil {
		return tls.Certificate{}, fmt.Errorf("creating self-signed certificate: %w", err)
	}
	certBytes := pem.EncodeToMemory(&pem.Block{Type: "CERTIFICATE", Bytes: derBytes})

	privBytes, err := x509.MarshalPKCS8PrivateKey(priv)
	if err != nil {
		return tls.Certificate{}, fmt.Errorf("marshaling private key for self-signed certificate: %w", err)
	}
	keyBytes := pem.EncodeToMemory(&pem.Block{Type: "PRIVATE KEY", Bytes: privBytes})

	// tls.X509KeyPair re-parses the PEM pair and verifies that the private key
	// matches the certificate's public key.
	return tls.X509KeyPair(certBytes, keyBytes)
}
14 changes: 14 additions & 0 deletions pkg/manifests/gateway/patch_policy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,20 @@ spec:
max_pending_requests: 40000
max_requests: 40000

# This ensures that envoy accepts untrusted certificates. We tried to explicitly
# set TrustChainVerification to ACCEPT_UNTRUSTED, but that did not work; what
# did work is setting the common_tls_context to empty.
- type: "type.googleapis.com/envoy.config.cluster.v3.Cluster"
name: "envoyextensionpolicy/default/ext-proc-policy/extproc/0"
operation:
op: add
path: "/transport_socket"
value:
name: "envoy.transport_sockets.tls"
typed_config:
"@type": "type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext"
common_tls_context: {}

- type: "type.googleapis.com/envoy.config.route.v3.RouteConfiguration"
name: default/inference-gateway/llm-gw
operation:
Expand Down
1 change: 1 addition & 0 deletions test/integration/hermetic_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,7 @@ func BeforeSuit() {
// Adjust from defaults
serverRunner.PoolName = "vllm-llama2-7b-pool"
serverRunner.Datastore = backend.NewK8sDataStore()
serverRunner.SecureServing = false

if err := serverRunner.SetupWithManager(mgr); err != nil {
log.Fatalf("Failed to start server runner: %v", err)
Expand Down
11 changes: 10 additions & 1 deletion test/testdata/envoy.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,15 @@ data:
max_pending_requests: 40000
max_requests: 40000
max_retries: 1024
# This ensures that envoy accepts untrusted certificates. We tried to explicitly
# set TrustChainVerification to ACCEPT_UNTRUSTED, but that did not work; what
# did work is leaving the validation_context under common_tls_context empty.
transport_socket:
name: "envoy.transport_sockets.tls"
typed_config:
"@type": type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext
common_tls_context:
validation_context:
typed_extension_protocol_options:
envoy.extensions.upstreams.http.v3.HttpProtocolOptions:
"@type": type.googleapis.com/envoy.extensions.upstreams.http.v3.HttpProtocolOptions
Expand Down Expand Up @@ -219,7 +228,7 @@ spec:
- "--service-node"
- "$(ENVOY_POD_NAME)"
- "--log-level"
- "debug"
- "trace"
- "--cpuset-threads"
- "--drain-strategy"
- "immediate"
Expand Down