Skip to content

[Merged by Bors] - OpenShift compatibility #126

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 24 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
0b459b2
Added SCC and update cluster role name
razvan Aug 24, 2022
a5ca51e
Fix indentantion.
razvan Aug 24, 2022
08b47f4
make regenerate-charts
razvan Aug 24, 2022
35f37f2
Update the spark-k8s-clusterrole to allow spark-submit to create pods.
razvan Aug 24, 2022
f12f35e
wip: minio setup on openshift.
razvan Aug 24, 2022
78663be
kuttl tests: still wip
razvan Aug 25, 2022
1ad6924
Add run_as_group and use the same node label as the airflow tests.
razvan Aug 26, 2022
db682b2
Merge branch 'main' into 125-openshift-compatibility
razvan Aug 26, 2022
d32ed11
make regenerate-charts
razvan Aug 26, 2022
88a9366
Remove runAsNonRoot
razvan Aug 26, 2022
ff89e54
make regenerate-charts
razvan Aug 26, 2022
652fe77
Fix minio public bucket setup.
razvan Aug 26, 2022
6ad750b
spark-pi-public-s3 test now works
razvan Aug 26, 2022
a505a71
private s3 test works
razvan Aug 29, 2022
bc66a60
kuttl test spark-ny-public-s3 works.
razvan Aug 30, 2022
a7a3c09
All kuttl tests work now.
razvan Aug 30, 2022
a27c9ed
Update the security context of the operator Pod
razvan Aug 30, 2022
e869395
make regenerate-charts
razvan Aug 30, 2022
6aeaced
Update CHANGELOG.
razvan Aug 30, 2022
39d6aa9
Add runAsUser to the Spark application security context.
razvan Aug 31, 2022
0bcaa03
Add runAsUser to Spark pod templates too.
razvan Aug 31, 2022
d1c1a87
Wait for the Minio deployment to be ready.
razvan Aug 31, 2022
98b5eb1
Merge node-selector and spark-examples
razvan Sep 2, 2022
24c9160
Merge branch 'main' into 125-openshift-compatibility
razvan Sep 2, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@ All notable changes to this project will be documented in this file.

- Add missing role to read S3Connection and S3Bucket objects ([#112]).
- Update annotation due to update to rust version ([#114]).
- Update RBAC properties for OpenShift compatibility ([#126]).

[#112]: https://github.com/stackabletech/spark-k8s-operator/pull/112
[#114]: https://github.com/stackabletech/spark-k8s-operator/pull/114
[#126]: https://github.com/stackabletech/spark-k8s-operator/pull/126

## [0.4.0] - 2022-08-03

Expand Down
8 changes: 8 additions & 0 deletions deploy/helm/spark-k8s-operator/templates/roles.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,11 @@ rules:
- get
- list
- watch
- apiGroups:
- rbac.authorization.k8s.io
resources:
- clusterroles
verbs:
- bind
resourceNames:
- {{ include "operator.name" . }}-clusterrole
77 changes: 72 additions & 5 deletions deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml
Original file line number Diff line number Diff line change
@@ -1,15 +1,82 @@
{{ if .Capabilities.APIVersions.Has "security.openshift.io/v1" }}
---
apiVersion: security.openshift.io/v1
kind: SecurityContextConstraints
metadata:
name: spark-k8s-scc
annotations:
kubernetes.io/description: |-
This resource is derived from hostmount-anyuid. It provides all the features of the
restricted SCC but allows host mounts and any UID by a pod. This is primarily
used by the persistent volume recycler. WARNING: this SCC allows host file
system access as any UID, including UID 0. Grant with caution.
release.openshift.io/create-only: "true"
allowHostDirVolumePlugin: true
allowHostIPC: false
allowHostNetwork: false
allowHostPID: false
allowHostPorts: false
allowPrivilegeEscalation: true
allowPrivilegedContainer: false
allowedCapabilities: null
defaultAddCapabilities: null
fsGroup:
type: RunAsAny
groups: []
priority: null
readOnlyRootFilesystem: false
runAsUser:
type: RunAsAny
seLinuxContext:
type: MustRunAs
supplementalGroups:
type: RunAsAny
volumes:
- configMap
- downwardAPI
- emptyDir
- hostPath
- nfs
- persistentVolumeClaim
- projected
- secret
- ephemeral
{{ end }}
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: spark-driver-edit-role
name: {{ include "operator.name" . }}-clusterrole
rules:
- apiGroups: [""]
resources: ["pods", "services", "configmaps"]
verbs: ["get", "list", "watch", "create", "delete"]
- apiGroups:
- ""
resources:
- persistentvolumeclaims
- configmaps
- pods
- secrets
- serviceaccounts
- services
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- events.k8s.io
resources:
- events
verbs:
- create
{{ if .Capabilities.APIVersions.Has "security.openshift.io/v1" }}
- apiGroups:
- security.openshift.io
resources:
- securitycontextconstraints
resourceNames:
- spark-k8s-scc
verbs:
- use
{{ end }}
24 changes: 16 additions & 8 deletions deploy/helm/spark-k8s-operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,22 @@ podAnnotations: {}

podSecurityContext: {}
# fsGroup: 2000

securityContext: {}
# capabilities:
# drop:
# - ALL
# readOnlyRootFilesystem: true
# runAsNonRoot: true
# runAsUser: 1000
#
# OpenShift 4.11 replaces the PodSecurityPolicy with a new pod security
# admission mechanism as described in this blog post [1].
# This requires Pods to explicitly specify the securityContext.
#
# [1]: https://cloud.redhat.com/blog/pod-security-admission-in-openshift-4.11
securityContext:
capabilities:
drop:
- ALL
readOnlyRootFilesystem: false
allowPrivilegeEscalation: false
seccompProfile:
type: RuntimeDefault
runAsNonRoot: true
runAsUser: 1000

resources: {}
# We usually recommend not to specify default resources and to leave this as a conscious
Expand Down
11 changes: 10 additions & 1 deletion deploy/manifests/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,16 @@ spec:
securityContext: {}
containers:
- name: spark-k8s-operator
securityContext: {}
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: false
runAsNonRoot: true
runAsUser: 1000
seccompProfile:
type: RuntimeDefault
image: "docker.stackable.tech/stackable/spark-k8s-operator:0.5.0-nightly"
imagePullPolicy: IfNotPresent
resources: {}
Expand Down
8 changes: 8 additions & 0 deletions deploy/manifests/roles.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -104,3 +104,11 @@ rules:
- get
- list
- watch
- apiGroups:
- rbac.authorization.k8s.io
resources:
- clusterroles
verbs:
- bind
resourceNames:
- spark-k8s-clusterrole
23 changes: 18 additions & 5 deletions deploy/manifests/spark-clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,27 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: spark-driver-edit-role
name: spark-k8s-clusterrole
rules:
- apiGroups: [""]
resources: ["pods", "services", "configmaps"]
verbs: ["get", "list", "watch", "create", "delete"]
- apiGroups:
- ""
resources:
- persistentvolumeclaims
- configmaps
- pods
- secrets
- serviceaccounts
- services
verbs:
- create
- delete
- get
- list
- patch
- update
- watch
- apiGroups:
- events.k8s.io
resources:
- events
verbs:
- create
4 changes: 2 additions & 2 deletions docs/modules/ROOT/pages/rbac.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@ The https://spark.apache.org/docs/latest/running-on-kubernetes.html#rbac[Spark-K

However, to add security, each `spark-submit` job launched by the spark-k8s operator will be assigned its own service account.

When the spark-k8s operator is installed via helm, a cluster role named `spark-driver-edit-role` is created with pre-defined permissions.
When the spark-k8s operator is installed via Helm, a cluster role named `spark-k8s-clusterrole` is created with pre-defined permissions.

When a new Spark application is submitted, the operator creates a new service account with the same name as the application and binds this account to the cluster role `spark-driver-edit-role` created by helm.
When a new Spark application is submitted, the operator creates a new service account with the same name as the application and binds this account to the cluster role `spark-k8s-clusterrole` created by Helm.
2 changes: 2 additions & 0 deletions rust/crd/src/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,5 @@ pub const CONTAINER_NAME_EXECUTOR: &str = "spark-executor";
// Key names under which S3 credentials are exposed to the Spark pods.
pub const ACCESS_KEY_ID: &str = "accessKeyId";
pub const SECRET_ACCESS_KEY: &str = "secretAccessKey";
// Mount path for S3 credential secrets inside the Spark containers.
pub const S3_SECRET_DIR_NAME: &str = "/stackable/secrets";

// UID set as `runAsUser` in the pod security contexts of the spark-submit Job
// and the driver/executor pod templates (see `security_context()` in the
// controller). Matches the operator's own securityContext (runAsUser: 1000).
pub const SPARK_UID: i64 = 1000;
30 changes: 19 additions & 11 deletions rust/operator-binary/src/spark_k8s_controller.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ use stackable_operator::commons::s3::InlinedS3BucketSpec;
use stackable_operator::commons::tls::{CaCert, TlsVerification};
use stackable_operator::k8s_openapi::api::batch::v1::{Job, JobSpec};
use stackable_operator::k8s_openapi::api::core::v1::{
ConfigMap, ConfigMapVolumeSource, Container, EnvVar, Pod, PodSpec, PodTemplateSpec,
ServiceAccount, Volume, VolumeMount,
ConfigMap, ConfigMapVolumeSource, Container, EnvVar, Pod, PodSecurityContext, PodSpec,
PodTemplateSpec, ServiceAccount, Volume, VolumeMount,
};
use stackable_operator::k8s_openapi::api::rbac::v1::{ClusterRole, RoleBinding, RoleRef, Subject};
use stackable_operator::k8s_openapi::Resource;
Expand All @@ -21,7 +21,7 @@ use std::{sync::Arc, time::Duration};
use strum::{EnumDiscriminants, IntoStaticStr};

const FIELD_MANAGER_SCOPE: &str = "sparkapplication";
const SPARK_CLUSTER_ROLE: &str = "spark-driver-edit-role";
const SPARK_CLUSTER_ROLE: &str = "spark-k8s-clusterrole";

pub struct Ctx {
pub client: stackable_operator::client::Client,
Expand Down Expand Up @@ -234,10 +234,7 @@ fn pod_template(
let mut pod_spec = PodSpec {
containers: vec![cb.build()],
volumes: Some(volumes.to_vec()),
security_context: PodSecurityContextBuilder::new()
.fs_group(1000)
.build()
.into(), // Needed for secret-operator
security_context: Some(security_context()),
..PodSpec::default()
};

Expand Down Expand Up @@ -370,10 +367,7 @@ fn spark_job(
service_account_name: serviceaccount.metadata.name.clone(),
volumes: Some(volumes),
image_pull_secrets: spark_application.spark_image_pull_secrets(),
security_context: PodSecurityContextBuilder::new()
.fs_group(1000)
.build()
.into(), // Needed for secret-operator
security_context: Some(security_context()),
node_selector: spark_application.driver_node_selector(),
..PodSpec::default()
}),
Expand Down Expand Up @@ -439,6 +433,20 @@ fn build_spark_role_serviceaccount(
Ok((sa, binding))
}

/// Builds the `PodSecurityContext` shared by the spark-submit Job pod and the
/// driver/executor pod templates.
///
/// Sets `fsGroup`, `runAsUser` and `runAsGroup` so the pods work both on
/// vanilla Kubernetes distributions (KinD, K3s) and on OpenShift with its
/// SCC-based admission checks.
fn security_context() -> PodSecurityContext {
    PodSecurityContextBuilder::new()
        // fsGroup is needed so mounted secret volumes are group-readable
        // (used by the secret-operator mounts).
        .fs_group(1000)
        // OpenShift generates UIDs for processes inside Pods. Setting the UID is optional,
        // *but* if specified, OpenShift will check that the value is within the
        // valid range generated by the SCC (security context constraints) for this Pod.
        // On the other hand, it is *required* to set the process UID in KinD, K3S as soon
        // as the runAsGroup property is set.
        .run_as_user(SPARK_UID)
        // Required to access files in mounted volumes on OpenShift.
        .run_as_group(0)
        .build()
}

/// Controller error policy: on any reconciliation error, requeue the object
/// after a fixed 5-second delay. The error and context are ignored; every
/// failure is treated the same way.
pub fn error_policy(_error: &Error, _ctx: Arc<Ctx>) -> Action {
    Action::requeue(Duration::from_secs(5))
}
14 changes: 0 additions & 14 deletions tests/templates/kuttl/node-selector/10-assert.yaml

This file was deleted.

24 changes: 0 additions & 24 deletions tests/templates/kuttl/node-selector/10-deploy-spark-app.yaml.j2

This file was deleted.

24 changes: 15 additions & 9 deletions tests/templates/kuttl/pyspark-ny-public-s3-image/00-assert.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,23 @@ metadata:
name: minio
timeout: 900
---
apiVersion: v1
kind: Service
apiVersion: apps/v1
kind: Deployment
metadata:
name: test-minio
labels:
app: minio
status:
readyReplicas: 1
---
apiVersion: apps/v1
kind: StatefulSet
apiVersion: v1
kind: Pod
metadata:
name: minio-mc
name: minio-client
labels:
app: minio-client
status:
readyReplicas: 1
replicas: 1
phase: Running
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: integration-tests-sa

This file was deleted.

Loading