From 4fac471d13ea5633358cf029a034d3263be0ca77 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Fri, 17 Jun 2022 19:36:57 +0200 Subject: [PATCH 01/30] wip --- Cargo.lock | 53 ++++++-- deploy/crd/sparkapplication.crd.yaml | 39 +++++- rust/crd/Cargo.toml | 2 +- rust/crd/src/constants.rs | 1 + rust/crd/src/lib.rs | 120 ++++++++++++------ rust/operator-binary/Cargo.toml | 4 +- .../src/spark_k8s_controller.rs | 40 +++--- 7 files changed, 186 insertions(+), 73 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b20d100b..42261253 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -274,8 +274,18 @@ version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a01d95850c592940db9b8194bc39f4bc0e89dee5c4265e4b1807c34a9aba453c" dependencies = [ - "darling_core", - "darling_macro", + "darling_core 0.13.4", + "darling_macro 0.13.4", +] + +[[package]] +name = "darling" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4529658bdda7fd6769b8614be250cdcfc3aeb0ee72fe66f9e41e5e5eb73eac02" +dependencies = [ + "darling_core 0.14.1", + "darling_macro 0.14.1", ] [[package]] @@ -292,13 +302,38 @@ dependencies = [ "syn", ] +[[package]] +name = "darling_core" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "649c91bc01e8b1eac09fb91e8dbc7d517684ca6be8ebc75bb9cafc894f9fdb6f" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + [[package]] name = "darling_macro" version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c972679f83bdf9c42bd905396b6c3588a843a17f0f16dfcfa3e2c5d57441835" dependencies = [ - "darling_core", + "darling_core 0.13.4", + "quote", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddfc69c5bfcbd2fc09a0f38451d2daf0e372e367986a83906d1b0dbc88134fb5" +dependencies = [ + "darling_core 0.14.1", "quote", "syn", ] @@ -895,7 +930,7 @@ version = "0.71.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "203f7c5acf9d0dfb0b08d44ec1d66ace3d1dfe0cdd82e65e274f3f96615d666c" dependencies = [ - "darling", + "darling 0.13.4", "proc-macro2", "quote", "serde_json", @@ -1664,8 +1699,8 @@ dependencies = [ [[package]] name = "stackable-operator" -version = "0.19.0" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=0.19.0#f8f2d5527b3463cc40f3d851172af7ae81db75c1" +version = "0.21.0" +source = "git+https://github.com/stackabletech/operator-rs.git?tag=0.21.0#dbfa6d45fb59fadd17f9b571255c0fdc4c522671" dependencies = [ "backoff", "chrono", @@ -1698,10 +1733,10 @@ dependencies = [ [[package]] name = "stackable-operator-derive" -version = "0.17.0" -source = "git+https://github.com/stackabletech/operator-rs.git?tag=0.19.0#f8f2d5527b3463cc40f3d851172af7ae81db75c1" +version = "0.21.0" +source = "git+https://github.com/stackabletech/operator-rs.git?tag=0.21.0#dbfa6d45fb59fadd17f9b571255c0fdc4c522671" dependencies = [ - "darling", + "darling 0.14.1", "proc-macro2", "quote", "syn", diff --git a/deploy/crd/sparkapplication.crd.yaml b/deploy/crd/sparkapplication.crd.yaml index 99b302ff..f5239cca 100644 --- a/deploy/crd/sparkapplication.crd.yaml +++ b/deploy/crd/sparkapplication.crd.yaml @@ -299,18 +299,51 @@ spec: inline: description: S3 connection definition as CRD. 
properties: + accessStyle: + description: "Which access style to use. Defaults to virtual hosted-style as most of the data products out there. Have a look at the official documentation on " + enum: + - Path + - VirtualHosted + nullable: true + type: string + credentials: + description: "If the S3 uses authentication you have to specify you S3 credentials. In the most cases a SecretClass providing `accessKey` and `secretKey` is sufficient." + nullable: true + properties: + scope: + description: "[Scope](https://docs.stackable.tech/secret-operator/scope.html) of the [SecretClass](https://docs.stackable.tech/secret-operator/secretclass.html)" + nullable: true + properties: + node: + default: false + type: boolean + pod: + default: false + type: boolean + services: + default: [] + items: + type: string + type: array + type: object + secretClass: + description: "[SecretClass](https://docs.stackable.tech/secret-operator/secretclass.html) containing the LDAP bind credentials" + type: string + required: + - secretClass + type: object host: + description: Hostname of the S3 server without any protocol or port nullable: true type: string port: + description: Port the S3 server listens on. If not specified the products will determine the port to use. format: uint16 minimum: 0.0 nullable: true type: integer - secretClass: - nullable: true - type: string tls: + description: If you want to use TLS when talking to S3 you can enable TLS encrypted communication with this setting. nullable: true properties: verification: diff --git a/rust/crd/Cargo.toml b/rust/crd/Cargo.toml index 6b8eeabb..f236b57a 100644 --- a/rust/crd/Cargo.toml +++ b/rust/crd/Cargo.toml @@ -8,7 +8,7 @@ repository = "https://github.com/stackabletech/spark-k8s-operator" version = "0.2.0-nightly" [dependencies] -stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag="0.19.0" } +stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag="0.21.0" } semver = "1.0" serde = { version = "1.0", features = ["derive"] } diff --git a/rust/crd/src/constants.rs b/rust/crd/src/constants.rs index 47f03d31..579be9eb 100644 --- a/rust/crd/src/constants.rs +++ b/rust/crd/src/constants.rs @@ -21,3 +21,4 @@ pub const ENV_AWS_ACCESS_KEY_ID: &str = "AWS_ACCESS_KEY_ID"; pub const ENV_AWS_SECRET_ACCESS_KEY: &str = "AWS_SECRET_ACCESS_KEY"; pub const ACCESS_KEY_ID: &str = "accessKeyId"; pub const SECRET_ACCESS_KEY: &str = "secretAccessKey"; +pub const S3_SECRET_DIR_NAME: &str = "/stackable/secrets"; diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index fbe7bf18..d2937f17 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -4,10 +4,9 @@ pub mod constants; use constants::*; use stackable_operator::builder::VolumeBuilder; -use stackable_operator::commons::s3::{InlinedS3BucketSpec, S3BucketDef}; +use stackable_operator::commons::s3::{InlinedS3BucketSpec, S3BucketDef, S3ConnectionSpec}; use stackable_operator::k8s_openapi::api::core::v1::{ - EmptyDirVolumeSource, EnvVar, EnvVarSource, LocalObjectReference, SecretKeySelector, Volume, - VolumeMount, + EmptyDirVolumeSource, EnvVar, LocalObjectReference, Volume, VolumeMount, }; use std::collections::{BTreeMap, HashMap}; @@ -166,7 +165,7 @@ impl SparkApplication { .map(|req| req.join(" ")) } - pub fn volumes(&self) -> Vec { + pub fn volumes(&self, s3bucket: &Option) -> Vec { let mut result: Vec = self .spec .volumes @@ -191,10 +190,23 @@ impl SparkApplication { .build(), ); } + + let s3_conn = 
s3bucket.as_ref().and_then(|i| i.connection.as_ref()); + + if let Some(S3ConnectionSpec { + credentials: Some(credentials), + .. + }) = s3_conn + { + result.push(credentials.to_volume("s3-credentials")); + } result } - pub fn executor_volume_mounts(&self) -> Vec { + pub fn executor_volume_mounts( + &self, + s3bucket: &Option, + ) -> Vec { let mut result: Vec = self .spec .executor @@ -221,10 +233,23 @@ impl SparkApplication { }); } + let s3_conn = s3bucket.as_ref().and_then(|i| i.connection.as_ref()); + + if let Some(S3ConnectionSpec { + credentials: Some(_credentials), + .. + }) = s3_conn + { + result.push(VolumeMount { + name: "s3-credentials".into(), + mount_path: S3_SECRET_DIR_NAME.into(), + ..VolumeMount::default() + }); + } result } - pub fn driver_volume_mounts(&self) -> Vec { + pub fn driver_volume_mounts(&self, s3bucket: &Option) -> Vec { let mut result: Vec = self .spec .driver @@ -249,6 +274,20 @@ impl SparkApplication { ..VolumeMount::default() }); } + + let s3_conn = s3bucket.as_ref().and_then(|i| i.connection.as_ref()); + + if let Some(S3ConnectionSpec { + credentials: Some(_credentials), + .. + }) = s3_conn + { + result.push(VolumeMount { + name: "s3-credentials".into(), + mount_path: S3_SECRET_DIR_NAME.into(), + ..VolumeMount::default() + }); + } result } @@ -273,7 +312,30 @@ impl SparkApplication { let mode = self.mode().context(ObjectHasNoDeployModeSnafu)?; let name = self.metadata.name.clone().context(ObjectHasNoNameSnafu)?; - let mut submit_cmd = vec![ + let mut submit_cmd: Vec = vec![]; + + let s3_conn = s3bucket.as_ref().and_then(|i| i.connection.as_ref()); + + if let Some(S3ConnectionSpec { + credentials: Some(_credentials), + .. + }) = s3_conn + { + submit_cmd.push(format!( + "export {env_var}=$(cat {secret_dir}/{file_name}) && ", + env_var = ENV_AWS_ACCESS_KEY_ID, + secret_dir = S3_SECRET_DIR_NAME, + file_name = ACCESS_KEY_ID + )); + submit_cmd.push(format!( + "export {env_var}=$(cat {secret_dir}/{file_name}) && ", + env_var = ENV_AWS_SECRET_ACCESS_KEY, + secret_dir = S3_SECRET_DIR_NAME, + file_name = SECRET_ACCESS_KEY + )); + } + + submit_cmd.extend(vec![ "/stackable/spark/bin/spark-submit".to_string(), "--verbose".to_string(), "--master k8s://https://${KUBERNETES_SERVICE_HOST}:${KUBERNETES_SERVICE_PORT_HTTPS}".to_string(), @@ -287,16 +349,21 @@ impl SparkApplication { format!("--conf spark.kubernetes.driver.container.image={}", self.spec.spark_image.as_ref().context(NoSparkImageSnafu)?), format!("--conf spark.kubernetes.executor.container.image={}", self.spec.spark_image.as_ref().context(NoSparkImageSnafu)?), format!("--conf spark.kubernetes.authenticate.driver.serviceAccountName={}", serviceaccount_name), - ]; + ]); // See https://spark.apache.org/docs/latest/running-on-kubernetes.html#dependency-management // for possible S3 related properties if let Some(endpoint) = s3bucket.as_ref().and_then(|s3| s3.endpoint()) { submit_cmd.push(format!("--conf spark.hadoop.fs.s3a.endpoint={}", endpoint)); } - if s3bucket.as_ref().and_then(|s3| s3.secret_class()).is_some() { - // We don't use the secret at all here, instead we assume the Self::env() has been - // called and this environment variables are availables. + if s3bucket + .as_ref() + .and_then(|i| i.connection.as_ref()) + .and_then(|c| c.credentials.as_ref()) + .is_some() + { + // We don't use the credentials at all here, instead we assume the Self::env() has been + // called and the environment variables are available. 
submit_cmd.push(format!( "--conf spark.hadoop.fs.s3a.access.key=${}", ENV_AWS_ACCESS_KEY_ID @@ -350,23 +417,9 @@ impl SparkApplication { Ok(submit_cmd) } - pub fn env(&self, s3bucket: &Option) -> Vec { + pub fn env(&self, _s3bucket: &Option) -> Vec { let tmp = self.spec.env.as_ref(); let mut e: Vec = tmp.iter().flat_map(|e| e.iter()).cloned().collect(); - if let Some(s3) = s3bucket { - if let Some(secret) = s3.secret_class() { - e.push(Self::env_var_from_secret( - ENV_AWS_ACCESS_KEY_ID, - secret.as_ref(), - ACCESS_KEY_ID, - )); - e.push(Self::env_var_from_secret( - ENV_AWS_SECRET_ACCESS_KEY, - secret.as_ref(), - SECRET_ACCESS_KEY, - )); - } - } if self.requirements().is_some() { e.push(EnvVar { name: "PYTHONPATH".to_string(), @@ -379,21 +432,6 @@ impl SparkApplication { e } - fn env_var_from_secret(var_name: &str, secret: &str, secret_key: &str) -> EnvVar { - EnvVar { - name: String::from(var_name), - value_from: Some(EnvVarSource { - secret_key_ref: Some(SecretKeySelector { - name: Some(String::from(secret)), - key: String::from(secret_key), - ..Default::default() - }), - ..Default::default() - }), - ..Default::default() - } - } - pub fn driver_node_selector(&self) -> Option> { self.spec .driver diff --git a/rust/operator-binary/Cargo.toml b/rust/operator-binary/Cargo.toml index 825e686f..f1b85a22 100644 --- a/rust/operator-binary/Cargo.toml +++ b/rust/operator-binary/Cargo.toml @@ -8,7 +8,7 @@ repository = "https://github.com/stackabletech/spark-k8s-operator" version = "0.2.0-nightly" [dependencies] -stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag="0.19.0" } +stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag="0.21.0" } stackable-spark-k8s-crd = { path = "../crd" } anyhow = "1.0" clap = { version = "3.2", features = ["derive"] } @@ -23,5 +23,5 @@ tracing-futures = { version = "0.2", features = ["futures-03"] } [build-dependencies] built = { version = "0.5", features = ["chrono", "git2"] } -stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag="0.19.0" } +stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag="0.21.0" } stackable-spark-k8s-crd = { path = "../crd" } diff --git a/rust/operator-binary/src/spark_k8s_controller.rs b/rust/operator-binary/src/spark_k8s_controller.rs index 09e8058e..8da342da 100644 --- a/rust/operator-binary/src/spark_k8s_controller.rs +++ b/rust/operator-binary/src/spark_k8s_controller.rs @@ -1,6 +1,7 @@ use snafu::{OptionExt, ResultExt, Snafu}; use stackable_operator::builder::{ConfigMapBuilder, ContainerBuilder, ObjectMetaBuilder}; +use stackable_operator::commons::s3::InlinedS3BucketSpec; use stackable_operator::k8s_openapi::api::batch::v1::{Job, JobSpec}; use stackable_operator::k8s_openapi::api::core::v1::{ ConfigMap, ConfigMapVolumeSource, Container, EmptyDirVolumeSource, EnvVar, Pod, PodSpec, @@ -150,8 +151,12 @@ pub async fn reconcile( .into_iter() .flatten() .collect(); - let pod_template_config_map = - pod_template_config_map(&spark_application, init_containers.as_ref(), &env_vars)?; + let pod_template_config_map = pod_template_config_map( + &spark_application, + init_containers.as_ref(), + &env_vars, + &s3bucket, + )?; client .apply_patch( FIELD_MANAGER_SCOPE, @@ -172,6 +177,7 @@ pub async fn reconcile( &job_container, &env_vars, &job_commands, + &s3bucket, )?; client .apply_patch(FIELD_MANAGER_SCOPE, &job, &job) @@ -190,17 +196,16 @@ fn pod_template( env: &[EnvVar], 
node_selector: Option>, ) -> Result { - let mut container = ContainerBuilder::new(container_name); - container - .add_volume_mounts(volume_mounts.to_vec()) + let mut cb = ContainerBuilder::new(container_name); + cb.add_volume_mounts(volume_mounts.to_vec()) .add_env_vars(env.to_vec()); if let Some(image_pull_policy) = spark_application.spark_image_pull_policy() { - container.image_pull_policy(image_pull_policy.to_string()); + cb.image_pull_policy(image_pull_policy.to_string()); } let mut pod_spec = PodSpec { - containers: vec![container.build()], + containers: vec![cb.build()], volumes: Some(volumes.to_vec()), ..PodSpec::default() }; @@ -228,15 +233,16 @@ fn pod_template_config_map( spark_application: &SparkApplication, init_containers: &[Container], env: &[EnvVar], + s3bucket: &Option, ) -> Result { - let volumes = spark_application.volumes(); + let volumes = spark_application.volumes(s3bucket); let driver_template = pod_template( spark_application, CONTAINER_NAME_DRIVER, init_containers, volumes.as_ref(), - spark_application.driver_volume_mounts().as_ref(), + spark_application.driver_volume_mounts(s3bucket).as_ref(), env, spark_application.driver_node_selector(), )?; @@ -245,7 +251,7 @@ fn pod_template_config_map( CONTAINER_NAME_EXECUTOR, init_containers, volumes.as_ref(), - spark_application.executor_volume_mounts().as_ref(), + spark_application.executor_volume_mounts(s3bucket).as_ref(), env, spark_application.executor_node_selector(), )?; @@ -279,13 +285,14 @@ fn spark_job( job_container: &Option, env: &[EnvVar], job_commands: &[String], + s3bucket: &Option, ) -> Result { let mut volume_mounts = vec![VolumeMount { name: VOLUME_MOUNT_NAME_POD_TEMPLATES.into(), mount_path: VOLUME_MOUNT_PATH_POD_TEMPLATES.into(), ..VolumeMount::default() }]; - volume_mounts.extend(spark_application.driver_volume_mounts()); + volume_mounts.extend(spark_application.driver_volume_mounts(s3bucket)); if job_container.is_some() { volume_mounts.push(VolumeMount { name: VOLUME_MOUNT_NAME_JOB.into(), @@ -294,9 +301,8 @@ fn spark_job( }) } - let mut container = ContainerBuilder::new("spark-submit"); - container - .image(spark_image) + let mut cb = ContainerBuilder::new("spark-submit"); + cb.image(spark_image) .command(vec!["/bin/bash".to_string()]) .args(vec![ "-c".to_string(), @@ -313,7 +319,7 @@ fn spark_job( }]); if let Some(image_pull_policy) = spark_application.spark_image_pull_policy() { - container.image_pull_policy(image_pull_policy.to_string()); + cb.image_pull_policy(image_pull_policy.to_string()); } let mut volumes = vec![Volume { @@ -324,7 +330,7 @@ fn spark_job( }), ..Volume::default() }]; - volumes.extend(spark_application.volumes()); + volumes.extend(spark_application.volumes(s3bucket)); if job_container.is_some() { volumes.push(Volume { @@ -342,7 +348,7 @@ fn spark_job( .build(), ), spec: Some(PodSpec { - containers: vec![container.build()], + containers: vec![cb.build()], init_containers: job_container.as_ref().map(|c| vec![c.clone()]), restart_policy: Some("Never".to_string()), service_account_name: serviceaccount.metadata.name.clone(), From 4da42c1ee2c61cc64732c55659cb7e40efed1a11 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Mon, 20 Jun 2022 17:34:16 +0200 Subject: [PATCH 02/30] wip: working test --- rust/crd/src/lib.rs | 3 +-- rust/operator-binary/src/spark_k8s_controller.rs | 7 ++++--- .../kuttl/spark-pi-private-s3/00-s3-secret.yaml | 12 ++++++++++++ .../spark-pi-private-s3/10-deploy-spark-app.yaml.j2 | 3 ++- 4 files changed, 19 insertions(+), 6 deletions(-) diff --git 
a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index d2937f17..7156bc0b 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -362,8 +362,7 @@ impl SparkApplication { .and_then(|c| c.credentials.as_ref()) .is_some() { - // We don't use the credentials at all here, instead we assume the Self::env() has been - // called and the environment variables are available. + // We don't use the credentials at all here but assume they are available submit_cmd.push(format!( "--conf spark.hadoop.fs.s3a.access.key=${}", ENV_AWS_ACCESS_KEY_ID diff --git a/rust/operator-binary/src/spark_k8s_controller.rs b/rust/operator-binary/src/spark_k8s_controller.rs index 8da342da..d6b8a1ed 100644 --- a/rust/operator-binary/src/spark_k8s_controller.rs +++ b/rust/operator-binary/src/spark_k8s_controller.rs @@ -1,5 +1,5 @@ use snafu::{OptionExt, ResultExt, Snafu}; -use stackable_operator::builder::{ConfigMapBuilder, ContainerBuilder, ObjectMetaBuilder}; +use stackable_operator::builder::{ConfigMapBuilder, ContainerBuilder, ObjectMetaBuilder, PodSecurityContextBuilder}; use stackable_operator::commons::s3::InlinedS3BucketSpec; use stackable_operator::k8s_openapi::api::batch::v1::{Job, JobSpec}; @@ -207,6 +207,7 @@ fn pod_template( let mut pod_spec = PodSpec { containers: vec![cb.build()], volumes: Some(volumes.to_vec()), + security_context: PodSecurityContextBuilder::new().fs_group(1000).build().into(), // Needed for secret-operator ..PodSpec::default() }; @@ -303,10 +304,9 @@ fn spark_job( let mut cb = ContainerBuilder::new("spark-submit"); cb.image(spark_image) - .command(vec!["/bin/bash".to_string()]) + .command(vec!["/bin/sh".to_string()]) .args(vec![ "-c".to_string(), - "-x".to_string(), job_commands.join(" "), ]) .add_volume_mounts(volume_mounts) @@ -354,6 +354,7 @@ fn spark_job( service_account_name: serviceaccount.metadata.name.clone(), volumes: Some(volumes), image_pull_secrets: spark_application.spark_image_pull_secrets(), + security_context: PodSecurityContextBuilder::new().fs_group(1000).build().into(), // Needed for secret-operator ..PodSpec::default() }), }; diff --git a/tests/templates/kuttl/spark-pi-private-s3/00-s3-secret.yaml b/tests/templates/kuttl/spark-pi-private-s3/00-s3-secret.yaml index 2e32aff6..0b9c799a 100644 --- a/tests/templates/kuttl/spark-pi-private-s3/00-s3-secret.yaml +++ b/tests/templates/kuttl/spark-pi-private-s3/00-s3-secret.yaml @@ -3,7 +3,19 @@ apiVersion: v1 kind: Secret metadata: name: minio-credentials + labels: + secrets.stackable.tech/class: s3-credentials-class timeout: 240 stringData: accessKeyId: minioAccessKey secretAccessKey: minioSecretKey +--- +apiVersion: secrets.stackable.tech/v1alpha1 +kind: SecretClass +metadata: + name: s3-credentials-class +spec: + backend: + k8sSearch: + searchNamespace: + pod: {} diff --git a/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 index 2fa270ba..3a97d80c 100644 --- a/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 @@ -16,7 +16,8 @@ spec: inline: host: test-minio port: 9000 - secretClass: minio-credentials + credentials: + secretClass: s3-credentials-class volumes: - name: spark-pi-deps persistentVolumeClaim: From 988a2e24086d1498f6634f3a17bebf9fc25080b2 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Mon, 20 Jun 2022 17:36:48 +0200 Subject: [PATCH 03/30] formatting --- .../src/spark_k8s_controller.rs | 19 ++++++++++++------- 1 file 
changed, 12 insertions(+), 7 deletions(-) diff --git a/rust/operator-binary/src/spark_k8s_controller.rs b/rust/operator-binary/src/spark_k8s_controller.rs index d6b8a1ed..a412a6d4 100644 --- a/rust/operator-binary/src/spark_k8s_controller.rs +++ b/rust/operator-binary/src/spark_k8s_controller.rs @@ -1,5 +1,7 @@ use snafu::{OptionExt, ResultExt, Snafu}; -use stackable_operator::builder::{ConfigMapBuilder, ContainerBuilder, ObjectMetaBuilder, PodSecurityContextBuilder}; +use stackable_operator::builder::{ + ConfigMapBuilder, ContainerBuilder, ObjectMetaBuilder, PodSecurityContextBuilder, +}; use stackable_operator::commons::s3::InlinedS3BucketSpec; use stackable_operator::k8s_openapi::api::batch::v1::{Job, JobSpec}; @@ -207,7 +209,10 @@ fn pod_template( let mut pod_spec = PodSpec { containers: vec![cb.build()], volumes: Some(volumes.to_vec()), - security_context: PodSecurityContextBuilder::new().fs_group(1000).build().into(), // Needed for secret-operator + security_context: PodSecurityContextBuilder::new() + .fs_group(1000) + .build() + .into(), // Needed for secret-operator ..PodSpec::default() }; @@ -305,10 +310,7 @@ fn spark_job( let mut cb = ContainerBuilder::new("spark-submit"); cb.image(spark_image) .command(vec!["/bin/sh".to_string()]) - .args(vec![ - "-c".to_string(), - job_commands.join(" "), - ]) + .args(vec!["-c".to_string(), job_commands.join(" ")]) .add_volume_mounts(volume_mounts) .add_env_vars(env.to_vec()) // TODO: move this to the image @@ -354,7 +356,10 @@ fn spark_job( service_account_name: serviceaccount.metadata.name.clone(), volumes: Some(volumes), image_pull_secrets: spark_application.spark_image_pull_secrets(), - security_context: PodSecurityContextBuilder::new().fs_group(1000).build().into(), // Needed for secret-operator + security_context: PodSecurityContextBuilder::new() + .fs_group(1000) + .build() + .into(), // Needed for secret-operator ..PodSpec::default() }), }; From 3978283683153405e1401f30ec03007da42e969d Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Mon, 20 Jun 2022 17:52:57 +0200 Subject: [PATCH 04/30] code cleanup --- rust/crd/src/lib.rs | 41 ++++--------------- .../src/spark_k8s_controller.rs | 2 +- 2 files changed, 10 insertions(+), 33 deletions(-) diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 7156bc0b..77f2afb4 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -217,35 +217,8 @@ impl SparkApplication { .cloned() .collect(); - if self.spec.image.is_some() { - result.push(VolumeMount { - name: VOLUME_MOUNT_NAME_JOB.into(), - mount_path: VOLUME_MOUNT_PATH_JOB.into(), - ..VolumeMount::default() - }); - } - - if self.requirements().is_some() { - result.push(VolumeMount { - name: VOLUME_MOUNT_NAME_REQ.into(), - mount_path: VOLUME_MOUNT_PATH_REQ.into(), - ..VolumeMount::default() - }); - } - - let s3_conn = s3bucket.as_ref().and_then(|i| i.connection.as_ref()); + self.check_mounts(&mut result, s3bucket); - if let Some(S3ConnectionSpec { - credentials: Some(_credentials), - .. 
- }) = s3_conn - { - result.push(VolumeMount { - name: "s3-credentials".into(), - mount_path: S3_SECRET_DIR_NAME.into(), - ..VolumeMount::default() - }); - } result } @@ -259,6 +232,13 @@ impl SparkApplication { .flat_map(|v| v.iter()) .cloned() .collect(); + + self.check_mounts(&mut result, s3bucket); + + result + } + + fn check_mounts(&self, result: &mut Vec, s3bucket: &Option) { if self.spec.image.is_some() { result.push(VolumeMount { name: VOLUME_MOUNT_NAME_JOB.into(), @@ -266,7 +246,6 @@ impl SparkApplication { ..VolumeMount::default() }); } - if self.requirements().is_some() { result.push(VolumeMount { name: VOLUME_MOUNT_NAME_REQ.into(), @@ -274,7 +253,6 @@ impl SparkApplication { ..VolumeMount::default() }); } - let s3_conn = s3bucket.as_ref().and_then(|i| i.connection.as_ref()); if let Some(S3ConnectionSpec { @@ -288,7 +266,6 @@ impl SparkApplication { ..VolumeMount::default() }); } - result } pub fn recommended_labels(&self) -> BTreeMap { @@ -416,7 +393,7 @@ impl SparkApplication { Ok(submit_cmd) } - pub fn env(&self, _s3bucket: &Option) -> Vec { + pub fn env(&self) -> Vec { let tmp = self.spec.env.as_ref(); let mut e: Vec = tmp.iter().flat_map(|e| e.iter()).cloned().collect(); if self.requirements().is_some() { diff --git a/rust/operator-binary/src/spark_k8s_controller.rs b/rust/operator-binary/src/spark_k8s_controller.rs index a412a6d4..2ac85767 100644 --- a/rust/operator-binary/src/spark_k8s_controller.rs +++ b/rust/operator-binary/src/spark_k8s_controller.rs @@ -147,7 +147,7 @@ pub async fn reconcile( container_builder.build() }); - let env_vars = spark_application.env(&s3bucket); + let env_vars = spark_application.env(); let init_containers: Vec = vec![job_container.clone(), requirements_container.clone()] .into_iter() From 5b435f20715a9fd8f2b0c39d4514248c297694ce Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Mon, 20 Jun 2022 18:09:35 +0200 Subject: [PATCH 05/30] regenerate charts --- deploy/helm/spark-k8s-operator/crds/crds.yaml | 39 +++++++++++++++++-- deploy/manifests/crds.yaml | 39 +++++++++++++++++-- 2 files changed, 72 insertions(+), 6 deletions(-) diff --git a/deploy/helm/spark-k8s-operator/crds/crds.yaml b/deploy/helm/spark-k8s-operator/crds/crds.yaml index 0c31374d..1bb64143 100644 --- a/deploy/helm/spark-k8s-operator/crds/crds.yaml +++ b/deploy/helm/spark-k8s-operator/crds/crds.yaml @@ -301,18 +301,51 @@ spec: inline: description: S3 connection definition as CRD. properties: + accessStyle: + description: "Which access style to use. Defaults to virtual hosted-style as most of the data products out there. Have a look at the official documentation on " + enum: + - Path + - VirtualHosted + nullable: true + type: string + credentials: + description: "If the S3 uses authentication you have to specify you S3 credentials. In the most cases a SecretClass providing `accessKey` and `secretKey` is sufficient." 
+ nullable: true + properties: + scope: + description: "[Scope](https://docs.stackable.tech/secret-operator/scope.html) of the [SecretClass](https://docs.stackable.tech/secret-operator/secretclass.html)" + nullable: true + properties: + node: + default: false + type: boolean + pod: + default: false + type: boolean + services: + default: [] + items: + type: string + type: array + type: object + secretClass: + description: "[SecretClass](https://docs.stackable.tech/secret-operator/secretclass.html) containing the LDAP bind credentials" + type: string + required: + - secretClass + type: object host: + description: Hostname of the S3 server without any protocol or port nullable: true type: string port: + description: Port the S3 server listens on. If not specified the products will determine the port to use. format: uint16 minimum: 0.0 nullable: true type: integer - secretClass: - nullable: true - type: string tls: + description: If you want to use TLS when talking to S3 you can enable TLS encrypted communication with this setting. nullable: true properties: verification: diff --git a/deploy/manifests/crds.yaml b/deploy/manifests/crds.yaml index 0540e287..e907f8ef 100644 --- a/deploy/manifests/crds.yaml +++ b/deploy/manifests/crds.yaml @@ -302,18 +302,51 @@ spec: inline: description: S3 connection definition as CRD. properties: + accessStyle: + description: "Which access style to use. Defaults to virtual hosted-style as most of the data products out there. Have a look at the official documentation on " + enum: + - Path + - VirtualHosted + nullable: true + type: string + credentials: + description: "If the S3 uses authentication you have to specify you S3 credentials. In the most cases a SecretClass providing `accessKey` and `secretKey` is sufficient." + nullable: true + properties: + scope: + description: "[Scope](https://docs.stackable.tech/secret-operator/scope.html) of the [SecretClass](https://docs.stackable.tech/secret-operator/secretclass.html)" + nullable: true + properties: + node: + default: false + type: boolean + pod: + default: false + type: boolean + services: + default: [] + items: + type: string + type: array + type: object + secretClass: + description: "[SecretClass](https://docs.stackable.tech/secret-operator/secretclass.html) containing the LDAP bind credentials" + type: string + required: + - secretClass + type: object host: + description: Hostname of the S3 server without any protocol or port nullable: true type: string port: + description: Port the S3 server listens on. If not specified the products will determine the port to use. format: uint16 minimum: 0.0 nullable: true type: integer - secretClass: - nullable: true - type: string tls: + description: If you want to use TLS when talking to S3 you can enable TLS encrypted communication with this setting. nullable: true properties: verification: From 223408e75802db498ccfbd640fc27d917446b4a4 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Tue, 21 Jun 2022 09:40:21 +0200 Subject: [PATCH 06/30] updated changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 25da2e1c..7a752054 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,10 +9,12 @@ All notable changes to this project will be documented in this file. 
- Added new fields to govern image pull policy ([#75]) - New `nodeSelector` fields for both the driver and the excutors ([#76]) - Mirror driver pod status to the corresponding spark application ([#77]) +- Use current S3 connection/bucket structs ([#86]) [#75]: https://github.com/stackabletech/spark-k8s-operator/pull/75 [#76]: https://github.com/stackabletech/spark-k8s-operator/pull/76 [#77]: https://github.com/stackabletech/spark-k8s-operator/pull/77 +[#86]: https://github.com/stackabletech/spark-k8s-operator/pull/86 ### Changed From a0cdc2629ceef02ad6ec53e23c151ea0239f03e2 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Tue, 21 Jun 2022 10:54:12 +0200 Subject: [PATCH 07/30] use access_style from s3 struct --- rust/crd/src/lib.rs | 35 ++++++++++--------- .../10-deploy-spark-app.yaml.j2 | 2 +- .../10-deploy-spark-app.yaml.j2 | 2 +- .../10-deploy-spark-app.yaml.j2 | 2 +- 4 files changed, 22 insertions(+), 19 deletions(-) diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 77f2afb4..acbc5e6f 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -4,7 +4,9 @@ pub mod constants; use constants::*; use stackable_operator::builder::VolumeBuilder; -use stackable_operator::commons::s3::{InlinedS3BucketSpec, S3BucketDef, S3ConnectionSpec}; +use stackable_operator::commons::s3::{ + InlinedS3BucketSpec, S3AccessStyle, S3BucketDef, S3ConnectionSpec, +}; use stackable_operator::k8s_openapi::api::core::v1::{ EmptyDirVolumeSource, EnvVar, LocalObjectReference, Volume, VolumeMount, }; @@ -333,21 +335,22 @@ impl SparkApplication { if let Some(endpoint) = s3bucket.as_ref().and_then(|s3| s3.endpoint()) { submit_cmd.push(format!("--conf spark.hadoop.fs.s3a.endpoint={}", endpoint)); } - if s3bucket - .as_ref() - .and_then(|i| i.connection.as_ref()) - .and_then(|c| c.credentials.as_ref()) - .is_some() - { - // We don't use the credentials at all here but assume they are available - submit_cmd.push(format!( - "--conf spark.hadoop.fs.s3a.access.key=${}", - ENV_AWS_ACCESS_KEY_ID - )); - submit_cmd.push(format!( - "--conf spark.hadoop.fs.s3a.secret.key=${}", - ENV_AWS_SECRET_ACCESS_KEY - )); + + if let Some(conn) = s3bucket.as_ref().and_then(|i| i.connection.as_ref()) { + if let Some(S3AccessStyle::Path) = conn.access_style { + submit_cmd.push("--conf spark.hadoop.fs.s3a.path.style.access=true".to_string()); + } + if conn.credentials.as_ref().is_some() { + // We don't use the credentials at all here but assume they are available + submit_cmd.push(format!( + "--conf spark.hadoop.fs.s3a.access.key=${}", + ENV_AWS_ACCESS_KEY_ID + )); + submit_cmd.push(format!( + "--conf spark.hadoop.fs.s3a.secret.key=${}", + ENV_AWS_SECRET_ACCESS_KEY + )); + } } // conf arguments that are not driver or executor specific diff --git a/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 index 4eb813cf..07e3cf17 100644 --- a/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 @@ -34,9 +34,9 @@ spec: inline: host: test-minio port: 9000 + accessStyle: Path sparkConf: spark.hadoop.fs.s3a.aws.credentials.provider: "org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider" - spark.hadoop.fs.s3a.path.style.access: "true" spark.driver.extraClassPath: "/dependencies/jars/hadoop-aws-{{ test_scenario['values']['hadoop'] }}.jar:/dependencies/jars/aws-java-sdk-bundle-1.11.375.jar" spark.executor.extraClassPath: 
"/dependencies/jars/hadoop-aws-{{ test_scenario['values']['hadoop'] }}.jar:/dependencies/jars/aws-java-sdk-bundle-1.11.375.jar" driver: diff --git a/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 index 3a97d80c..2ced72ce 100644 --- a/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 @@ -16,6 +16,7 @@ spec: inline: host: test-minio port: 9000 + accessStyle: Path credentials: secretClass: s3-credentials-class volumes: @@ -24,7 +25,6 @@ spec: claimName: spark-pi-pvc sparkConf: spark.hadoop.fs.s3a.aws.credentials.provider: "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" - spark.hadoop.fs.s3a.path.style.access: "true" spark.driver.extraClassPath: "/dependencies/jars/hadoop-aws-3.2.0.jar:/dependencies/jars/aws-java-sdk-bundle-1.11.375.jar" spark.executor.extraClassPath: "/dependencies/jars/hadoop-aws-3.2.0.jar:/dependencies/jars/aws-java-sdk-bundle-1.11.375.jar" driver: diff --git a/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 index 427205fe..f7143249 100644 --- a/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 @@ -21,9 +21,9 @@ spec: inline: host: test-minio port: 9000 + accessStyle: Path sparkConf: spark.hadoop.fs.s3a.aws.credentials.provider: "org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider" - spark.hadoop.fs.s3a.path.style.access: "true" spark.driver.extraClassPath: "/dependencies/jars/hadoop-aws-{{ test_scenario['values']['hadoop'] }}.jar:/dependencies/jars/aws-java-sdk-bundle-1.11.375.jar" spark.executor.extraClassPath: "/dependencies/jars/hadoop-aws-{{ test_scenario['values']['hadoop'] }}.jar:/dependencies/jars/aws-java-sdk-bundle-1.11.375.jar" driver: From 5af14ca648af52572eafb66ace9f21e05734d20c Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Tue, 21 Jun 2022 14:42:39 +0200 Subject: [PATCH 08/30] log out warning if tls is specified --- rust/crd/src/lib.rs | 36 +++++++++++++++++++ .../src/spark_k8s_controller.rs | 7 ++++ 2 files changed, 43 insertions(+) diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index acbc5e6f..9a6a8bc1 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -498,6 +498,10 @@ mod tests { use crate::ImagePullPolicy; use crate::LocalObjectReference; use crate::SparkApplication; + use stackable_operator::commons::s3::{ + S3AccessStyle, S3BucketSpec, S3ConnectionDef, S3ConnectionSpec, + }; + use stackable_operator::commons::tls::{Tls, TlsVerification}; use std::str::FromStr; #[test] @@ -728,4 +732,36 @@ spec: ImagePullPolicy::from_str("IfNotPresent").unwrap() ); } + + #[test] + fn test_ser_inline() { + let bucket = S3BucketSpec { + bucket_name: Some("test-bucket-name".to_owned()), + connection: Some(S3ConnectionDef::Inline(S3ConnectionSpec { + host: Some("host".to_owned()), + port: Some(8080), + credentials: None, + access_style: Some(S3AccessStyle::VirtualHosted), + tls: Some(Tls { + verification: TlsVerification::None {}, + }), + })), + }; + + assert_eq!( + serde_yaml::to_string(&bucket).unwrap(), + "--- +bucketName: test-bucket-name +connection: + inline: + host: host + port: 8080 + accessStyle: VirtualHosted + tls: + verification: + none: {} +" + .to_owned() + ) + } } diff --git a/rust/operator-binary/src/spark_k8s_controller.rs 
b/rust/operator-binary/src/spark_k8s_controller.rs index 2ac85767..096c084c 100644 --- a/rust/operator-binary/src/spark_k8s_controller.rs +++ b/rust/operator-binary/src/spark_k8s_controller.rs @@ -101,6 +101,13 @@ pub async fn reconcile( _ => None, }; + if let Some(conn) = s3bucket.as_ref().and_then(|i| i.connection.as_ref()) { + if conn.tls.as_ref().is_some() { + tracing::warn!("The resource indicates S3-access should use TLS: TLS-verification has not yet been implemented \ + but an HTTPS-endpoint will be used!"); + } + } + let (serviceaccount, rolebinding) = build_spark_role_serviceaccount(&spark_application)?; client .apply_patch(FIELD_MANAGER_SCOPE, &serviceaccount, &serviceaccount) From 927ad7750eced2384413aeb3885b8fffdd315312 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Tue, 21 Jun 2022 15:35:24 +0200 Subject: [PATCH 09/30] updated documentation --- .../examples/example-sparkapp-s3-private.yaml | 5 ++-- docs/modules/ROOT/pages/usage.adoc | 24 +++++++++++-------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/docs/modules/ROOT/examples/example-sparkapp-s3-private.yaml b/docs/modules/ROOT/examples/example-sparkapp-s3-private.yaml index 761b5de0..3ea32d07 100644 --- a/docs/modules/ROOT/examples/example-sparkapp-s3-private.yaml +++ b/docs/modules/ROOT/examples/example-sparkapp-s3-private.yaml @@ -16,10 +16,11 @@ spec: inline: host: test-minio port: 9000 - secretClass: minio-credentials # <4> + accessStyle: Path + credentials: # <4> + secretClass: s3-credentials-class sparkConf: # <5> spark.hadoop.fs.s3a.aws.credentials.provider: "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" # <6> - spark.hadoop.fs.s3a.path.style.access: "true" spark.driver.extraClassPath: "/dependencies/jars/hadoop-aws-3.2.0.jar:/dependencies/jars/aws-java-sdk-bundle-1.11.375.jar" spark.executor.extraClassPath: "/dependencies/jars/hadoop-aws-3.2.0.jar:/dependencies/jars/aws-java-sdk-bundle-1.11.375.jar" volumes: diff --git a/docs/modules/ROOT/pages/usage.adoc b/docs/modules/ROOT/pages/usage.adoc index 569d9f5b..f6ffbb03 100644 --- a/docs/modules/ROOT/pages/usage.adoc +++ b/docs/modules/ROOT/pages/usage.adoc @@ -2,7 +2,7 @@ == Create an Apache Spark job -If you followed the installation instructions, you should now have a Stackable Operator for Apache Spark up and running and you are ready to create your first Apache Spark kubernetes cluster. +If you followed the installation instructions, you should now have a Stackable Operator for Apache Spark up and running, and you are ready to create your first Apache Spark kubernetes cluster. The example below creates a job running on Apache Spark 3.2.1, using the spark-on-kubernetes paradigm described in the spark documentation. The application file is itself part of the spark distribution and `local` refers to the path on the driver/executors; there are no external dependencies. 
@@ -64,11 +64,11 @@ include::example$example-sparkapp-external-dependencies.yaml[] include::example$example-sparkapp-image.yaml[] ---- -<1> Job image: this contains the job artifact that will retrieved from the volume mount backed by the PVC +<1> Job image: this contains the job artifact that will be retrieved from the volume mount backed by the PVC <2> Job python artifact (local) <3> Job argument (external) <4> List of python job requirements: these will be installed in the pods via `pip` -<5> Spark dependencies: the credentials provider (the user knows what is relevant here) plus dependencies needed to access external resources (in this case, in s3) +<5> Spark dependencies: the credentials provider (the user knows what is relevant here) plus dependencies needed to access external resources (in this case, in an S3 store) <6> the name of the volume mount backed by a `PersistentVolumeClaim` that must be pre-existing <7> the path on the volume mount: this is referenced in the `sparkConf` section where the extra class path is defined for the driver and executors @@ -81,7 +81,7 @@ include::example$example-sparkapp-pvc.yaml[] <1> Job artifact located on S3. <2> Job main class -<3> Spark dependencies: the credentials provider (the user knows what is relevant here) plus dependencies needed to access external resources (in this case, in s3, accessed without credentials) +<3> Spark dependencies: the credentials provider (the user knows what is relevant here) plus dependencies needed to access external resources (in this case, in an S3 store, accessed without credentials) <4> the name of the volume mount backed by a `PersistentVolumeClaim` that must be pre-existing <5> the path on the volume mount: this is referenced in the `sparkConf` section where the extra class path is defined for the driver and executors @@ -92,12 +92,12 @@ include::example$example-sparkapp-pvc.yaml[] include::example$example-sparkapp-s3-private.yaml[] ---- -<1> Job python artifact (located in S3) +<1> Job python artifact (located in an S3 store) <2> Artifact class <3> S3 section, specifying the existing secret and S3 end-point (in this case, MinIO) -<4> Credentials secret +<4> Credentials referencing a secretClass (not shown in is example) <5> Spark dependencies: the credentials provider (the user knows what is relevant here) plus dependencies needed to access external resources... -<6> ...in this case, in s3, accessed with the credentials defined in the secret +<6> ...in this case, in an S3 store, accessed with the credentials defined in the secret <7> the name of the volume mount backed by a `PersistentVolumeClaim` that must be pre-existing <8> the path on the volume mount: this is referenced in the `sparkConf` section where the extra class path is defined for the driver and executors @@ -121,7 +121,7 @@ include::example$example-sparkapp-configmap.yaml[] == S3 bucket specification -You can specify S3 connection details directly inside the `SparkApplication` specification or by refering to an external `S3Bucket` custom resource. +You can specify S3 connection details directly inside the `SparkApplication` specification or by referring to an external `S3Bucket` custom resource. To specify S3 connection details directly as part of the `SparkApplication` resource you add an inline bucket configuration as shown below. 
@@ -134,7 +134,9 @@ s3bucket: # <1> inline: host: test-minio # <3> port: 9000 # <4> - secretClass: minio-credentials # <5> + accessStyle: Path + credentials: + secretClass: s3-credentials-class # <5> ---- <1> Entry point for the bucket configuration. <2> Bucket name. @@ -166,7 +168,9 @@ spec: inline: host: test-minio port: 9000 - secretClass: minio-credentials + accessStyle: Path + credentials: + secretClass: minio-credentials-class ---- This has the advantage that bucket configuration can be shared across `SparkApplication`s and reduces the cost of updating these details. From 309485b8fa48363ced45b01d9c9e665a650cf346 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Wed, 22 Jun 2022 08:37:23 +0200 Subject: [PATCH 10/30] updated changelog --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a752054..1e0f31cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,9 +7,9 @@ All notable changes to this project will be documented in this file. ### Added - Added new fields to govern image pull policy ([#75]) -- New `nodeSelector` fields for both the driver and the excutors ([#76]) +- New `nodeSelector` fields for both the driver and the executors ([#76]) - Mirror driver pod status to the corresponding spark application ([#77]) -- Use current S3 connection/bucket structs ([#86]) +- BREAKING: Use current S3 connection/bucket structs ([#86]) [#75]: https://github.com/stackabletech/spark-k8s-operator/pull/75 [#76]: https://github.com/stackabletech/spark-k8s-operator/pull/76 From e662046797fa6aad93cbdc79051c694c6d01ebf8 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Wed, 22 Jun 2022 09:41:48 +0200 Subject: [PATCH 11/30] corrected changelog --- CHANGELOG.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 70da2692..c5615e3b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,14 @@ All notable changes to this project will be documented in this file. ## [Unreleased] +### Added + +### Changed + +- BREAKING: Use current S3 connection/bucket structs ([#86]) + +[#86]: https://github.com/stackabletech/spark-k8s-operator/pull/86 + ## [0.2.0] - 2022-06-21 ### Added @@ -11,12 +19,10 @@ All notable changes to this project will be documented in this file. 
- Added new fields to govern image pull policy ([#75]) - New `nodeSelector` fields for both the driver and the executors ([#76]) - Mirror driver pod status to the corresponding spark application ([#77]) -- BREAKING: Use current S3 connection/bucket structs ([#86]) [#75]: https://github.com/stackabletech/spark-k8s-operator/pull/75 [#76]: https://github.com/stackabletech/spark-k8s-operator/pull/76 [#77]: https://github.com/stackabletech/spark-k8s-operator/pull/77 -[#86]: https://github.com/stackabletech/spark-k8s-operator/pull/86 ### Changed From 05287e1930bc0c292348461af7b7ac3df10c2b47 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Fri, 24 Jun 2022 10:24:53 +0200 Subject: [PATCH 12/30] test mixed access modes --- tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml | 1 + .../kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 | 1 + tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml | 1 + .../kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 | 1 + tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml | 1 + .../kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 | 1 + 6 files changed, 6 insertions(+) diff --git a/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml b/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml index bdcd9577..e08f7396 100644 --- a/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml @@ -6,6 +6,7 @@ metadata: spec: accessModes: - ReadWriteOnce + - ReadOnlyMany resources: requests: storage: 1Gi diff --git a/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 index 07e3cf17..f8f978e5 100644 --- a/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 @@ -22,6 +22,7 @@ spec: - name: spark-ny-deps persistentVolumeClaim: claimName: spark-ny-pvc + readOnly: true - name: cm-job-arguments configMap: name: cm-job-arguments diff --git a/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml b/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml index 8e7f5783..2fb647a5 100644 --- a/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml @@ -6,6 +6,7 @@ metadata: spec: accessModes: - ReadWriteOnce + - ReadOnlyMany resources: requests: storage: 1Gi diff --git a/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 index 2ced72ce..6f540b7f 100644 --- a/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 @@ -23,6 +23,7 @@ spec: - name: spark-pi-deps persistentVolumeClaim: claimName: spark-pi-pvc + readOnly: true sparkConf: spark.hadoop.fs.s3a.aws.credentials.provider: "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" spark.driver.extraClassPath: "/dependencies/jars/hadoop-aws-3.2.0.jar:/dependencies/jars/aws-java-sdk-bundle-1.11.375.jar" diff --git a/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml b/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml index 8e7f5783..2fb647a5 100644 --- a/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml @@ -6,6 +6,7 @@ metadata: spec: 
accessModes: - ReadWriteOnce + - ReadOnlyMany resources: requests: storage: 1Gi diff --git a/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 index f7143249..6be20a20 100644 --- a/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 @@ -14,6 +14,7 @@ spec: - name: spark-pi-deps persistentVolumeClaim: claimName: spark-pi-pvc + readOnly: true s3bucket: inline: bucketName: my-bucket From b938193c3b500f19f711881eed921d6fe3786411 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Fri, 24 Jun 2022 14:20:13 +0200 Subject: [PATCH 13/30] renamed function --- rust/crd/src/lib.rs | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 9a6a8bc1..8ad1364e 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -219,8 +219,7 @@ impl SparkApplication { .cloned() .collect(); - self.check_mounts(&mut result, s3bucket); - + result = self.add_common_volume_mounts(result.to_owned(), s3bucket); result } @@ -235,21 +234,20 @@ impl SparkApplication { .cloned() .collect(); - self.check_mounts(&mut result, s3bucket); - + result = self.add_common_volume_mounts(result.to_owned(), s3bucket); result } - fn check_mounts(&self, result: &mut Vec, s3bucket: &Option) { + fn add_common_volume_mounts(&self, mut mounts: Vec, s3bucket: &Option) -> Vec { if self.spec.image.is_some() { - result.push(VolumeMount { + mounts.push(VolumeMount { name: VOLUME_MOUNT_NAME_JOB.into(), mount_path: VOLUME_MOUNT_PATH_JOB.into(), ..VolumeMount::default() }); } if self.requirements().is_some() { - result.push(VolumeMount { + mounts.push(VolumeMount { name: VOLUME_MOUNT_NAME_REQ.into(), mount_path: VOLUME_MOUNT_PATH_REQ.into(), ..VolumeMount::default() @@ -262,12 +260,13 @@ impl SparkApplication { .. 
}) = s3_conn { - result.push(VolumeMount { + mounts.push(VolumeMount { name: "s3-credentials".into(), mount_path: S3_SECRET_DIR_NAME.into(), ..VolumeMount::default() }); } + mounts } pub fn recommended_labels(&self) -> BTreeMap { From f527b7dfd6141221448ee4db71e07b1e83dddf2b Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Fri, 24 Jun 2022 15:00:15 +0200 Subject: [PATCH 14/30] changes following review --- rust/crd/src/lib.rs | 15 ++++++++++--- .../src/spark_k8s_controller.rs | 21 +++++++++++++++++++ 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 8ad1364e..45a242c3 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -238,7 +238,11 @@ impl SparkApplication { result } - fn add_common_volume_mounts(&self, mut mounts: Vec, s3bucket: &Option) -> Vec { + fn add_common_volume_mounts( + &self, + mut mounts: Vec, + s3bucket: &Option, + ) -> Vec { if self.spec.image.is_some() { mounts.push(VolumeMount { name: VOLUME_MOUNT_NAME_JOB.into(), @@ -336,8 +340,13 @@ impl SparkApplication { } if let Some(conn) = s3bucket.as_ref().and_then(|i| i.connection.as_ref()) { - if let Some(S3AccessStyle::Path) = conn.access_style { - submit_cmd.push("--conf spark.hadoop.fs.s3a.path.style.access=true".to_string()); + match conn.access_style { + Some(S3AccessStyle::Path) => { + submit_cmd + .push("--conf spark.hadoop.fs.s3a.path.style.access=true".to_string()); + } + Some(S3AccessStyle::VirtualHosted) => {} + None => {} } if conn.credentials.as_ref().is_some() { // We don't use the credentials at all here but assume they are available diff --git a/rust/operator-binary/src/spark_k8s_controller.rs b/rust/operator-binary/src/spark_k8s_controller.rs index 096c084c..71411a8e 100644 --- a/rust/operator-binary/src/spark_k8s_controller.rs +++ b/rust/operator-binary/src/spark_k8s_controller.rs @@ -4,6 +4,7 @@ use stackable_operator::builder::{ }; use stackable_operator::commons::s3::InlinedS3BucketSpec; +use stackable_operator::commons::tls::{CaCert, TlsVerification}; use stackable_operator::k8s_openapi::api::batch::v1::{Job, JobSpec}; use stackable_operator::k8s_openapi::api::core::v1::{ ConfigMap, ConfigMapVolumeSource, Container, EmptyDirVolumeSource, EnvVar, Pod, PodSpec, @@ -74,6 +75,10 @@ pub enum Error { S3Bucket { source: stackable_operator::error::Error, }, + #[snafu(display("tls non-verification not supported"))] + S3TlsNoVerificationNotSupported, + #[snafu(display("ca-cert verification not supported"))] + S3TlsCaVerificationNotSupported, } type Result = std::result::Result; @@ -101,6 +106,22 @@ pub async fn reconcile( _ => None, }; + if let Some(conn) = s3bucket.as_ref().and_then(|i| i.connection.as_ref()) { + if let Some(tls) = &conn.tls { + match &tls.verification { + TlsVerification::None {} => return S3TlsNoVerificationNotSupportedSnafu.fail(), + TlsVerification::Server(server_verification) => { + match &server_verification.ca_cert { + CaCert::WebPki {} => {} + CaCert::SecretClass(_) => { + return S3TlsCaVerificationNotSupportedSnafu.fail() + } + } + } + } + } + } + if let Some(conn) = s3bucket.as_ref().and_then(|i| i.connection.as_ref()) { if conn.tls.as_ref().is_some() { tracing::warn!("The resource indicates S3-access should use TLS: TLS-verification has not yet been implemented \ From cf47871068e2d1fcd0c9071ea99bd915d907edfd Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Fri, 24 Jun 2022 16:00:38 +0200 Subject: [PATCH 15/30] Update rust/crd/src/lib.rs Co-authored-by: Razvan-Daniel Mihai 
<84674+razvan@users.noreply.github.com> --- rust/crd/src/lib.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 45a242c3..61a62e72 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -234,8 +234,7 @@ impl SparkApplication { .cloned() .collect(); - result = self.add_common_volume_mounts(result.to_owned(), s3bucket); - result + self.add_common_volume_mounts(result.to_owned(), s3bucket) } fn add_common_volume_mounts( From 4aa7ccda9386c262a7ae6315ada66a3521f12d5e Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Fri, 24 Jun 2022 16:03:43 +0200 Subject: [PATCH 16/30] minor cleanup --- rust/crd/src/lib.rs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 61a62e72..49c26341 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -209,7 +209,7 @@ impl SparkApplication { &self, s3bucket: &Option, ) -> Vec { - let mut result: Vec = self + let result: Vec = self .spec .executor .as_ref() @@ -219,12 +219,11 @@ impl SparkApplication { .cloned() .collect(); - result = self.add_common_volume_mounts(result.to_owned(), s3bucket); - result + self.add_common_volume_mounts(result, s3bucket) } pub fn driver_volume_mounts(&self, s3bucket: &Option) -> Vec { - let mut result: Vec = self + let result: Vec = self .spec .driver .as_ref() @@ -234,7 +233,7 @@ impl SparkApplication { .cloned() .collect(); - self.add_common_volume_mounts(result.to_owned(), s3bucket) + self.add_common_volume_mounts(result, s3bucket) } fn add_common_volume_mounts( From 5131dea10d8ff25ab4602b09fd56dfd89a03757f Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Fri, 24 Jun 2022 16:20:25 +0200 Subject: [PATCH 17/30] refactoring of secret/access keys as per review suggestion --- rust/crd/src/constants.rs | 2 -- rust/crd/src/lib.rs | 31 ++++++------------------------- 2 files changed, 6 insertions(+), 27 deletions(-) diff --git a/rust/crd/src/constants.rs b/rust/crd/src/constants.rs index 579be9eb..ffe871e3 100644 --- a/rust/crd/src/constants.rs +++ b/rust/crd/src/constants.rs @@ -17,8 +17,6 @@ pub const CONTAINER_NAME_DRIVER: &str = "spark-driver"; pub const CONTAINER_IMAGE_NAME_EXECUTOR: &str = "dummy-overwritten-by-command-line"; pub const CONTAINER_NAME_EXECUTOR: &str = "spark-executor"; -pub const ENV_AWS_ACCESS_KEY_ID: &str = "AWS_ACCESS_KEY_ID"; -pub const ENV_AWS_SECRET_ACCESS_KEY: &str = "AWS_SECRET_ACCESS_KEY"; pub const ACCESS_KEY_ID: &str = "accessKeyId"; pub const SECRET_ACCESS_KEY: &str = "secretAccessKey"; pub const S3_SECRET_DIR_NAME: &str = "/stackable/secrets"; diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 49c26341..a586814b 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -294,27 +294,6 @@ impl SparkApplication { let mut submit_cmd: Vec = vec![]; - let s3_conn = s3bucket.as_ref().and_then(|i| i.connection.as_ref()); - - if let Some(S3ConnectionSpec { - credentials: Some(_credentials), - .. 
- }) = s3_conn - { - submit_cmd.push(format!( - "export {env_var}=$(cat {secret_dir}/{file_name}) && ", - env_var = ENV_AWS_ACCESS_KEY_ID, - secret_dir = S3_SECRET_DIR_NAME, - file_name = ACCESS_KEY_ID - )); - submit_cmd.push(format!( - "export {env_var}=$(cat {secret_dir}/{file_name}) && ", - env_var = ENV_AWS_SECRET_ACCESS_KEY, - secret_dir = S3_SECRET_DIR_NAME, - file_name = SECRET_ACCESS_KEY - )); - } - submit_cmd.extend(vec![ "/stackable/spark/bin/spark-submit".to_string(), "--verbose".to_string(), @@ -349,12 +328,14 @@ impl SparkApplication { if conn.credentials.as_ref().is_some() { // We don't use the credentials at all here but assume they are available submit_cmd.push(format!( - "--conf spark.hadoop.fs.s3a.access.key=${}", - ENV_AWS_ACCESS_KEY_ID + "--conf spark.hadoop.fs.s3a.access.key=$(cat {secret_dir}/{file_name})", + secret_dir = S3_SECRET_DIR_NAME, + file_name = ACCESS_KEY_ID )); submit_cmd.push(format!( - "--conf spark.hadoop.fs.s3a.secret.key=${}", - ENV_AWS_SECRET_ACCESS_KEY + "--conf spark.hadoop.fs.s3a.secret.key=$(cat {secret_dir}/{file_name})", + secret_dir = S3_SECRET_DIR_NAME, + file_name = SECRET_ACCESS_KEY )); } } From 748d188f0179d104f7403a3e86d969b343412961 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Fri, 24 Jun 2022 16:39:59 +0200 Subject: [PATCH 18/30] reverted earlier changes --- tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml | 1 - .../kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 | 1 - tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml | 1 - .../kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 | 1 - tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml | 1 - .../kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 | 1 - 6 files changed, 6 deletions(-) diff --git a/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml b/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml index e08f7396..bdcd9577 100644 --- a/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml @@ -6,7 +6,6 @@ metadata: spec: accessModes: - ReadWriteOnce - - ReadOnlyMany resources: requests: storage: 1Gi diff --git a/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 index f8f978e5..07e3cf17 100644 --- a/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 @@ -22,7 +22,6 @@ spec: - name: spark-ny-deps persistentVolumeClaim: claimName: spark-ny-pvc - readOnly: true - name: cm-job-arguments configMap: name: cm-job-arguments diff --git a/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml b/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml index 2fb647a5..8e7f5783 100644 --- a/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml @@ -6,7 +6,6 @@ metadata: spec: accessModes: - ReadWriteOnce - - ReadOnlyMany resources: requests: storage: 1Gi diff --git a/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 index 6f540b7f..2ced72ce 100644 --- a/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 @@ -23,7 +23,6 @@ spec: - name: spark-pi-deps persistentVolumeClaim: claimName: spark-pi-pvc - readOnly: true 
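[Illustrative sketch, not part of PATCH 17/30] With the refactoring above, the exported AWS access/secret key environment variables are gone and the submit command references the mounted secret files directly. A minimal sketch of the two flags that get appended, reusing the constants from `rust/crd/src/constants.rs`; it assumes (the patch does not state this explicitly) that the joined command is run through a shell in the job container, so the `$(cat ...)` fragments are expanded there at run time:

```rust
// Illustrative only: mirrors the format! calls added in PATCH 17/30.
const S3_SECRET_DIR_NAME: &str = "/stackable/secrets";
const ACCESS_KEY_ID: &str = "accessKeyId";
const SECRET_ACCESS_KEY: &str = "secretAccessKey";

fn s3_credential_flags() -> Vec<String> {
    vec![
        format!(
            "--conf spark.hadoop.fs.s3a.access.key=$(cat {secret_dir}/{file_name})",
            secret_dir = S3_SECRET_DIR_NAME,
            file_name = ACCESS_KEY_ID
        ),
        format!(
            "--conf spark.hadoop.fs.s3a.secret.key=$(cat {secret_dir}/{file_name})",
            secret_dir = S3_SECRET_DIR_NAME,
            file_name = SECRET_ACCESS_KEY
        ),
    ]
}

fn main() {
    // The $(cat ...) pieces stay literal in the rendered command; only the shell in the
    // job container resolves them, so the spark-submit template never embeds key material.
    println!("{}", s3_credential_flags().join(" "));
}
```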
sparkConf: spark.hadoop.fs.s3a.aws.credentials.provider: "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" spark.driver.extraClassPath: "/dependencies/jars/hadoop-aws-3.2.0.jar:/dependencies/jars/aws-java-sdk-bundle-1.11.375.jar" diff --git a/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml b/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml index 2fb647a5..8e7f5783 100644 --- a/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml @@ -6,7 +6,6 @@ metadata: spec: accessModes: - ReadWriteOnce - - ReadOnlyMany resources: requests: storage: 1Gi diff --git a/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 index 6be20a20..f7143249 100644 --- a/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 @@ -14,7 +14,6 @@ spec: - name: spark-pi-deps persistentVolumeClaim: claimName: spark-pi-pvc - readOnly: true s3bucket: inline: bucketName: my-bucket From 8ff8078653bfdaefa2a10357f60b6984a88fdc51 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Fri, 24 Jun 2022 16:57:17 +0200 Subject: [PATCH 19/30] specify node selection --- tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml | 2 ++ .../kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 | 4 ++++ tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml | 2 ++ .../kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 | 4 ++++ tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml | 2 ++ .../kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 | 4 ++++ 6 files changed, 18 insertions(+) diff --git a/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml b/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml index bdcd9577..c8e81a73 100644 --- a/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml @@ -17,6 +17,8 @@ metadata: spec: template: spec: + nodeSelector: + node: "1" restartPolicy: Never volumes: - name: job-deps diff --git a/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 index 07e3cf17..7e3a98ce 100644 --- a/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 @@ -48,6 +48,8 @@ spec: mountPath: /dependencies - name: cm-job-arguments mountPath: /arguments + nodeSelector: + node: "1" executor: cores: 1 instances: 3 @@ -57,3 +59,5 @@ spec: mountPath: /dependencies - name: cm-job-arguments mountPath: /arguments + nodeSelector: + node: "1" diff --git a/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml b/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml index 8e7f5783..2acf0616 100644 --- a/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml @@ -17,6 +17,8 @@ metadata: spec: template: spec: + nodeSelector: + node: "1" restartPolicy: Never volumes: - name: job-deps diff --git a/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 index 2ced72ce..720d2d5d 100644 --- a/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 +++ 
b/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 @@ -34,6 +34,8 @@ spec: volumeMounts: - name: spark-pi-deps mountPath: /dependencies + nodeSelector: + node: "1" executor: cores: 1 instances: 1 @@ -41,3 +43,5 @@ spec: volumeMounts: - name: spark-pi-deps mountPath: /dependencies + nodeSelector: + node: "1" diff --git a/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml b/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml index 8e7f5783..2acf0616 100644 --- a/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml @@ -17,6 +17,8 @@ metadata: spec: template: spec: + nodeSelector: + node: "1" restartPolicy: Never volumes: - name: job-deps diff --git a/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 index f7143249..afc03352 100644 --- a/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 @@ -33,6 +33,8 @@ spec: volumeMounts: - name: spark-pi-deps mountPath: /dependencies + nodeSelector: + node: "1" executor: cores: 1 instances: 1 @@ -40,3 +42,5 @@ spec: volumeMounts: - name: spark-pi-deps mountPath: /dependencies + nodeSelector: + node: "1" From 6eeade42e45eb961a54b014c85ca67d00768d122 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Mon, 27 Jun 2022 11:31:03 +0200 Subject: [PATCH 20/30] use different nodes for each job --- tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml | 2 +- .../kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 | 4 ++-- tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml | 2 +- .../kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml b/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml index 2acf0616..f856ed08 100644 --- a/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml @@ -18,7 +18,7 @@ spec: template: spec: nodeSelector: - node: "1" + node: "2" restartPolicy: Never volumes: - name: job-deps diff --git a/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 index 720d2d5d..a6b09c91 100644 --- a/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 @@ -35,7 +35,7 @@ spec: - name: spark-pi-deps mountPath: /dependencies nodeSelector: - node: "1" + node: "2" executor: cores: 1 instances: 1 @@ -44,4 +44,4 @@ spec: - name: spark-pi-deps mountPath: /dependencies nodeSelector: - node: "1" + node: "2" diff --git a/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml b/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml index 2acf0616..f0cb4c78 100644 --- a/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml @@ -18,7 +18,7 @@ spec: template: spec: nodeSelector: - node: "1" + node: "3" restartPolicy: Never volumes: - name: job-deps diff --git a/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 index afc03352..60d124bd 100644 --- 
a/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 @@ -34,7 +34,7 @@ spec: - name: spark-pi-deps mountPath: /dependencies nodeSelector: - node: "1" + node: "3" executor: cores: 1 instances: 1 @@ -43,4 +43,4 @@ spec: - name: spark-pi-deps mountPath: /dependencies nodeSelector: - node: "1" + node: "3" From 9a2912da44f5dec40db21f94b337860a65e59ceb Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Mon, 27 Jun 2022 12:03:50 +0200 Subject: [PATCH 21/30] applied TTL to job --- tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml | 1 + tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml | 1 + tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml | 1 + 3 files changed, 3 insertions(+) diff --git a/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml b/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml index c8e81a73..e954e5e2 100644 --- a/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml @@ -15,6 +15,7 @@ kind: Job metadata: name: spark-ny-deps-job spec: + ttlSecondsAfterFinished: 10 template: spec: nodeSelector: diff --git a/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml b/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml index f856ed08..3e4e6d39 100644 --- a/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml @@ -15,6 +15,7 @@ kind: Job metadata: name: spark-pi-deps-job spec: + ttlSecondsAfterFinished: 10 template: spec: nodeSelector: diff --git a/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml b/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml index f0cb4c78..055ded29 100644 --- a/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml @@ -15,6 +15,7 @@ kind: Job metadata: name: spark-pi-deps-job spec: + ttlSecondsAfterFinished: 10 template: spec: nodeSelector: From edc20d65c64dbc7cfdbbf412f45399d00e9a6a81 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Mon, 27 Jun 2022 13:42:47 +0200 Subject: [PATCH 22/30] job-specific naming --- .../kuttl/spark-ny-public-s3/02-deps-volume.yaml | 4 ++-- .../kuttl/spark-pi-private-s3/02-deps-volume.yaml | 10 +++++----- .../spark-pi-private-s3/10-deploy-spark-app.yaml.j2 | 8 ++++---- .../kuttl/spark-pi-public-s3/02-deps-volume.yaml | 10 +++++----- .../spark-pi-public-s3/10-deploy-spark-app.yaml.j2 | 8 ++++---- 5 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml b/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml index e954e5e2..fa1ff3e3 100644 --- a/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml @@ -22,7 +22,7 @@ spec: node: "1" restartPolicy: Never volumes: - - name: job-deps + - name: job-deps-ny persistentVolumeClaim: claimName: spark-ny-pvc containers: @@ -45,7 +45,7 @@ spec: "mkdir -p ${DEST_DIR} && curl -L https://search.maven.org/remotecontent?filepath=org/apache/hadoop/hadoop-aws/${HADOOP}.0/hadoop-aws-${HADOOP}.0.jar -o ${DEST_DIR}/hadoop-aws-${HADOOP}.0.jar && curl -L https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/${AWS}/aws-java-sdk-bundle-${AWS}.jar -o ${DEST_DIR}/aws-java-sdk-bundle-${AWS}.jar && chown -R stackable:stackable ${DEST_DIR} && chmod -R 
a=,u=rwX ${DEST_DIR}", ] volumeMounts: - - name: job-deps + - name: job-deps-ny mountPath: /dependencies securityContext: runAsUser: 0 diff --git a/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml b/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml index 3e4e6d39..826348ad 100644 --- a/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml @@ -2,7 +2,7 @@ apiVersion: v1 kind: PersistentVolumeClaim metadata: - name: spark-pi-pvc + name: spark-pi-private-pvc spec: accessModes: - ReadWriteOnce @@ -13,7 +13,7 @@ spec: apiVersion: batch/v1 kind: Job metadata: - name: spark-pi-deps-job + name: spark-pi-private-deps-job spec: ttlSecondsAfterFinished: 10 template: @@ -22,9 +22,9 @@ spec: node: "2" restartPolicy: Never volumes: - - name: job-deps + - name: job-deps-pi-private persistentVolumeClaim: - claimName: spark-pi-pvc + claimName: spark-pi-private-pvc containers: - name: aws-deps image: docker.stackable.tech/stackable/tools:0.2.0-stackable0 @@ -45,7 +45,7 @@ spec: "mkdir -p ${DEST_DIR} && curl -L https://search.maven.org/remotecontent?filepath=org/apache/hadoop/hadoop-aws/${HADOOP}.0/hadoop-aws-${HADOOP}.0.jar -o ${DEST_DIR}/hadoop-aws-${HADOOP}.0.jar && curl -L https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/${AWS}/aws-java-sdk-bundle-${AWS}.jar -o ${DEST_DIR}/aws-java-sdk-bundle-${AWS}.jar && chown -R stackable:stackable ${DEST_DIR} && chmod -R a=,u=rwX ${DEST_DIR}", ] volumeMounts: - - name: job-deps + - name: job-deps-pi-private mountPath: /dependencies securityContext: runAsUser: 0 diff --git a/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 index a6b09c91..447dee02 100644 --- a/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 @@ -20,9 +20,9 @@ spec: credentials: secretClass: s3-credentials-class volumes: - - name: spark-pi-deps + - name: spark-pi-private-deps persistentVolumeClaim: - claimName: spark-pi-pvc + claimName: spark-pi-private-pvc sparkConf: spark.hadoop.fs.s3a.aws.credentials.provider: "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" spark.driver.extraClassPath: "/dependencies/jars/hadoop-aws-3.2.0.jar:/dependencies/jars/aws-java-sdk-bundle-1.11.375.jar" @@ -32,7 +32,7 @@ spec: coreLimit: "1200m" memory: "512m" volumeMounts: - - name: spark-pi-deps + - name: spark-pi-private-deps mountPath: /dependencies nodeSelector: node: "2" @@ -41,7 +41,7 @@ spec: instances: 1 memory: "512m" volumeMounts: - - name: spark-pi-deps + - name: spark-pi-private-deps mountPath: /dependencies nodeSelector: node: "2" diff --git a/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml b/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml index 055ded29..5cd21728 100644 --- a/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml @@ -2,7 +2,7 @@ apiVersion: v1 kind: PersistentVolumeClaim metadata: - name: spark-pi-pvc + name: spark-pi-public-pvc spec: accessModes: - ReadWriteOnce @@ -13,7 +13,7 @@ spec: apiVersion: batch/v1 kind: Job metadata: - name: spark-pi-deps-job + name: spark-pi-public-deps-job spec: ttlSecondsAfterFinished: 10 template: @@ -22,9 +22,9 @@ spec: node: "3" restartPolicy: Never volumes: - - name: job-deps + - name: job-deps-pi-public persistentVolumeClaim: - claimName: spark-pi-pvc + 
claimName: spark-pi-public-pvc containers: - name: aws-deps image: docker.stackable.tech/stackable/tools:0.2.0-stackable0 @@ -45,7 +45,7 @@ spec: "mkdir -p ${DEST_DIR} && curl -L https://search.maven.org/remotecontent?filepath=org/apache/hadoop/hadoop-aws/${HADOOP}.0/hadoop-aws-${HADOOP}.0.jar -o ${DEST_DIR}/hadoop-aws-${HADOOP}.0.jar && curl -L https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/${AWS}/aws-java-sdk-bundle-${AWS}.jar -o ${DEST_DIR}/aws-java-sdk-bundle-${AWS}.jar && chown -R stackable:stackable ${DEST_DIR} && chmod -R a=,u=rwX ${DEST_DIR}", ] volumeMounts: - - name: job-deps + - name: job-deps-pi-public mountPath: /dependencies securityContext: runAsUser: 0 diff --git a/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 index 60d124bd..ac1a833b 100644 --- a/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 @@ -11,9 +11,9 @@ spec: mainClass: org.apache.spark.examples.SparkPi mainApplicationFile: s3a://my-bucket/spark-examples_2.12-{{ test_scenario['values']['spark'] }}.jar volumes: - - name: spark-pi-deps + - name: spark-pi-public-deps persistentVolumeClaim: - claimName: spark-pi-pvc + claimName: spark-pi-public-pvc s3bucket: inline: bucketName: my-bucket @@ -31,7 +31,7 @@ spec: coreLimit: "1200m" memory: "512m" volumeMounts: - - name: spark-pi-deps + - name: spark-pi-public-deps mountPath: /dependencies nodeSelector: node: "3" @@ -40,7 +40,7 @@ spec: instances: 1 memory: "512m" volumeMounts: - - name: spark-pi-deps + - name: spark-pi-public-deps mountPath: /dependencies nodeSelector: node: "3" From efc1bf4f67254ee95c4e0e70d2f295abf39000b7 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Mon, 27 Jun 2022 14:42:26 +0200 Subject: [PATCH 23/30] consolidate changes --- .../templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml | 6 +++--- .../templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml | 6 +++--- .../kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 | 6 +++--- .../templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml | 6 +++--- .../kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 | 6 +++--- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml b/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml index fa1ff3e3..5b835807 100644 --- a/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml @@ -15,14 +15,14 @@ kind: Job metadata: name: spark-ny-deps-job spec: - ttlSecondsAfterFinished: 10 + #ttlSecondsAfterFinished: 120 template: spec: nodeSelector: node: "1" restartPolicy: Never volumes: - - name: job-deps-ny + - name: job-deps persistentVolumeClaim: claimName: spark-ny-pvc containers: @@ -45,7 +45,7 @@ spec: "mkdir -p ${DEST_DIR} && curl -L https://search.maven.org/remotecontent?filepath=org/apache/hadoop/hadoop-aws/${HADOOP}.0/hadoop-aws-${HADOOP}.0.jar -o ${DEST_DIR}/hadoop-aws-${HADOOP}.0.jar && curl -L https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/${AWS}/aws-java-sdk-bundle-${AWS}.jar -o ${DEST_DIR}/aws-java-sdk-bundle-${AWS}.jar && chown -R stackable:stackable ${DEST_DIR} && chmod -R a=,u=rwX ${DEST_DIR}", ] volumeMounts: - - name: job-deps-ny + - name: job-deps mountPath: /dependencies securityContext: runAsUser: 0 diff --git a/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml 
b/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml index 826348ad..00e05d1a 100644 --- a/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml @@ -13,7 +13,7 @@ spec: apiVersion: batch/v1 kind: Job metadata: - name: spark-pi-private-deps-job + name: spark-pi-deps-job spec: ttlSecondsAfterFinished: 10 template: @@ -22,7 +22,7 @@ spec: node: "2" restartPolicy: Never volumes: - - name: job-deps-pi-private + - name: job-deps persistentVolumeClaim: claimName: spark-pi-private-pvc containers: @@ -45,7 +45,7 @@ spec: "mkdir -p ${DEST_DIR} && curl -L https://search.maven.org/remotecontent?filepath=org/apache/hadoop/hadoop-aws/${HADOOP}.0/hadoop-aws-${HADOOP}.0.jar -o ${DEST_DIR}/hadoop-aws-${HADOOP}.0.jar && curl -L https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/${AWS}/aws-java-sdk-bundle-${AWS}.jar -o ${DEST_DIR}/aws-java-sdk-bundle-${AWS}.jar && chown -R stackable:stackable ${DEST_DIR} && chmod -R a=,u=rwX ${DEST_DIR}", ] volumeMounts: - - name: job-deps-pi-private + - name: job-deps mountPath: /dependencies securityContext: runAsUser: 0 diff --git a/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 index 447dee02..637d0719 100644 --- a/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 @@ -20,7 +20,7 @@ spec: credentials: secretClass: s3-credentials-class volumes: - - name: spark-pi-private-deps + - name: spark-pi-deps persistentVolumeClaim: claimName: spark-pi-private-pvc sparkConf: @@ -32,7 +32,7 @@ spec: coreLimit: "1200m" memory: "512m" volumeMounts: - - name: spark-pi-private-deps + - name: spark-pi-deps mountPath: /dependencies nodeSelector: node: "2" @@ -41,7 +41,7 @@ spec: instances: 1 memory: "512m" volumeMounts: - - name: spark-pi-private-deps + - name: spark-pi-deps mountPath: /dependencies nodeSelector: node: "2" diff --git a/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml b/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml index 5cd21728..3813276e 100644 --- a/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml @@ -13,7 +13,7 @@ spec: apiVersion: batch/v1 kind: Job metadata: - name: spark-pi-public-deps-job + name: spark-pi-deps-job spec: ttlSecondsAfterFinished: 10 template: @@ -22,7 +22,7 @@ spec: node: "3" restartPolicy: Never volumes: - - name: job-deps-pi-public + - name: job-deps persistentVolumeClaim: claimName: spark-pi-public-pvc containers: @@ -45,7 +45,7 @@ spec: "mkdir -p ${DEST_DIR} && curl -L https://search.maven.org/remotecontent?filepath=org/apache/hadoop/hadoop-aws/${HADOOP}.0/hadoop-aws-${HADOOP}.0.jar -o ${DEST_DIR}/hadoop-aws-${HADOOP}.0.jar && curl -L https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/${AWS}/aws-java-sdk-bundle-${AWS}.jar -o ${DEST_DIR}/aws-java-sdk-bundle-${AWS}.jar && chown -R stackable:stackable ${DEST_DIR} && chmod -R a=,u=rwX ${DEST_DIR}", ] volumeMounts: - - name: job-deps-pi-public + - name: job-deps mountPath: /dependencies securityContext: runAsUser: 0 diff --git a/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 index ac1a833b..690a1f35 100644 --- a/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 +++ 
b/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 @@ -11,7 +11,7 @@ spec: mainClass: org.apache.spark.examples.SparkPi mainApplicationFile: s3a://my-bucket/spark-examples_2.12-{{ test_scenario['values']['spark'] }}.jar volumes: - - name: spark-pi-public-deps + - name: spark-pi-deps persistentVolumeClaim: claimName: spark-pi-public-pvc s3bucket: @@ -31,7 +31,7 @@ spec: coreLimit: "1200m" memory: "512m" volumeMounts: - - name: spark-pi-public-deps + - name: spark-pi-deps mountPath: /dependencies nodeSelector: node: "3" @@ -40,7 +40,7 @@ spec: instances: 1 memory: "512m" volumeMounts: - - name: spark-pi-public-deps + - name: spark-pi-deps mountPath: /dependencies nodeSelector: node: "3" From 104964bc812fc267b651f035e11b39224dfa1a60 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Mon, 27 Jun 2022 14:44:01 +0200 Subject: [PATCH 24/30] commented out TTLs --- tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml | 2 +- tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml b/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml index 00e05d1a..6951a4a0 100644 --- a/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml @@ -15,7 +15,7 @@ kind: Job metadata: name: spark-pi-deps-job spec: - ttlSecondsAfterFinished: 10 + #ttlSecondsAfterFinished: 120 template: spec: nodeSelector: diff --git a/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml b/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml index 3813276e..14b69d1b 100644 --- a/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml @@ -15,7 +15,7 @@ kind: Job metadata: name: spark-pi-deps-job spec: - ttlSecondsAfterFinished: 10 + #ttlSecondsAfterFinished: 120 template: spec: nodeSelector: From 39aa778ce71ce4c78b12b35feddf22838450405c Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Mon, 27 Jun 2022 16:07:33 +0200 Subject: [PATCH 25/30] add node selector to job --- deploy/crd/sparkapplication.crd.yaml | 5 +++++ rust/crd/src/lib.rs | 6 ++++++ rust/operator-binary/src/spark_k8s_controller.rs | 1 + .../kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 | 2 ++ .../kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 | 2 ++ .../kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 | 2 ++ 6 files changed, 18 insertions(+) diff --git a/deploy/crd/sparkapplication.crd.yaml b/deploy/crd/sparkapplication.crd.yaml index f5239cca..8b3b6079 100644 --- a/deploy/crd/sparkapplication.crd.yaml +++ b/deploy/crd/sparkapplication.crd.yaml @@ -272,6 +272,11 @@ spec: mode: nullable: true type: string + nodeSelector: + additionalProperties: + type: string + nullable: true + type: object s3bucket: description: Operators are expected to define fields for this type in order to work with S3 buckets. 
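[Illustrative sketch, not part of PATCH 25/30] The `nodeSelector` property added to the CRD above is read back through the new `job_node_selector()` accessor later in this patch and placed on the submit job's pod spec (the controller diff that follows sets `node_selector: spark_application.job_node_selector()`). A minimal sketch of that plumbing, assuming the `PodSpec` type from `k8s-openapi` as re-exported by stackable-operator, the same import used elsewhere in this series:

```rust
// Illustrative only: a PodSpec picking up an optional node selector.
use std::collections::BTreeMap;

use stackable_operator::k8s_openapi::api::core::v1::PodSpec;

fn job_pod_spec(node_selector: Option<BTreeMap<String, String>>) -> PodSpec {
    PodSpec {
        // If the SparkApplication sets spec.nodeSelector, the job pod inherits it;
        // otherwise the field stays unset and scheduling is unconstrained.
        node_selector,
        ..PodSpec::default()
    }
}

fn main() {
    // Matches the "node: \"1\"" selectors used in the kuttl tests of this series.
    let selector = BTreeMap::from([("node".to_string(), "1".to_string())]);
    let spec = job_pod_spec(Some(selector));
    assert!(spec.node_selector.is_some());
}
```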
nullable: true diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index a586814b..73fef779 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -100,6 +100,8 @@ pub struct SparkApplicationSpec { pub volumes: Option>, #[serde(default, skip_serializing_if = "Option::is_none")] pub env: Option>, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub node_selector: Option>, } #[derive(Clone, Debug, Deserialize, Eq, JsonSchema, PartialEq, Serialize, Display, EnumString)] @@ -411,6 +413,10 @@ impl SparkApplication { .as_ref() .and_then(|executor_config| executor_config.node_selector.clone()) } + + pub fn job_node_selector(&self) -> Option> { + self.spec.node_selector.clone() + } } #[derive(Clone, Debug, Default, Deserialize, Eq, JsonSchema, PartialEq, Serialize)] diff --git a/rust/operator-binary/src/spark_k8s_controller.rs b/rust/operator-binary/src/spark_k8s_controller.rs index 71411a8e..87fa2586 100644 --- a/rust/operator-binary/src/spark_k8s_controller.rs +++ b/rust/operator-binary/src/spark_k8s_controller.rs @@ -388,6 +388,7 @@ fn spark_job( .fs_group(1000) .build() .into(), // Needed for secret-operator + node_selector: spark_application.job_node_selector(), ..PodSpec::default() }), }; diff --git a/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 index 7e3a98ce..03fbfa4d 100644 --- a/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 @@ -18,6 +18,8 @@ spec: mode: cluster mainClass: tech.stackable.demo.spark.NYTLCReport mainApplicationFile: s3a://my-bucket/ny-tlc-report-1.1.0.jar + nodeSelector: + node: "1" volumes: - name: spark-ny-deps persistentVolumeClaim: diff --git a/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 index 637d0719..07f4984d 100644 --- a/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 @@ -9,6 +9,8 @@ spec: mode: cluster mainClass: org.apache.spark.examples.SparkPi mainApplicationFile: s3a://my-bucket/spark-examples_2.12-{{ test_scenario['values']['spark'] }}.jar + nodeSelector: + node: "2" s3bucket: inline: bucketName: my-bucket diff --git a/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 index 690a1f35..6b0f0f33 100644 --- a/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 @@ -10,6 +10,8 @@ spec: mode: cluster mainClass: org.apache.spark.examples.SparkPi mainApplicationFile: s3a://my-bucket/spark-examples_2.12-{{ test_scenario['values']['spark'] }}.jar + nodeSelector: + node: "3" volumes: - name: spark-pi-deps persistentVolumeClaim: From b15cfc300837a949f130cec7eb652f318601e2da Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Mon, 27 Jun 2022 16:11:56 +0200 Subject: [PATCH 26/30] removed commented-out directives --- tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml | 1 - tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml | 1 - tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml | 1 - 3 files changed, 3 deletions(-) diff --git a/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml b/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml index 
5b835807..c8e81a73 100644 --- a/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-ny-public-s3/02-deps-volume.yaml @@ -15,7 +15,6 @@ kind: Job metadata: name: spark-ny-deps-job spec: - #ttlSecondsAfterFinished: 120 template: spec: nodeSelector: diff --git a/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml b/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml index 6951a4a0..19d96f77 100644 --- a/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml @@ -15,7 +15,6 @@ kind: Job metadata: name: spark-pi-deps-job spec: - #ttlSecondsAfterFinished: 120 template: spec: nodeSelector: diff --git a/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml b/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml index 14b69d1b..78c205b7 100644 --- a/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml @@ -15,7 +15,6 @@ kind: Job metadata: name: spark-pi-deps-job spec: - #ttlSecondsAfterFinished: 120 template: spec: nodeSelector: From ae5b689b7ada47ad401e61ec0fd8dfad2c394265 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Mon, 27 Jun 2022 16:21:10 +0200 Subject: [PATCH 27/30] regenerate charts --- deploy/helm/spark-k8s-operator/crds/crds.yaml | 5 +++++ deploy/manifests/crds.yaml | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/deploy/helm/spark-k8s-operator/crds/crds.yaml b/deploy/helm/spark-k8s-operator/crds/crds.yaml index 1bb64143..79e8eede 100644 --- a/deploy/helm/spark-k8s-operator/crds/crds.yaml +++ b/deploy/helm/spark-k8s-operator/crds/crds.yaml @@ -274,6 +274,11 @@ spec: mode: nullable: true type: string + nodeSelector: + additionalProperties: + type: string + nullable: true + type: object s3bucket: description: Operators are expected to define fields for this type in order to work with S3 buckets. nullable: true diff --git a/deploy/manifests/crds.yaml b/deploy/manifests/crds.yaml index e907f8ef..c0129915 100644 --- a/deploy/manifests/crds.yaml +++ b/deploy/manifests/crds.yaml @@ -275,6 +275,11 @@ spec: mode: nullable: true type: string + nodeSelector: + additionalProperties: + type: string + nullable: true + type: object s3bucket: description: Operators are expected to define fields for this type in order to work with S3 buckets. nullable: true From 8a04f5447501da90c3b080e4c27392b3265ad4cc Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Mon, 27 Jun 2022 17:28:26 +0200 Subject: [PATCH 28/30] updated changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c5615e3b..cb009544 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,8 +9,10 @@ All notable changes to this project will be documented in this file. 
### Changed - BREAKING: Use current S3 connection/bucket structs ([#86]) +- Add node selector to top-level job and specify node selection in PVC-relevant tests ([#90]) [#86]: https://github.com/stackabletech/spark-k8s-operator/pull/86 +[#90]: https://github.com/stackabletech/spark-k8s-operator/pull/90 ## [0.2.0] - 2022-06-21 From 34a6e2d724eccd8f39cb30f03e2dddfe0da2aac0 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Tue, 28 Jun 2022 09:22:26 +0200 Subject: [PATCH 29/30] run all jobs on node=1 for Azure tests --- .../templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml | 2 +- .../kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 | 6 +++--- .../templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml | 2 +- .../kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 | 6 +++--- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml b/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml index 19d96f77..7e9edca7 100644 --- a/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-pi-private-s3/02-deps-volume.yaml @@ -18,7 +18,7 @@ spec: template: spec: nodeSelector: - node: "2" + node: "1" restartPolicy: Never volumes: - name: job-deps diff --git a/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 index 07f4984d..9577f112 100644 --- a/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 @@ -10,7 +10,7 @@ spec: mainClass: org.apache.spark.examples.SparkPi mainApplicationFile: s3a://my-bucket/spark-examples_2.12-{{ test_scenario['values']['spark'] }}.jar nodeSelector: - node: "2" + node: "1" s3bucket: inline: bucketName: my-bucket @@ -37,7 +37,7 @@ spec: - name: spark-pi-deps mountPath: /dependencies nodeSelector: - node: "2" + node: "1" executor: cores: 1 instances: 1 @@ -46,4 +46,4 @@ spec: - name: spark-pi-deps mountPath: /dependencies nodeSelector: - node: "2" + node: "1" diff --git a/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml b/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml index 78c205b7..9880525b 100644 --- a/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml +++ b/tests/templates/kuttl/spark-pi-public-s3/02-deps-volume.yaml @@ -18,7 +18,7 @@ spec: template: spec: nodeSelector: - node: "3" + node: "1" restartPolicy: Never volumes: - name: job-deps diff --git a/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 index 6b0f0f33..62c06792 100644 --- a/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 @@ -11,7 +11,7 @@ spec: mainClass: org.apache.spark.examples.SparkPi mainApplicationFile: s3a://my-bucket/spark-examples_2.12-{{ test_scenario['values']['spark'] }}.jar nodeSelector: - node: "3" + node: "1" volumes: - name: spark-pi-deps persistentVolumeClaim: @@ -36,7 +36,7 @@ spec: - name: spark-pi-deps mountPath: /dependencies nodeSelector: - node: "3" + node: "1" executor: cores: 1 instances: 1 @@ -45,4 +45,4 @@ spec: - name: spark-pi-deps mountPath: /dependencies nodeSelector: - node: "3" + node: "1" From e61f5c3a623497f8abcbd77732cf8461ba7e7de9 Mon Sep 17 00:00:00 2001 From: Andrew Kenworthy Date: Tue, 28 Jun 2022 16:43:47 +0200 Subject: [PATCH 
30/30] use driver node selection for job --- deploy/crd/sparkapplication.crd.yaml | 5 ----- deploy/helm/spark-k8s-operator/crds/crds.yaml | 5 ----- deploy/manifests/crds.yaml | 5 ----- rust/crd/src/lib.rs | 6 ------ rust/operator-binary/src/spark_k8s_controller.rs | 2 +- .../kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 | 2 -- .../kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 | 2 -- .../kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 | 2 -- 8 files changed, 1 insertion(+), 28 deletions(-) diff --git a/deploy/crd/sparkapplication.crd.yaml b/deploy/crd/sparkapplication.crd.yaml index 8b3b6079..f5239cca 100644 --- a/deploy/crd/sparkapplication.crd.yaml +++ b/deploy/crd/sparkapplication.crd.yaml @@ -272,11 +272,6 @@ spec: mode: nullable: true type: string - nodeSelector: - additionalProperties: - type: string - nullable: true - type: object s3bucket: description: Operators are expected to define fields for this type in order to work with S3 buckets. nullable: true diff --git a/deploy/helm/spark-k8s-operator/crds/crds.yaml b/deploy/helm/spark-k8s-operator/crds/crds.yaml index 79e8eede..1bb64143 100644 --- a/deploy/helm/spark-k8s-operator/crds/crds.yaml +++ b/deploy/helm/spark-k8s-operator/crds/crds.yaml @@ -274,11 +274,6 @@ spec: mode: nullable: true type: string - nodeSelector: - additionalProperties: - type: string - nullable: true - type: object s3bucket: description: Operators are expected to define fields for this type in order to work with S3 buckets. nullable: true diff --git a/deploy/manifests/crds.yaml b/deploy/manifests/crds.yaml index c0129915..e907f8ef 100644 --- a/deploy/manifests/crds.yaml +++ b/deploy/manifests/crds.yaml @@ -275,11 +275,6 @@ spec: mode: nullable: true type: string - nodeSelector: - additionalProperties: - type: string - nullable: true - type: object s3bucket: description: Operators are expected to define fields for this type in order to work with S3 buckets. 
nullable: true diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 73fef779..a586814b 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -100,8 +100,6 @@ pub struct SparkApplicationSpec { pub volumes: Option>, #[serde(default, skip_serializing_if = "Option::is_none")] pub env: Option>, - #[serde(default, skip_serializing_if = "Option::is_none")] - pub node_selector: Option>, } #[derive(Clone, Debug, Deserialize, Eq, JsonSchema, PartialEq, Serialize, Display, EnumString)] @@ -413,10 +411,6 @@ impl SparkApplication { .as_ref() .and_then(|executor_config| executor_config.node_selector.clone()) } - - pub fn job_node_selector(&self) -> Option> { - self.spec.node_selector.clone() - } } #[derive(Clone, Debug, Default, Deserialize, Eq, JsonSchema, PartialEq, Serialize)] diff --git a/rust/operator-binary/src/spark_k8s_controller.rs b/rust/operator-binary/src/spark_k8s_controller.rs index 87fa2586..d34bdc75 100644 --- a/rust/operator-binary/src/spark_k8s_controller.rs +++ b/rust/operator-binary/src/spark_k8s_controller.rs @@ -388,7 +388,7 @@ fn spark_job( .fs_group(1000) .build() .into(), // Needed for secret-operator - node_selector: spark_application.job_node_selector(), + node_selector: spark_application.driver_node_selector(), ..PodSpec::default() }), }; diff --git a/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 index 03fbfa4d..7e3a98ce 100644 --- a/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 @@ -18,8 +18,6 @@ spec: mode: cluster mainClass: tech.stackable.demo.spark.NYTLCReport mainApplicationFile: s3a://my-bucket/ny-tlc-report-1.1.0.jar - nodeSelector: - node: "1" volumes: - name: spark-ny-deps persistentVolumeClaim: diff --git a/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 index 9577f112..d50ab139 100644 --- a/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 @@ -9,8 +9,6 @@ spec: mode: cluster mainClass: org.apache.spark.examples.SparkPi mainApplicationFile: s3a://my-bucket/spark-examples_2.12-{{ test_scenario['values']['spark'] }}.jar - nodeSelector: - node: "1" s3bucket: inline: bucketName: my-bucket diff --git a/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 index 62c06792..c2f7f3c2 100644 --- a/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 @@ -10,8 +10,6 @@ spec: mode: cluster mainClass: org.apache.spark.examples.SparkPi mainApplicationFile: s3a://my-bucket/spark-examples_2.12-{{ test_scenario['values']['spark'] }}.jar - nodeSelector: - node: "1" volumes: - name: spark-pi-deps persistentVolumeClaim: