fix: Remove userClassPathFirst properties #355


Merged: 19 commits, Feb 15, 2024
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -13,6 +13,10 @@ All notable changes to this project will be documented in this file.
- Various documentation of the CRD ([#319]).
- [BREAKING] Removed version field. Several attributes have been changed to mandatory. While this change is
technically breaking, existing Spark jobs would not have worked before as these attributes were necessary ([#319]).
- [BREAKING] Remove `userClassPathFirst` properties from `spark-submit`. This was an experimental feature
  introduced to support logging in XML format. A side effect of this removal is that the Vector agent can no longer
  aggregate output from the `spark-submit` containers. On the other hand, it enables dynamic provisioning of
  Java packages (such as Delta Lake) with stock Stackable images, which is much more important. ([#355])

### Fixed

@@ -22,6 +26,7 @@ All notable changes to this project will be documented in this file.
[#313]: https://github.com/stackabletech/spark-k8s-operator/pull/313
[#319]: https://github.com/stackabletech/spark-k8s-operator/pull/319
[#344]: https://github.com/stackabletech/spark-k8s-operator/pull/344
[#355]: https://github.com/stackabletech/spark-k8s-operator/pull/355

## [23.11.0] - 2023-11-24

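As the changelog entry notes, dropping `userClassPathFirst` makes dynamic package provisioning usable with stock Stackable images. A minimal sketch of what that enables (field names follow the operator's `SparkApplication` CRD; the application name, file location, image version, and package coordinates are placeholders — adjust to your Spark/Delta versions):

```yaml
---
apiVersion: spark.stackable.tech/v1alpha1
kind: SparkApplication
metadata:
  name: spark-delta-example           # placeholder name
spec:
  sparkImage:
    productVersion: 3.5.0             # assumed stock image version
  mainApplicationFile: s3a://my-bucket/delta-app.py   # placeholder location
  deps:
    packages:
      # Resolved at submit time via Spark's dependency management.
      - io.delta:delta-spark_2.12:3.0.0
```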
84 changes: 0 additions & 84 deletions deploy/helm/spark-k8s-operator/crds/crds.yaml
@@ -7061,90 +7061,6 @@ spec:
config:
default: {}
properties:
logging:
default:
enableVectorAgent: null
containers: {}
description: Logging configuration, learn more in the [logging concept documentation](https://docs.stackable.tech/home/nightly/concepts/logging).
properties:
containers:
additionalProperties:
anyOf:
- required:
- custom
- {}
description: Log configuration of the container
properties:
console:
description: Configuration for the console appender
nullable: true
properties:
level:
description: The log level threshold. Log events with a lower log level are discarded.
enum:
- TRACE
- DEBUG
- INFO
- WARN
- ERROR
- FATAL
- NONE
nullable: true
type: string
type: object
custom:
description: Custom log configuration provided in a ConfigMap
properties:
configMap:
description: ConfigMap containing the log configuration files
nullable: true
type: string
type: object
file:
description: Configuration for the file appender
nullable: true
properties:
level:
description: The log level threshold. Log events with a lower log level are discarded.
enum:
- TRACE
- DEBUG
- INFO
- WARN
- ERROR
- FATAL
- NONE
nullable: true
type: string
type: object
loggers:
additionalProperties:
description: Configuration of a logger
properties:
level:
description: The log level threshold. Log events with a lower log level are discarded.
enum:
- TRACE
- DEBUG
- INFO
- WARN
- ERROR
- FATAL
- NONE
nullable: true
type: string
type: object
default: {}
description: Configuration per logger
type: object
type: object
description: Log configuration per container.
type: object
enableVectorAgent:
description: Whether or not to deploy a container with the Vector log agent.
nullable: true
type: boolean
type: object
resources:
default:
memory:
8 changes: 8 additions & 0 deletions docs/modules/spark-k8s/pages/usage-guide/logging.adoc
@@ -0,0 +1,8 @@
= Logging

The Spark operator installs a https://vector.dev/docs/setup/deployment/roles/#agent[vector agent] as a side-car container in every application Pod except the `job` Pod that runs `spark-submit`. It also configures the logging framework to output logs in XML format. This is the same https://logging.apache.org/log4j/2.x/manual/layouts.html#XMLLayout[format] used across all Stackable products and it enables the https://vector.dev/docs/setup/deployment/roles/#aggregator[vector aggregator] to collect logs across the entire platform.

It is the user's responsibility to install and configure the vector aggregator, but the agents can discover the aggregator automatically using a discovery ConfigMap as described in the xref:concepts:logging.adoc[logging concepts].

NOTE: Only logs produced by the application's driver and executors are collected. Logs produced by `spark-submit` are discarded.
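The discovery ConfigMap referenced above (and used by the kuttl test fixtures in this PR) can be as simple as the following; the name `vector-aggregator-discovery` matches the test fixtures, while the address is only an example:

```yaml
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: vector-aggregator-discovery
data:
  ADDRESS: vector-aggregator.default.svc.cluster.local:6123   # example address
```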

61 changes: 32 additions & 29 deletions rust/crd/src/lib.rs
@@ -253,7 +253,7 @@ impl SparkApplication {
&self,
s3conn: &Option<S3ConnectionSpec>,
s3logdir: &Option<S3LogDir>,
log_config_map: &str,
log_config_map: Option<&str>,
) -> Result<Vec<Volume>, Error> {
let mut result: Vec<Volume> = self.spec.volumes.clone();

@@ -294,22 +294,24 @@ impl SparkApplication {
}
}

result.push(
VolumeBuilder::new(VOLUME_MOUNT_NAME_LOG_CONFIG)
.with_config_map(log_config_map)
.build(),
);
if let Some(log_config_map) = log_config_map {
result.push(
VolumeBuilder::new(VOLUME_MOUNT_NAME_LOG_CONFIG)
.with_config_map(log_config_map)
.build(),
);

result.push(
VolumeBuilder::new(VOLUME_MOUNT_NAME_LOG)
.with_empty_dir(
None::<String>,
Some(product_logging::framework::calculate_log_volume_size_limit(
&[MAX_SPARK_LOG_FILES_SIZE, MAX_INIT_LOG_FILES_SIZE],
)),
)
.build(),
);
result.push(
VolumeBuilder::new(VOLUME_MOUNT_NAME_LOG)
.with_empty_dir(
None::<String>,
Some(product_logging::framework::calculate_log_volume_size_limit(
&[MAX_SPARK_LOG_FILES_SIZE, MAX_INIT_LOG_FILES_SIZE],
)),
)
.build(),
);
}

if !self.packages().is_empty() {
result.push(
@@ -358,14 +360,15 @@ impl SparkApplication {
..VolumeMount::default()
},
];
self.add_common_volume_mounts(volume_mounts, s3conn, s3logdir)
self.add_common_volume_mounts(volume_mounts, s3conn, s3logdir, false)
}

fn add_common_volume_mounts(
&self,
mut mounts: Vec<VolumeMount>,
s3conn: &Option<S3ConnectionSpec>,
s3logdir: &Option<S3LogDir>,
logging_enabled: bool,
) -> Vec<VolumeMount> {
if self.spec.image.is_some() {
mounts.push(VolumeMount {
@@ -401,17 +404,19 @@ impl SparkApplication {
mounts.push(vm);
}

mounts.push(VolumeMount {
name: VOLUME_MOUNT_NAME_LOG_CONFIG.into(),
mount_path: VOLUME_MOUNT_PATH_LOG_CONFIG.into(),
..VolumeMount::default()
});
if logging_enabled {
mounts.push(VolumeMount {
name: VOLUME_MOUNT_NAME_LOG_CONFIG.into(),
mount_path: VOLUME_MOUNT_PATH_LOG_CONFIG.into(),
..VolumeMount::default()
});

mounts.push(VolumeMount {
name: VOLUME_MOUNT_NAME_LOG.into(),
mount_path: VOLUME_MOUNT_PATH_LOG.into(),
..VolumeMount::default()
});
mounts.push(VolumeMount {
name: VOLUME_MOUNT_NAME_LOG.into(),
mount_path: VOLUME_MOUNT_PATH_LOG.into(),
..VolumeMount::default()
});
}

if !self.packages().is_empty() {
mounts.push(VolumeMount {
@@ -484,10 +489,8 @@ impl SparkApplication {
format!("--conf spark.kubernetes.authenticate.driver.serviceAccountName={}", serviceaccount_name),
format!("--conf spark.driver.defaultJavaOptions=-Dlog4j.configurationFile={VOLUME_MOUNT_PATH_LOG_CONFIG}/{LOG4J2_CONFIG_FILE}"),
format!("--conf spark.driver.extraClassPath=/stackable/spark/extra-jars/*"),
"--conf spark.driver.userClassPathFirst=true".to_string(),
format!("--conf spark.executor.defaultJavaOptions=-Dlog4j.configurationFile={VOLUME_MOUNT_PATH_LOG_CONFIG}/{LOG4J2_CONFIG_FILE}"),
format!("--conf spark.executor.extraClassPath=/stackable/spark/extra-jars/*"),
"--conf spark.executor.userClassPathFirst=true".to_string(),
]);

// See https://spark.apache.org/docs/latest/running-on-kubernetes.html#dependency-management
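The core pattern in the `lib.rs` changes above — an `Option<&str>` for the log ConfigMap name, with the log volumes created only when it is present — can be sketched in isolation like this (the `Volume` struct is a stand-in for the Kubernetes type, not the real API):

```rust
// Simplified sketch of the pattern this PR introduces: the log-config
// ConfigMap name becomes optional, and the log volumes are only created
// when logging is actually configured (the `job` Pod now passes `None`).

#[derive(Debug, Clone, PartialEq)]
struct Volume {
    name: String,
}

fn volumes(log_config_map: Option<&str>) -> Vec<Volume> {
    let mut result = Vec::new();
    // ... other volumes (S3 connection, log dir, ...) would be pushed here ...
    if let Some(cm) = log_config_map {
        // Only mount the log configuration and the log emptyDir when a
        // ConfigMap was provided, i.e. for driver/executor pod templates.
        result.push(Volume { name: format!("log-config ({cm})") });
        result.push(Volume { name: "log".to_string() });
    }
    result
}

fn main() {
    // Driver/executor templates: logging enabled, both log volumes added.
    assert_eq!(volumes(Some("my-log-cm")).len(), 2);
    // spark-submit job Pod: no log volumes anymore.
    assert!(volumes(None).is_empty());
    println!("ok");
}
```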
5 changes: 1 addition & 4 deletions rust/crd/src/roles.rs
@@ -147,7 +147,7 @@ impl RoleConfig {
s3logdir: &Option<S3LogDir>,
) -> Vec<VolumeMount> {
let volume_mounts = self.volume_mounts.clone().unwrap_or_default().into();
spark_application.add_common_volume_mounts(volume_mounts, s3conn, s3logdir)
spark_application.add_common_volume_mounts(volume_mounts, s3conn, s3logdir, true)
}
}

@@ -200,8 +200,6 @@ impl Configuration for RoleConfigFragment {
pub struct SubmitConfig {
#[fragment_attrs(serde(default))]
pub resources: Resources<SparkStorageConfig, NoRuntimeLimits>,
#[fragment_attrs(serde(default))]
pub logging: Logging<SparkContainer>,
}

impl SubmitConfig {
@@ -218,7 +216,6 @@ impl SubmitConfig {
},
storage: SparkStorageConfigFragment {},
},
logging: product_logging::spec::default_logging(),
}
}
}
64 changes: 5 additions & 59 deletions rust/operator-binary/src/spark_k8s_controller.rs
@@ -318,8 +318,6 @@ pub async fn reconcile(spark_application: Arc<SparkApplication>, ctx: Arc<Ctx>)
let submit_job_config_map = submit_job_config_map(
&spark_application,
submit_product_config,
vector_aggregator_address.as_deref(),
&submit_config.logging,
&resolved_product_image,
)?;
client
@@ -580,7 +578,7 @@ fn pod_template_config_map(
};

let mut volumes = spark_application
.volumes(s3conn, s3logdir, &log_config_map)
.volumes(s3conn, s3logdir, Some(&log_config_map))
.context(CreateVolumesSnafu)?;
volumes.push(
VolumeBuilder::new(VOLUME_MOUNT_NAME_CONFIG)
@@ -657,8 +655,6 @@ fn pod_template_config_map(
fn submit_job_config_map(
spark_application: &SparkApplication,
product_config: Option<&HashMap<PropertyNameKind, BTreeMap<String, String>>>,
vector_aggregator_address: Option<&str>,
logging: &Logging<SparkContainer>,
spark_image: &ResolvedProductImage,
) -> Result<ConfigMap> {
let cm_name = spark_application.submit_job_config_map_name();
@@ -679,20 +675,6 @@ fn submit_job_config_map(
.build(),
);

product_logging::extend_config_map(
&RoleGroupRef {
cluster: ObjectRef::from_obj(spark_application),
role: String::new(),
role_group: String::new(),
},
vector_aggregator_address,
logging,
SparkContainer::SparkSubmit,
SparkContainer::Vector,
&mut cm_builder,
)
.context(InvalidLoggingConfigSnafu { cm_name })?;

if let Some(product_config) = product_config {
let jvm_sec_props: BTreeMap<String, Option<String>> = product_config
.get(&PropertyNameKind::File(
@@ -731,27 +713,7 @@ fn spark_job(
let mut cb = ContainerBuilder::new(&SparkContainer::SparkSubmit.to_string())
.context(IllegalContainerNameSnafu)?;

let log_config_map = if let Some(ContainerLogConfig {
choice:
Some(ContainerLogConfigChoice::Custom(CustomContainerLogConfig {
custom: ConfigMapLogConfig { config_map },
})),
}) = job_config
.logging
.containers
.get(&SparkContainer::SparkSubmit)
{
config_map.into()
} else {
spark_application.submit_job_config_map_name()
};

let mut args = vec![job_commands.join(" ")];
if job_config.logging.enable_vector_agent {
// Wait for Vector to gather the logs.
args.push("sleep 10".into());
args.push(create_vector_shutdown_file_command(VOLUME_MOUNT_PATH_LOG));
}
let args = vec![job_commands.join(" ")];

cb.image_from_product_image(spark_image)
.command(vec!["/bin/bash".to_string(), "-c".to_string()])
@@ -762,8 +724,7 @@ fn spark_job(
.add_env_var(
"SPARK_SUBMIT_OPTS",
format!(
"-cp /stackable/spark/extra-jars/*:/stackable/spark/jars/* \
-Dlog4j.configurationFile={VOLUME_MOUNT_PATH_LOG_CONFIG}/{LOG4J2_CONFIG_FILE}"
"-Dlog4j.configurationFile={VOLUME_MOUNT_PATH_LOG_CONFIG}/{LOG4J2_CONFIG_FILE}"
),
)
// TODO: move this to the image
@@ -786,26 +747,11 @@ fn spark_job(
];
volumes.extend(
spark_application
.volumes(s3conn, s3logdir, &log_config_map)
.volumes(s3conn, s3logdir, None)
.context(CreateVolumesSnafu)?,
);

let mut containers = vec![cb.build()];

if job_config.logging.enable_vector_agent {
containers.push(vector_container(
spark_image,
VOLUME_MOUNT_NAME_CONFIG,
VOLUME_MOUNT_NAME_LOG,
job_config.logging.containers.get(&SparkContainer::Vector),
ResourceRequirementsBuilder::new()
.with_cpu_request("250m")
.with_cpu_limit("500m")
.with_memory_request("128Mi")
.with_memory_limit("128Mi")
.build(),
));
}
let containers = vec![cb.build()];

let mut pod =
PodTemplateSpec {
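Taken together, the controller changes boil down to a small difference in the generated `spark-submit` invocation. A simplified, illustrative sketch (class paths from the diff above; application arguments omitted):

```shell
# Flags still emitted by the operator after this PR: class paths are
# extended with the extra-jars directory as before.
spark-submit \
  --conf spark.driver.extraClassPath='/stackable/spark/extra-jars/*' \
  --conf spark.executor.extraClassPath='/stackable/spark/extra-jars/*'

# Removed by this PR (experimental class-loading override), so packages
# provisioned at submit time use Spark's normal class-loading precedence:
#   --conf spark.driver.userClassPathFirst=true
#   --conf spark.executor.userClassPathFirst=true
```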
10 changes: 10 additions & 0 deletions tests/templates/kuttl/delta-lake/01-assert.yaml.j2
@@ -0,0 +1,10 @@
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
{% if lookup('env', 'VECTOR_AGGREGATOR') %}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: vector-aggregator-discovery
{% endif %}
@@ -0,0 +1,9 @@
{% if lookup('env', 'VECTOR_AGGREGATOR') %}
---
apiVersion: v1
kind: ConfigMap
metadata:
name: vector-aggregator-discovery
data:
ADDRESS: {{ lookup('env', 'VECTOR_AGGREGATOR') }}
{% endif %}