[Merged by Bors] - Logging #226

Closed
wants to merge 43 commits
Changes from 36 commits
Commits
43 commits
631aa0e
Allow fragments for job, driver, and executor configurations
siegfriedweber Mar 10, 2023
dd33058
Implement log aggregation for the Spark driver
siegfriedweber Mar 9, 2023
d36a0f8
Shutdown Vector after completion of the main container
siegfriedweber Mar 10, 2023
e418781
Use separate config maps for driver and executor
siegfriedweber Mar 22, 2023
f993971
Add integration test for log aggregation
siegfriedweber Mar 23, 2023
c7bebf1
Add log aggregation for the init containers and the submit job
siegfriedweber Mar 23, 2023
5bc8767
Add support for custom log configurations
siegfriedweber Mar 24, 2023
eb118ad
Add logging to the history server
siegfriedweber Mar 28, 2023
fa9bfbe
Add logging tests for pyspark
siegfriedweber Mar 28, 2023
4848b92
Fix integration tests
siegfriedweber Mar 28, 2023
6b0dfed
Add necessary volume mounts to the Spark Submit job
siegfriedweber Mar 29, 2023
45391ff
Enable log aggregation in all integration tests
siegfriedweber Mar 30, 2023
13dff68
Reorganize imports
siegfriedweber Mar 30, 2023
fed4572
Fix bug in configuration merge
siegfriedweber Mar 30, 2023
21bef7a
Make volume mounts and node selector optional
siegfriedweber Mar 30, 2023
512146a
Remove unused error variants
siegfriedweber Mar 30, 2023
5413cf6
Upgrade stackable-operator to version 0.38.0
siegfriedweber Mar 30, 2023
6abfa4a
Upgrade clap
siegfriedweber Mar 30, 2023
a03f344
Update changelog
siegfriedweber Mar 30, 2023
b5ccdb7
Use volume mounts from the merged configuration
siegfriedweber Mar 30, 2023
8196a96
Merge branch 'fragment-config' into logging
siegfriedweber Mar 30, 2023
661fc61
Fix Clippy warning
siegfriedweber Mar 30, 2023
bc4226a
Merge branch 'fragment-config' into logging
siegfriedweber Mar 30, 2023
2d1e8eb
Add SubmitJobContainer struct
siegfriedweber Mar 30, 2023
fb742a1
Remove unused constants
siegfriedweber Mar 30, 2023
9e42564
Adapt spark-submit container in integration tests
siegfriedweber Mar 31, 2023
6a170f2
Use separate indexes for test steps
siegfriedweber Mar 31, 2023
aaa3cf0
Wait 10 seconds in all test steps for MinIO to start up
siegfriedweber Mar 31, 2023
68cfb03
Use distinct names for SecretClasses in the tests
siegfriedweber Mar 31, 2023
d508ffd
Merge branch 'fragment-config' into logging
siegfriedweber Mar 31, 2023
b66c003
Fix flakiness in logging test
siegfriedweber Apr 3, 2023
0c6ac39
Merge branch 'main' into logging
siegfriedweber Apr 3, 2023
2045cc8
Document log aggregation
siegfriedweber Apr 3, 2023
0240a91
Update changelog
siegfriedweber Apr 3, 2023
0068dae
Fix clippy warnings
siegfriedweber Apr 3, 2023
a9457e1
Reserve space for the log files of the init containers
siegfriedweber Apr 3, 2023
9e8ed3f
Apply review suggestions
siegfriedweber Apr 4, 2023
84b283d
Remove redundant container name from error variant InvalidContainerName
siegfriedweber Apr 4, 2023
4d2dbe8
Remove unused error variant
siegfriedweber Apr 4, 2023
b971cf5
Fix ConfigMap names in the logging integration test
siegfriedweber Apr 4, 2023
da2ade6
Discard logs in integration test if upstream Vector aggregator is not…
siegfriedweber Apr 4, 2023
0b46e7f
Remove unused ConfigMap vector-aggregator-discovery from logging test
siegfriedweber Apr 4, 2023
ad89db3
Use defaultJavaOptions instead of extraJavaOptions
siegfriedweber Apr 4, 2023
2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -8,6 +8,7 @@ All notable changes to this project will be documented in this file.

- Deploy default and support custom affinities ([#217])
- BREAKING: Dropped support for old spec.{driver,executor}.nodeSelector field. Use spec.{driver,executor}.affinity.nodeSelector instead ([#217])
- Log aggregation added ([#226]).

### Changed

@@ -19,6 +20,7 @@ All notable changes to this project will be documented in this file.
[#207]: https://github.com/stackabletech/spark-k8s-operator/pull/207
[#217]: https://github.com/stackabletech/spark-k8s-operator/pull/217
[#223]: https://github.com/stackabletech/spark-k8s-operator/pull/223
[#226]: https://github.com/stackabletech/spark-k8s-operator/pull/226

## [23.1.0] - 2023-01-23

388 changes: 388 additions & 0 deletions deploy/helm/spark-k8s-operator/crds/crds.yaml

Large diffs are not rendered by default.

17 changes: 17 additions & 0 deletions docs/modules/spark-k8s/pages/history_server.adoc
@@ -48,6 +48,23 @@ include::example$example-history-app.yaml[]
<6> Credentials used to write event logs. These can, of course, differ from the credentials used to process data.


== Log aggregation

The logs can be forwarded to a Vector log aggregator by providing a discovery
ConfigMap for the aggregator and by enabling the log agent:

[source,yaml]
----
spec:
vectorAggregatorConfigMapName: vector-aggregator-discovery
nodes:
config:
logging:
enableVectorAgent: true
----

Further information on how to configure logging can be found in
xref:home:concepts:logging.adoc[].
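
The operator does not create the discovery ConfigMap itself; the only requirement documented in this PR's CRD field description is that it contains an `ADDRESS` key with the address of the Vector aggregator. A minimal sketch is shown below — the name matches the example above, but the address is a placeholder, not something defined by this PR:

[source,yaml]
----
apiVersion: v1
kind: ConfigMap
metadata:
  name: vector-aggregator-discovery
data:
  # Placeholder address; point this at your Vector aggregator's source endpoint.
  ADDRESS: vector-aggregator:6000
----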

== History Web UI

22 changes: 22 additions & 0 deletions docs/modules/spark-k8s/pages/usage.adoc
@@ -177,6 +177,28 @@ Spark allocates a default amount of non-heap memory based on the type of job (JV

NOTE: It is possible to define Spark resources either directly by setting configuration properties listed under `sparkConf`, or by using resource limits. If both are used, then `sparkConf` properties take precedence. It is recommended for the sake of clarity to use *_either_* one *_or_* the other.

== Log aggregation

The logs can be forwarded to a Vector log aggregator by providing a discovery
ConfigMap for the aggregator and by enabling the log agent:

[source,yaml]
----
spec:
vectorAggregatorConfigMapName: vector-aggregator-discovery
job:
logging:
enableVectorAgent: true
driver:
logging:
enableVectorAgent: true
executor:
logging:
enableVectorAgent: true
----

Further information on how to configure logging can be found in
xref:home:concepts:logging.adoc[].
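
Beyond enabling the agent, the `logging` section follows the common Stackable logging structure, so log levels can also be tuned per container. The following sketch does this for the driver; note that the container name `spark` is only an illustrative assumption (this PR does not list the container names for the Spark application), so consult the generated CRD for the actual names:

[source,yaml]
----
spec:
  driver:
    logging:
      enableVectorAgent: true
      containers:
        spark:  # illustrative container name, not confirmed by this PR
          console:
            level: INFO
          file:
            level: INFO
          loggers:
            ROOT:
              level: INFO
----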

== CRD argument coverage

29 changes: 21 additions & 8 deletions rust/crd/src/constants.rs
@@ -1,21 +1,29 @@
pub const APP_NAME: &str = "spark-k8s";

pub const VOLUME_MOUNT_NAME_POD_TEMPLATES: &str = "pod-template";
pub const VOLUME_MOUNT_PATH_POD_TEMPLATES: &str = "/stackable/spark/pod-templates";
pub const VOLUME_MOUNT_NAME_DRIVER_POD_TEMPLATES: &str = "driver-pod-template";
pub const VOLUME_MOUNT_PATH_DRIVER_POD_TEMPLATES: &str = "/stackable/spark/driver-pod-templates";

pub const VOLUME_MOUNT_NAME_EXECUTOR_POD_TEMPLATES: &str = "executor-pod-template";
pub const VOLUME_MOUNT_PATH_EXECUTOR_POD_TEMPLATES: &str =
"/stackable/spark/executor-pod-templates";

pub const POD_TEMPLATE_FILE: &str = "template.yaml";

pub const VOLUME_MOUNT_NAME_CONFIG: &str = "config";

pub const CONTAINER_NAME_JOB: &str = "job";
pub const VOLUME_MOUNT_NAME_JOB: &str = "job-files";
pub const VOLUME_MOUNT_PATH_JOB: &str = "/stackable/spark/jobs";

pub const CONTAINER_NAME_REQ: &str = "requirements";
pub const VOLUME_MOUNT_NAME_REQ: &str = "req-files";
pub const VOLUME_MOUNT_PATH_REQ: &str = "/stackable/spark/requirements";

pub const CONTAINER_IMAGE_NAME_DRIVER: &str = "dummy-overwritten-by-command-line";
pub const CONTAINER_NAME_DRIVER: &str = "spark-driver";
pub const VOLUME_MOUNT_NAME_LOG_CONFIG: &str = "log-config";
pub const VOLUME_MOUNT_PATH_LOG_CONFIG: &str = "/stackable/log_config";

pub const CONTAINER_IMAGE_NAME_EXECUTOR: &str = "dummy-overwritten-by-command-line";
pub const CONTAINER_NAME_EXECUTOR: &str = "spark-executor";
pub const VOLUME_MOUNT_NAME_LOG: &str = "log";
pub const VOLUME_MOUNT_PATH_LOG: &str = "/stackable/log";

pub const LOG4J2_CONFIG_FILE: &str = "log4j2.properties";

pub const ACCESS_KEY_ID: &str = "accessKey";
pub const SECRET_ACCESS_KEY: &str = "secretKey";
@@ -25,6 +33,11 @@ pub const MIN_MEMORY_OVERHEAD: u32 = 384;
pub const JVM_OVERHEAD_FACTOR: f32 = 0.1;
pub const NON_JVM_OVERHEAD_FACTOR: f32 = 0.4;

pub const MAX_SPARK_LOG_FILES_SIZE_IN_MIB: u32 = 10;
pub const MAX_INIT_CONTAINER_LOG_FILES_SIZE_IN_MIB: u32 = 1;
pub const LOG_VOLUME_SIZE_IN_MIB: u32 =
MAX_SPARK_LOG_FILES_SIZE_IN_MIB + MAX_INIT_CONTAINER_LOG_FILES_SIZE_IN_MIB;

pub const OPERATOR_NAME: &str = "spark.stackable.tech";
pub const CONTROLLER_NAME: &str = "sparkapplication";
pub const POD_DRIVER_CONTROLLER_NAME: &str = "pod-driver";
30 changes: 29 additions & 1 deletion rust/crd/src/history.rs
@@ -27,10 +27,11 @@ use stackable_operator::{
transform_all_roles_to_config, validate_all_roles_and_groups_config, Configuration,
ValidatedRoleConfigByPropertyKind,
},
product_logging::{self, spec::Logging},
role_utils::{Role, RoleGroupRef},
schemars::{self, JsonSchema},
};
use strum::Display;
use strum::{Display, EnumIter};

#[derive(Snafu, Debug)]
pub enum Error {
@@ -62,6 +63,10 @@ pub enum Error {
#[serde(rename_all = "camelCase")]
pub struct SparkHistoryServerSpec {
pub image: ProductImage,
/// Name of the Vector aggregator discovery ConfigMap.
/// It must contain the key `ADDRESS` with the address of the Vector aggregator.
#[serde(skip_serializing_if = "Option::is_none")]
pub vector_aggregator_config_map_name: Option<String>,
pub log_file_directory: LogFileDirectorySpec,
#[serde(skip_serializing_if = "Option::is_none")]
pub spark_conf: Option<BTreeMap<String, String>>,
@@ -180,6 +185,26 @@ pub struct S3LogFileDirectorySpec {
)]
pub struct HistoryStorageConfig {}

#[derive(
Clone,
Debug,
Deserialize,
Display,
Eq,
EnumIter,
JsonSchema,
Ord,
PartialEq,
PartialOrd,
Serialize,
)]
#[serde(rename_all = "kebab-case")]
#[strum(serialize_all = "kebab-case")]
pub enum SparkHistoryServerContainer {
SparkHistory,
Vector,
}

#[derive(Clone, Debug, Default, JsonSchema, PartialEq, Fragment)]
#[fragment_attrs(
derive(
@@ -200,6 +225,8 @@ pub struct HistoryConfig {
#[fragment_attrs(serde(default))]
pub resources: Resources<HistoryStorageConfig, NoRuntimeLimits>,
#[fragment_attrs(serde(default))]
pub logging: Logging<SparkHistoryServerContainer>,
#[fragment_attrs(serde(default))]
pub affinity: StackableAffinity,
}

@@ -218,6 +245,7 @@ impl HistoryConfig {
},
storage: HistoryStorageConfigFragment {},
},
logging: product_logging::spec::default_logging(),
affinity: history_affinity(cluster_name),
}
}
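
Since `SparkHistoryServerContainer` is serialized with `rename_all = "kebab-case"`, the two containers should surface under the history server's `nodes.config.logging.containers` as `spark-history` and `vector`. A hypothetical sketch building on the history server snippet above (the log levels are examples only):

[source,yaml]
----
nodes:
  config:
    logging:
      enableVectorAgent: true
      containers:
        spark-history:
          loggers:
            ROOT:
              level: INFO
        vector:
          file:
            level: WARN
----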