diff --git a/CHANGELOG.md b/CHANGELOG.md index 91ae7389..3556d671 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,10 @@ All notable changes to this project will be documented in this file. - Various documentation of the CRD ([#319]). - [BREAKING] Removed version field. Several attributes have been changed to mandatory. While this change is technically breaking, existing Spark jobs would not have worked before as these attributes were necessary ([#319]). +- [BREAKING] Removed the `userClassPathFirst` properties from `spark-submit`. This experimental feature was + introduced to support logging in XML format. A side effect of this removal is that the vector agent cannot + aggregate output from the `spark-submit` containers. On the other hand, it enables dynamic provisioning of + Java packages (such as Delta Lake) with stock Stackable images, which is much more important ([#355]). ### Fixed @@ -22,6 +26,7 @@ All notable changes to this project will be documented in this file. [#313]: https://github.com/stackabletech/spark-k8s-operator/pull/313 [#319]: https://github.com/stackabletech/spark-k8s-operator/pull/319 [#344]: https://github.com/stackabletech/spark-k8s-operator/pull/344 +[#355]: https://github.com/stackabletech/spark-k8s-operator/pull/355 ## [23.11.0] - 2023-11-24 diff --git a/deploy/helm/spark-k8s-operator/crds/crds.yaml b/deploy/helm/spark-k8s-operator/crds/crds.yaml index 5d81f478..2a316d89 100644 --- a/deploy/helm/spark-k8s-operator/crds/crds.yaml +++ b/deploy/helm/spark-k8s-operator/crds/crds.yaml @@ -7061,90 +7061,6 @@ spec: config: default: {} properties: - logging: - default: - enableVectorAgent: null - containers: {} - description: Logging configuration, learn more in the [logging concept documentation](https://docs.stackable.tech/home/nightly/concepts/logging). - properties: - containers: - additionalProperties: - anyOf: - - required: - - custom - - {} - description: Log configuration of the container - properties: - console: - description: Configuration for the console appender - nullable: true - properties: - level: - description: The log level threshold. Log events with a lower log level are discarded. - enum: - TRACE - DEBUG - INFO - WARN - ERROR - FATAL - NONE - nullable: true - type: string - type: object - custom: - description: Custom log configuration provided in a ConfigMap - properties: - configMap: - description: ConfigMap containing the log configuration files - nullable: true - type: string - type: object - file: - description: Configuration for the file appender - nullable: true - properties: - level: - description: The log level threshold. Log events with a lower log level are discarded. - enum: - TRACE - DEBUG - INFO - WARN - ERROR - FATAL - NONE - nullable: true - type: string - type: object - loggers: - additionalProperties: - description: Configuration of a logger - properties: - level: - description: The log level threshold. Log events with a lower log level are discarded. - enum: - TRACE - DEBUG - INFO - WARN - ERROR - FATAL - NONE - nullable: true - type: string - type: object - default: {} - description: Configuration per logger - type: object - type: object - description: Log configuration per container. - type: object - enableVectorAgent: - description: Wether or not to deploy a container with the Vector log agent.
- nullable: true - type: boolean - type: object resources: default: memory: diff --git a/docs/modules/spark-k8s/pages/usage-guide/logging.adoc b/docs/modules/spark-k8s/pages/usage-guide/logging.adoc new file mode 100644 index 00000000..768d2cba --- /dev/null +++ b/docs/modules/spark-k8s/pages/usage-guide/logging.adoc @@ -0,0 +1,8 @@ += Logging + +The Spark operator installs a https://vector.dev/docs/setup/deployment/roles/#agent[vector agent] as a side-car container in every application Pod except the `job` Pod that runs `spark-submit`. It also configures the logging framework to output logs in XML format. This is the same https://logging.apache.org/log4j/2.x/manual/layouts.html#XMLLayout[format] used across all Stackable products and it enables the https://vector.dev/docs/setup/deployment/roles/#aggregator[vector aggregator] to collect logs across the entire platform. + +It is the user's responsibility to install and configure the vector aggregator, but the agents can discover the aggregator automatically using a discovery ConfigMap as described in the xref:concepts:logging.adoc[logging concepts]. + +NOTE: Only logs produced by the application's driver and executors are collected. Logs produced by `spark-submit` are discarded. + diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index b8ba860a..a27af7d7 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -253,7 +253,7 @@ impl SparkApplication { &self, s3conn: &Option, s3logdir: &Option, - log_config_map: &str, + log_config_map: Option<&str>, ) -> Result, Error> { let mut result: Vec = self.spec.volumes.clone(); @@ -294,22 +294,24 @@ impl SparkApplication { } } - result.push( - VolumeBuilder::new(VOLUME_MOUNT_NAME_LOG_CONFIG) - .with_config_map(log_config_map) - .build(), - ); + if let Some(log_config_map) = log_config_map { + result.push( + VolumeBuilder::new(VOLUME_MOUNT_NAME_LOG_CONFIG) + .with_config_map(log_config_map) + .build(), + ); - result.push( - VolumeBuilder::new(VOLUME_MOUNT_NAME_LOG) - .with_empty_dir( - None::, - Some(product_logging::framework::calculate_log_volume_size_limit( - &[MAX_SPARK_LOG_FILES_SIZE, MAX_INIT_LOG_FILES_SIZE], - )), - ) - .build(), - ); + result.push( + VolumeBuilder::new(VOLUME_MOUNT_NAME_LOG) + .with_empty_dir( + None::, + Some(product_logging::framework::calculate_log_volume_size_limit( + &[MAX_SPARK_LOG_FILES_SIZE, MAX_INIT_LOG_FILES_SIZE], + )), + ) + .build(), + ); + } if !self.packages().is_empty() { result.push( @@ -358,7 +360,7 @@ impl SparkApplication { ..VolumeMount::default() }, ]; - self.add_common_volume_mounts(volume_mounts, s3conn, s3logdir) + self.add_common_volume_mounts(volume_mounts, s3conn, s3logdir, false) } fn add_common_volume_mounts( @@ -366,6 +368,7 @@ impl SparkApplication { mut mounts: Vec, s3conn: &Option, s3logdir: &Option, + logging_enabled: bool, ) -> Vec { if self.spec.image.is_some() { mounts.push(VolumeMount { @@ -401,17 +404,19 @@ impl SparkApplication { mounts.push(vm); } - mounts.push(VolumeMount { - name: VOLUME_MOUNT_NAME_LOG_CONFIG.into(), - mount_path: VOLUME_MOUNT_PATH_LOG_CONFIG.into(), - ..VolumeMount::default() - }); + if logging_enabled { + mounts.push(VolumeMount { + name: VOLUME_MOUNT_NAME_LOG_CONFIG.into(), + mount_path: VOLUME_MOUNT_PATH_LOG_CONFIG.into(), + ..VolumeMount::default() + }); - mounts.push(VolumeMount { - name: VOLUME_MOUNT_NAME_LOG.into(), - mount_path: VOLUME_MOUNT_PATH_LOG.into(), - ..VolumeMount::default() - }); + mounts.push(VolumeMount { + name: VOLUME_MOUNT_NAME_LOG.into(), + mount_path: VOLUME_MOUNT_PATH_LOG.into(), 
+ ..VolumeMount::default() + }); + } if !self.packages().is_empty() { mounts.push(VolumeMount { @@ -484,10 +489,8 @@ impl SparkApplication { format!("--conf spark.kubernetes.authenticate.driver.serviceAccountName={}", serviceaccount_name), format!("--conf spark.driver.defaultJavaOptions=-Dlog4j.configurationFile={VOLUME_MOUNT_PATH_LOG_CONFIG}/{LOG4J2_CONFIG_FILE}"), format!("--conf spark.driver.extraClassPath=/stackable/spark/extra-jars/*"), - "--conf spark.driver.userClassPathFirst=true".to_string(), format!("--conf spark.executor.defaultJavaOptions=-Dlog4j.configurationFile={VOLUME_MOUNT_PATH_LOG_CONFIG}/{LOG4J2_CONFIG_FILE}"), format!("--conf spark.executor.extraClassPath=/stackable/spark/extra-jars/*"), - "--conf spark.executor.userClassPathFirst=true".to_string(), ]); // See https://spark.apache.org/docs/latest/running-on-kubernetes.html#dependency-management diff --git a/rust/crd/src/roles.rs b/rust/crd/src/roles.rs index 9ac3241f..d1b14a6a 100644 --- a/rust/crd/src/roles.rs +++ b/rust/crd/src/roles.rs @@ -147,7 +147,7 @@ impl RoleConfig { s3logdir: &Option, ) -> Vec { let volume_mounts = self.volume_mounts.clone().unwrap_or_default().into(); - spark_application.add_common_volume_mounts(volume_mounts, s3conn, s3logdir) + spark_application.add_common_volume_mounts(volume_mounts, s3conn, s3logdir, true) } } @@ -200,8 +200,6 @@ impl Configuration for RoleConfigFragment { pub struct SubmitConfig { #[fragment_attrs(serde(default))] pub resources: Resources, - #[fragment_attrs(serde(default))] - pub logging: Logging, } impl SubmitConfig { @@ -218,7 +216,6 @@ impl SubmitConfig { }, storage: SparkStorageConfigFragment {}, }, - logging: product_logging::spec::default_logging(), } } } diff --git a/rust/operator-binary/src/spark_k8s_controller.rs b/rust/operator-binary/src/spark_k8s_controller.rs index 8ac476a8..8b3dc344 100644 --- a/rust/operator-binary/src/spark_k8s_controller.rs +++ b/rust/operator-binary/src/spark_k8s_controller.rs @@ -318,8 +318,6 @@ pub async fn reconcile(spark_application: Arc, ctx: Arc) let submit_job_config_map = submit_job_config_map( &spark_application, submit_product_config, - vector_aggregator_address.as_deref(), - &submit_config.logging, &resolved_product_image, )?; client @@ -580,7 +578,7 @@ fn pod_template_config_map( }; let mut volumes = spark_application - .volumes(s3conn, s3logdir, &log_config_map) + .volumes(s3conn, s3logdir, Some(&log_config_map)) .context(CreateVolumesSnafu)?; volumes.push( VolumeBuilder::new(VOLUME_MOUNT_NAME_CONFIG) @@ -657,8 +655,6 @@ fn pod_template_config_map( fn submit_job_config_map( spark_application: &SparkApplication, product_config: Option<&HashMap>>, - vector_aggregator_address: Option<&str>, - logging: &Logging, spark_image: &ResolvedProductImage, ) -> Result { let cm_name = spark_application.submit_job_config_map_name(); @@ -679,20 +675,6 @@ fn submit_job_config_map( .build(), ); - product_logging::extend_config_map( - &RoleGroupRef { - cluster: ObjectRef::from_obj(spark_application), - role: String::new(), - role_group: String::new(), - }, - vector_aggregator_address, - logging, - SparkContainer::SparkSubmit, - SparkContainer::Vector, - &mut cm_builder, - ) - .context(InvalidLoggingConfigSnafu { cm_name })?; - if let Some(product_config) = product_config { let jvm_sec_props: BTreeMap> = product_config .get(&PropertyNameKind::File( @@ -731,27 +713,7 @@ fn spark_job( let mut cb = ContainerBuilder::new(&SparkContainer::SparkSubmit.to_string()) .context(IllegalContainerNameSnafu)?; - let log_config_map = if let 
Some(ContainerLogConfig { - choice: - Some(ContainerLogConfigChoice::Custom(CustomContainerLogConfig { - custom: ConfigMapLogConfig { config_map }, - })), - }) = job_config - .logging - .containers - .get(&SparkContainer::SparkSubmit) - { - config_map.into() - } else { - spark_application.submit_job_config_map_name() - }; - - let mut args = vec![job_commands.join(" ")]; - if job_config.logging.enable_vector_agent { - // Wait for Vector to gather the logs. - args.push("sleep 10".into()); - args.push(create_vector_shutdown_file_command(VOLUME_MOUNT_PATH_LOG)); - } + let args = vec![job_commands.join(" ")]; cb.image_from_product_image(spark_image) .command(vec!["/bin/bash".to_string(), "-c".to_string()]) @@ -762,8 +724,7 @@ fn spark_job( .add_env_var( "SPARK_SUBMIT_OPTS", format!( - "-cp /stackable/spark/extra-jars/*:/stackable/spark/jars/* \ - -Dlog4j.configurationFile={VOLUME_MOUNT_PATH_LOG_CONFIG}/{LOG4J2_CONFIG_FILE}" + "-Dlog4j.configurationFile={VOLUME_MOUNT_PATH_LOG_CONFIG}/{LOG4J2_CONFIG_FILE}" ), ) // TODO: move this to the image @@ -786,26 +747,11 @@ fn spark_job( ]; volumes.extend( spark_application - .volumes(s3conn, s3logdir, &log_config_map) + .volumes(s3conn, s3logdir, None) .context(CreateVolumesSnafu)?, ); - let mut containers = vec![cb.build()]; - - if job_config.logging.enable_vector_agent { - containers.push(vector_container( - spark_image, - VOLUME_MOUNT_NAME_CONFIG, - VOLUME_MOUNT_NAME_LOG, - job_config.logging.containers.get(&SparkContainer::Vector), - ResourceRequirementsBuilder::new() - .with_cpu_request("250m") - .with_cpu_limit("500m") - .with_memory_request("128Mi") - .with_memory_limit("128Mi") - .build(), - )); - } + let containers = vec![cb.build()]; let mut pod = PodTemplateSpec { diff --git a/tests/templates/kuttl/delta-lake/01-assert.yaml.j2 b/tests/templates/kuttl/delta-lake/01-assert.yaml.j2 new file mode 100644 index 00000000..50b1d4c3 --- /dev/null +++ b/tests/templates/kuttl/delta-lake/01-assert.yaml.j2 @@ -0,0 +1,10 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +{% if lookup('env', 'VECTOR_AGGREGATOR') %} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: vector-aggregator-discovery +{% endif %} diff --git a/tests/templates/kuttl/delta-lake/01-install-vector-aggregator-discovery-configmap.yaml.j2 b/tests/templates/kuttl/delta-lake/01-install-vector-aggregator-discovery-configmap.yaml.j2 new file mode 100644 index 00000000..2d6a0df5 --- /dev/null +++ b/tests/templates/kuttl/delta-lake/01-install-vector-aggregator-discovery-configmap.yaml.j2 @@ -0,0 +1,9 @@ +{% if lookup('env', 'VECTOR_AGGREGATOR') %} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: vector-aggregator-discovery +data: + ADDRESS: {{ lookup('env', 'VECTOR_AGGREGATOR') }} +{% endif %} diff --git a/tests/templates/kuttl/delta-lake/10-s3-secret.yaml b/tests/templates/kuttl/delta-lake/10-s3-secret.yaml new file mode 100644 index 00000000..5c78faeb --- /dev/null +++ b/tests/templates/kuttl/delta-lake/10-s3-secret.yaml @@ -0,0 +1,25 @@ +--- +apiVersion: v1 +kind: Secret +metadata: + name: minio-credentials + labels: + secrets.stackable.tech/class: s3-credentials-class +timeout: 240 +stringData: + accessKey: minioAccessKey + secretKey: minioSecretKey + # The following two entries are used by the Bitnami chart for MinIO to + # set up credentials for accessing buckets managed by the MinIO tenant. 
+ root-user: minioAccessKey + root-password: minioSecretKey +--- +apiVersion: secrets.stackable.tech/v1alpha1 +kind: SecretClass +metadata: + name: s3-credentials-class +spec: + backend: + k8sSearch: + searchNamespace: + pod: {} diff --git a/tests/templates/kuttl/delta-lake/20-assert.yaml b/tests/templates/kuttl/delta-lake/20-assert.yaml new file mode 100644 index 00000000..fbbea3bd --- /dev/null +++ b/tests/templates/kuttl/delta-lake/20-assert.yaml @@ -0,0 +1,20 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 900 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: test-minio +status: + readyReplicas: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + name: minio-client + labels: + app: minio-client +status: + phase: Running diff --git a/tests/templates/kuttl/delta-lake/20-setup-minio.yaml b/tests/templates/kuttl/delta-lake/20-setup-minio.yaml new file mode 100644 index 00000000..ef60c970 --- /dev/null +++ b/tests/templates/kuttl/delta-lake/20-setup-minio.yaml @@ -0,0 +1,48 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: >- + helm install test-minio + --namespace $NAMESPACE + --version 11.9.2 + -f helm-bitnami-minio-values.yaml + --repo https://charts.bitnami.com/bitnami minio + timeout: 240 +--- +apiVersion: v1 +kind: Pod +metadata: + name: minio-client + labels: + app: minio-client +spec: + selector: + matchLabels: + app: minio-client + restartPolicy: Never + containers: + - name: minio-client + image: docker.io/bitnami/minio-client:2022.8.11-debian-11-r3 + command: ["bash", "-c", "sleep infinity"] + stdin: true + tty: true + env: + - name: MINIO_SERVER_ACCESS_KEY + valueFrom: + secretKeyRef: + name: minio-credentials + key: root-user + optional: false + - name: MINIO_SERVER_SECRET_KEY + valueFrom: + secretKeyRef: + name: minio-credentials + key: root-password + optional: false + - name: MINIO_SERVER_HOST + value: test-minio + - name: MINIO_SERVER_PORT_NUMBER + value: "9000" + - name: MINIO_SERVER_SCHEME + value: http diff --git a/tests/templates/kuttl/delta-lake/30-prepare-bucket.yaml b/tests/templates/kuttl/delta-lake/30-prepare-bucket.yaml new file mode 100644 index 00000000..9fd9b617 --- /dev/null +++ b/tests/templates/kuttl/delta-lake/30-prepare-bucket.yaml @@ -0,0 +1,9 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + # give minio enough time to start + - command: sleep 10 + - command: kubectl exec -n $NAMESPACE minio-client -- sh -c 'mc alias set test-minio http://test-minio:9000 $$MINIO_SERVER_ACCESS_KEY $$MINIO_SERVER_SECRET_KEY' + - command: kubectl exec -n $NAMESPACE minio-client -- mc mb test-minio/my-bucket + - command: kubectl exec -n $NAMESPACE minio-client -- mc policy set public test-minio/my-bucket diff --git a/tests/templates/kuttl/delta-lake/40-assert.yaml b/tests/templates/kuttl/delta-lake/40-assert.yaml new file mode 100644 index 00000000..f9d38189 --- /dev/null +++ b/tests/templates/kuttl/delta-lake/40-assert.yaml @@ -0,0 +1,12 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 300 +--- +# The Job starting the whole process +apiVersion: spark.stackable.tech/v1alpha1 +kind: SparkApplication +metadata: + name: pyspark-delta-lake +status: + phase: Succeeded diff --git a/tests/templates/kuttl/delta-lake/40-spark-app.yaml.j2 b/tests/templates/kuttl/delta-lake/40-spark-app.yaml.j2 new file mode 100644 index 00000000..4ad17431 --- /dev/null +++ b/tests/templates/kuttl/delta-lake/40-spark-app.yaml.j2 @@ -0,0 +1,85 @@ +--- +apiVersion: spark.stackable.tech/v1alpha1 +kind: 
SparkApplication +metadata: + name: pyspark-delta-lake +spec: +{% if lookup('env', 'VECTOR_AGGREGATOR') %} + vectorAggregatorConfigMapName: vector-aggregator-discovery +{% endif %} + sparkImage: +{% if test_scenario['values']['spark-delta-lake'].find(",") > 0 %} + custom: "{{ test_scenario['values']['spark-delta-lake'].split(',')[1] }}" + productVersion: "{{ test_scenario['values']['spark-delta-lake'].split(',')[0] }}" +{% else %} + productVersion: "{{ test_scenario['values']['spark-delta-lake'] }}" +{% endif %} + pullPolicy: IfNotPresent + mode: cluster + mainApplicationFile: "local:///stackable/spark/jobs/write-to-delta.py" + s3connection: + inline: + host: test-minio + port: 9000 + accessStyle: Path + credentials: + secretClass: s3-credentials-class + driver: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + volumeMounts: + - name: script + mountPath: /stackable/spark/jobs + executor: + replicas: 1 + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + volumeMounts: + - name: script + mountPath: /stackable/spark/jobs + deps: + requirements: + - importlib-metadata + - delta-spark=={{ test_scenario['values']['delta'] }} + packages: + - io.delta:delta-spark_2.12:{{ test_scenario['values']['delta'] }} + volumes: + - name: script + configMap: + name: write-to-delta +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: write-to-delta +data: + write-to-delta.py: | + from datetime import datetime, date + from pyspark.sql import Row, SparkSession + from delta import * + + def main(): + builder = SparkSession.builder.appName("MyApp") \ + .config("spark.hadoop.fs.s3a.aws.credentials.provider", "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider") \ + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \ + .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \ + .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") \ + .config("spark.delta.logStore.class", "org.apache.spark.sql.delta.storage.S3SingleDriverLogStore") \ + .config("spark.hadoop.delta.enableFastS3AListFrom", "true") + + spark = configure_spark_with_delta_pip(builder).getOrCreate() + + df = spark.createDataFrame([ + Row(a=1, b=2., c='string1', d=date(2000, 1, 1), e=datetime(2000, 1, 1, 12, 0)), + Row(a=2, b=3., c='string2', d=date(2000, 2, 1), e=datetime(2000, 1, 2, 12, 0)), + Row(a=4, b=5., c='string3', d=date(2000, 3, 1), e=datetime(2000, 1, 3, 12, 0)) + ]) + + location = "s3a://my-bucket/spark-delta-test" + + df.write.format("delta").mode("overwrite").save(location) + + if __name__ == "__main__": + main() diff --git a/tests/templates/kuttl/delta-lake/helm-bitnami-minio-values.yaml b/tests/templates/kuttl/delta-lake/helm-bitnami-minio-values.yaml new file mode 100644 index 00000000..c8891024 --- /dev/null +++ b/tests/templates/kuttl/delta-lake/helm-bitnami-minio-values.yaml @@ -0,0 +1,23 @@ +--- +volumePermissions: + enabled: false + +podSecurityContext: + enabled: false + +containerSecurityContext: + enabled: false + +mode: standalone + +disableWebUI: true + +persistence: + enabled: false + +resources: + requests: + memory: 1Gi + +auth: + existingSecret: minio-credentials diff --git a/tests/templates/kuttl/iceberg/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/iceberg/10-deploy-spark-app.yaml.j2 index ede72f0d..89143cee 100644 --- a/tests/templates/kuttl/iceberg/10-deploy-spark-app.yaml.j2 +++ 
b/tests/templates/kuttl/iceberg/10-deploy-spark-app.yaml.j2 @@ -24,9 +24,6 @@ spec: spark.sql.catalog.local: org.apache.iceberg.spark.SparkCatalog spark.sql.catalog.local.type: hadoop spark.sql.catalog.local.warehouse: /tmp/warehouse - job: - logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} driver: config: logging: diff --git a/tests/templates/kuttl/logging/05-deploy-automatic-log-config-spark-app.yaml.j2 b/tests/templates/kuttl/logging/05-deploy-automatic-log-config-spark-app.yaml.j2 index 1ea2e310..5d2eafcc 100644 --- a/tests/templates/kuttl/logging/05-deploy-automatic-log-config-spark-app.yaml.j2 +++ b/tests/templates/kuttl/logging/05-deploy-automatic-log-config-spark-app.yaml.j2 @@ -17,27 +17,6 @@ spec: mode: cluster mainClass: org.apache.spark.examples.SparkALS mainApplicationFile: local:///stackable/spark/examples/jars/spark-examples.jar - job: - config: - logging: - enableVectorAgent: true - containers: - spark-submit: - console: - level: INFO - file: - level: INFO - loggers: - ROOT: - level: INFO - vector: - console: - level: INFO - file: - level: INFO - loggers: - ROOT: - level: INFO driver: config: logging: diff --git a/tests/templates/kuttl/logging/06-deploy-custom-log-config-spark-app.yaml.j2 b/tests/templates/kuttl/logging/06-deploy-custom-log-config-spark-app.yaml.j2 index a5227b37..a1338195 100644 --- a/tests/templates/kuttl/logging/06-deploy-custom-log-config-spark-app.yaml.j2 +++ b/tests/templates/kuttl/logging/06-deploy-custom-log-config-spark-app.yaml.j2 @@ -16,23 +16,6 @@ data: rootLogger.appenderRefs = FILE rootLogger.appenderRef.FILE.ref = FILE --- -apiVersion: v1 -kind: ConfigMap -metadata: - name: spark-submit-log-config -data: - log4j2.properties: |- - appenders = FILE - - appender.FILE.type = File - appender.FILE.name = FILE - appender.FILE.fileName = /stackable/log/spark-submit/spark.log4j2.xml - appender.FILE.layout.type = XMLLayout - - rootLogger.level = INFO - rootLogger.appenderRefs = FILE - rootLogger.appenderRef.FILE.ref = FILE ---- apiVersion: spark.stackable.tech/v1alpha1 kind: SparkApplication metadata: @@ -51,14 +34,6 @@ spec: mode: cluster mainClass: org.apache.spark.examples.SparkALS mainApplicationFile: local:///stackable/spark/examples/jars/spark-examples.jar - job: - config: - logging: - enableVectorAgent: true - containers: - spark-submit: - custom: - configMap: spark-submit-log-config driver: config: logging: diff --git a/tests/templates/kuttl/logging/07-deploy-automatic-log-config-pyspark-app.yaml.j2 b/tests/templates/kuttl/logging/07-deploy-automatic-log-config-pyspark-app.yaml.j2 index 85d903cd..d1e70604 100644 --- a/tests/templates/kuttl/logging/07-deploy-automatic-log-config-pyspark-app.yaml.j2 +++ b/tests/templates/kuttl/logging/07-deploy-automatic-log-config-pyspark-app.yaml.j2 @@ -18,27 +18,6 @@ spec: deps: requirements: - numpy==1.24.2 - job: - config: - logging: - enableVectorAgent: true - containers: - spark-submit: - console: - level: INFO - file: - level: INFO - loggers: - ROOT: - level: INFO - vector: - console: - level: INFO - file: - level: INFO - loggers: - ROOT: - level: INFO driver: config: logging: diff --git a/tests/templates/kuttl/logging/08-deploy-custom-log-config-pyspark-app.yaml.j2 b/tests/templates/kuttl/logging/08-deploy-custom-log-config-pyspark-app.yaml.j2 index f85328a1..3899b0e4 100644 --- a/tests/templates/kuttl/logging/08-deploy-custom-log-config-pyspark-app.yaml.j2 +++ b/tests/templates/kuttl/logging/08-deploy-custom-log-config-pyspark-app.yaml.j2 @@ -16,23 +16,6 @@ data: 
rootLogger.appenderRefs = FILE rootLogger.appenderRef.FILE.ref = FILE --- -apiVersion: v1 -kind: ConfigMap -metadata: - name: pyspark-submit-log-config -data: - log4j2.properties: |- - appenders = FILE - - appender.FILE.type = File - appender.FILE.name = FILE - appender.FILE.fileName = /stackable/log/spark-submit/spark.log4j2.xml - appender.FILE.layout.type = XMLLayout - - rootLogger.level = INFO - rootLogger.appenderRefs = FILE - rootLogger.appenderRef.FILE.ref = FILE ---- apiVersion: spark.stackable.tech/v1alpha1 kind: SparkApplication metadata: @@ -52,14 +35,6 @@ spec: deps: requirements: - numpy==1.24.2 - job: - config: - logging: - enableVectorAgent: true - containers: - spark-submit: - custom: - configMap: pyspark-submit-log-config driver: config: logging: diff --git a/tests/templates/kuttl/logging/spark-vector-aggregator-values.yaml.j2 b/tests/templates/kuttl/logging/spark-vector-aggregator-values.yaml.j2 index 3911a46c..91b785db 100644 --- a/tests/templates/kuttl/logging/spark-vector-aggregator-values.yaml.j2 +++ b/tests/templates/kuttl/logging/spark-vector-aggregator-values.yaml.j2 @@ -50,18 +50,6 @@ customConfig: # SparkApplication spark-automatic-log-config - filteredSparkAutomaticLogConfigSubmitSpark: - type: filter - inputs: [vector] - condition: >- - match(string!(.pod), r'^spark-automatic-log-config-[^-]+$') && - .container == "spark-submit" - filteredSparkAutomaticLogConfigSubmitVector: - type: filter - inputs: [vector] - condition: >- - match(string!(.pod), r'^spark-automatic-log-config-[^-]+$') && - .container == "vector" filteredSparkAutomaticLogConfigDriverSpark: type: filter inputs: [vector] @@ -107,18 +95,6 @@ customConfig: # SparkApplication spark-custom-log-config - filteredSparkCustomLogConfigSubmitSpark: - type: filter - inputs: [vector] - condition: >- - match(string!(.pod), r'^spark-custom-log-config-[^-]+$') && - .container == "spark-submit" - filteredSparkCustomLogConfigSubmitVector: - type: filter - inputs: [vector] - condition: >- - match(string!(.pod), r'^spark-custom-log-config-[^-]+$') && - .container == "vector" filteredSparkCustomLogConfigDriverSpark: type: filter inputs: [vector] @@ -164,18 +140,6 @@ customConfig: # SparkApplication pyspark-automatic-log-config - filteredPysparkAutomaticLogConfigSubmitSpark: - type: filter - inputs: [vector] - condition: >- - match(string!(.pod), r'^pyspark-automatic-log-config-[^-]+$') && - .container == "spark-submit" - filteredPysparkAutomaticLogConfigSubmitVector: - type: filter - inputs: [vector] - condition: >- - match(string!(.pod), r'^pyspark-automatic-log-config-[^-]+$') && - .container == "vector" filteredPysparkAutomaticLogConfigDriverSpark: type: filter inputs: [vector] @@ -221,18 +185,6 @@ customConfig: # SparkApplication pyspark-custom-log-config - filteredPysparkCustomLogConfigSubmitSpark: - type: filter - inputs: [vector] - condition: >- - match(string!(.pod), r'^pyspark-custom-log-config-[^-]+$') && - .container == "spark-submit" - filteredPysparkCustomLogConfigSubmitVector: - type: filter - inputs: [vector] - condition: >- - match(string!(.pod), r'^pyspark-custom-log-config-[^-]+$') && - .container == "vector" filteredPysparkCustomLogConfigDriverSpark: type: filter inputs: [vector] diff --git a/tests/templates/kuttl/pyspark-ny-public-s3-image/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/pyspark-ny-public-s3-image/10-deploy-spark-app.yaml.j2 index ce607ef2..35aa5dc6 100644 --- a/tests/templates/kuttl/pyspark-ny-public-s3-image/10-deploy-spark-app.yaml.j2 +++ 
b/tests/templates/kuttl/pyspark-ny-public-s3-image/10-deploy-spark-app.yaml.j2 @@ -29,10 +29,6 @@ spec: host: test-minio port: 9000 accessStyle: Path - job: - config: - logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} driver: config: logging: diff --git a/tests/templates/kuttl/pyspark-ny-public-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/pyspark-ny-public-s3/10-deploy-spark-app.yaml.j2 index 26f9a795..90986e01 100644 --- a/tests/templates/kuttl/pyspark-ny-public-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/pyspark-ny-public-s3/10-deploy-spark-app.yaml.j2 @@ -28,10 +28,6 @@ spec: host: test-minio port: 9000 accessStyle: Path - job: - config: - logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} driver: config: logging: diff --git a/tests/templates/kuttl/resources/10-assert.yaml.j2 b/tests/templates/kuttl/resources/10-assert.yaml.j2 index 5e33a5fd..eb4e706b 100644 --- a/tests/templates/kuttl/resources/10-assert.yaml.j2 +++ b/tests/templates/kuttl/resources/10-assert.yaml.j2 @@ -19,9 +19,6 @@ spec: requests: cpu: 250m memory: 512Mi -{% if lookup('env', 'VECTOR_AGGREGATOR') %} - - name: vector -{% endif %} --- apiVersion: v1 kind: Pod diff --git a/tests/templates/kuttl/resources/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/resources/10-deploy-spark-app.yaml.j2 index 23f885d2..cc8b9e5a 100644 --- a/tests/templates/kuttl/resources/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/resources/10-deploy-spark-app.yaml.j2 @@ -24,8 +24,6 @@ spec: spark.kubernetes.executor.podNamePrefix: "resources-crd" job: config: - logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} resources: cpu: min: 250m diff --git a/tests/templates/kuttl/resources/12-assert.yaml.j2 b/tests/templates/kuttl/resources/12-assert.yaml.j2 index ac9cd049..35ef7245 100644 --- a/tests/templates/kuttl/resources/12-assert.yaml.j2 +++ b/tests/templates/kuttl/resources/12-assert.yaml.j2 @@ -19,9 +19,6 @@ spec: requests: cpu: 100m memory: 512Mi -{% if lookup('env', 'VECTOR_AGGREGATOR') %} - - name: vector -{% endif %} --- apiVersion: v1 kind: Pod diff --git a/tests/templates/kuttl/resources/12-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/resources/12-deploy-spark-app.yaml.j2 index d04aec51..d2abc94b 100644 --- a/tests/templates/kuttl/resources/12-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/resources/12-deploy-spark-app.yaml.j2 @@ -33,10 +33,6 @@ spec: spark.executor.memory: "2g" spark.executor.memoryOverheadFactor: "0.4" spark.executor.instances: "1" - job: - config: - logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} driver: config: logging: diff --git a/tests/templates/kuttl/smoke/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/smoke/10-deploy-spark-app.yaml.j2 index b806109c..03a60a71 100644 --- a/tests/templates/kuttl/smoke/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/smoke/10-deploy-spark-app.yaml.j2 @@ -25,10 +25,6 @@ spec: prefix: eventlogs/ bucket: reference: spark-history-s3-bucket - job: - config: - logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} driver: config: logging: diff --git a/tests/templates/kuttl/spark-examples/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-examples/10-deploy-spark-app.yaml.j2 index 0658c426..c8804ab8 100644 --- a/tests/templates/kuttl/spark-examples/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-examples/10-deploy-spark-app.yaml.j2 @@ -18,10 +18,6 
@@ spec: mode: cluster mainClass: org.apache.spark.examples.SparkALS mainApplicationFile: "local:///stackable/spark/examples/jars/spark-examples.jar" - job: - config: - logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} driver: config: logging: diff --git a/tests/templates/kuttl/spark-history-server/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-history-server/10-deploy-spark-app.yaml.j2 index 54508763..39203355 100644 --- a/tests/templates/kuttl/spark-history-server/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-history-server/10-deploy-spark-app.yaml.j2 @@ -25,10 +25,6 @@ spec: prefix: eventlogs/ bucket: reference: spark-history-s3-bucket - job: - config: - logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} driver: config: logging: diff --git a/tests/templates/kuttl/spark-history-server/12-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-history-server/12-deploy-spark-app.yaml.j2 index 9bc1f4fc..45aa28f9 100644 --- a/tests/templates/kuttl/spark-history-server/12-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-history-server/12-deploy-spark-app.yaml.j2 @@ -25,10 +25,6 @@ spec: prefix: eventlogs/ bucket: reference: spark-history-s3-bucket - job: - config: - logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} driver: config: logging: diff --git a/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 index 756e13fa..4bf26655 100644 --- a/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 @@ -34,10 +34,6 @@ spec: - "--input /arguments/job-args.txt" s3connection: reference: minio - job: - config: - logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} driver: config: logging: diff --git a/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 index 953f3945..a1219635 100644 --- a/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 @@ -25,10 +25,6 @@ spec: accessStyle: Path credentials: secretClass: spark-pi-private-s3-credentials-class - job: - config: - logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} driver: config: logging: diff --git a/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 index a5dcf506..3ac11bae 100644 --- a/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 @@ -23,10 +23,6 @@ spec: host: test-minio port: 9000 accessStyle: Path - job: - config: - logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} driver: config: logging: diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index d33409c7..fb5967fb 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -9,7 +9,11 @@ dimensions: - 3.5.0 # Alternatively, if you want to use a custom image, append a comma and the full image name to the product version # as in the example below. 
- # - 3.4.0,docker.stackable.tech/sandbox/spark-k8s:3.4.0-stackable0.0.0-dev + # - 3.5.0,docker.stackable.tech/sandbox/spark-k8s:3.5.0-stackable0.0.0-dev + - name: spark-delta-lake + values: + - 3.5.0 + # - 3.5.0,docker.stackable.tech/sandbox/spark-k8s:3.5.0-stackable0.0.0-dev - name: ny-tlc-report values: - 0.1.0 @@ -17,6 +21,9 @@ dimensions: values: - "false" - "true" + - name: delta + values: + - 3.1.0 tests: - name: smoke dimensions: @@ -70,6 +77,10 @@ tests: - name: iceberg dimensions: - spark + - name: delta-lake + dimensions: + - spark-delta-lake + - delta suites: - name: nightly patch: