From 8e1f2371cd0370bad808a811bd18f954434b13f7 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 12 Feb 2024 11:33:07 +0100 Subject: [PATCH 01/18] feat(test): addded test for delta.io This fails with: ``` spark org/apache/spark/sql/delta/stats/StatisticsCollection$SqlParser$$anon$1.visitMultipartIdentifierList(Lorg/apache/spark/sql/catalyst/parser/SqlBaseParser$MultipartIdentifierListContext;)Lscala/collection/Seq; @17: invokevirtual spark Reason: spark Type 'org/apache/spark/sql/catalyst/parser/SqlBaseParser$MultipartIdentifierListContext' (current frame, stack[1]) is not assignable to 'org/antlr/v4/runtime/ParserRuleContext' spark Current Frame: spark bci: @17 spark flags: { } spark locals: { 'org/apache/spark/sql/delta/stats/StatisticsCollection$SqlParser$$anon$1', 'org/apache/spark/sql/catalyst/parser/SqlBaseParser$MultipartIdentifierListContext' } spark stack: { 'org/apache/spark/sql/catalyst/parser/ParserUtils$', 'org/apache/spark/sql/catalyst/parser/SqlBaseParser$MultipartIdentifierListContext', 'scala/Option', 'scala/Function0' } spark Bytecode: spark 0000000: b200 232b b200 23b6 0027 2a2b ba00 3f00 spark 0000010: 00b6 0043 c000 45b0 spark spark at org.apache.spark.sql.delta.stats.StatisticsCollection$SqlParser.(StatisticsCollection.scala:409) spark at org.apache.spark.sql.delta.stats.StatisticsCollection$.(StatisticsCollection.scala:422) spark at org.apache.spark.sql.delta.stats.StatisticsCollection$.(StatisticsCollection.scala) spark at org.apache.spark.sql.delta.OptimisticTransactionImpl.updateMetadataInternal(OptimisticTransaction.scala:429) spark at org.apache.spark.sql.delta.OptimisticTransactionImpl.updateMetadataInternal$(OptimisticTransaction.scala:424) spark at org.apache.spark.sql.delta.OptimisticTransaction.updateMetadataInternal(OptimisticTransaction.scala:142) spark at org.apache.spark.sql.delta.OptimisticTransactionImpl.updateMetadata(OptimisticTransaction.scala:400) spark at org.apache.spark.sql.delta.OptimisticTransactionImpl.updateMetadata$(OptimisticTransaction.scala:393) spark at org.apache.spark.sql.delta.OptimisticTransaction.updateMetadata(OptimisticTransaction.scala:142) spark at org.apache.spark.sql.delta.schema.ImplicitMetadataOperation.updateMetadata(ImplicitMetadataOperation.scala:97) spark at org.apache.spark.sql.delta.schema.ImplicitMetadataOperation.updateMetadata$(ImplicitMetadataOperation.scala:56) spark at org.apache.spark.sql.delta.commands.WriteIntoDelta.updateMetadata(WriteIntoDelta.scala:76) spark at org.apache.spark.sql.delta.commands.WriteIntoDelta.write(WriteIntoDelta.scala:162) spark at org.apache.spark.sql.delta.commands.WriteIntoDelta.$anonfun$run$1(WriteIntoDelta.scala:105) ``` --- tests/templates/kuttl/delta/01-assert.yaml.j2 | 10 +++ ...tor-aggregator-discovery-configmap.yaml.j2 | 9 ++ tests/templates/kuttl/delta/10-s3-secret.yaml | 25 ++++++ tests/templates/kuttl/delta/20-assert.yaml | 20 +++++ .../templates/kuttl/delta/20-setup-minio.yaml | 48 ++++++++++ .../kuttl/delta/30-prepare-bucket.yaml | 9 ++ tests/templates/kuttl/delta/40-assert.yaml | 12 +++ .../kuttl/delta/40-spark-app.yaml.j2 | 88 +++++++++++++++++++ .../delta/helm-bitnami-minio-values.yaml | 23 +++++ tests/test-definition.yaml | 7 ++ 10 files changed, 251 insertions(+) create mode 100644 tests/templates/kuttl/delta/01-assert.yaml.j2 create mode 100644 tests/templates/kuttl/delta/01-install-vector-aggregator-discovery-configmap.yaml.j2 create mode 100644 tests/templates/kuttl/delta/10-s3-secret.yaml create mode 100644 
tests/templates/kuttl/delta/20-assert.yaml create mode 100644 tests/templates/kuttl/delta/20-setup-minio.yaml create mode 100644 tests/templates/kuttl/delta/30-prepare-bucket.yaml create mode 100644 tests/templates/kuttl/delta/40-assert.yaml create mode 100644 tests/templates/kuttl/delta/40-spark-app.yaml.j2 create mode 100644 tests/templates/kuttl/delta/helm-bitnami-minio-values.yaml diff --git a/tests/templates/kuttl/delta/01-assert.yaml.j2 b/tests/templates/kuttl/delta/01-assert.yaml.j2 new file mode 100644 index 00000000..50b1d4c3 --- /dev/null +++ b/tests/templates/kuttl/delta/01-assert.yaml.j2 @@ -0,0 +1,10 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +{% if lookup('env', 'VECTOR_AGGREGATOR') %} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: vector-aggregator-discovery +{% endif %} diff --git a/tests/templates/kuttl/delta/01-install-vector-aggregator-discovery-configmap.yaml.j2 b/tests/templates/kuttl/delta/01-install-vector-aggregator-discovery-configmap.yaml.j2 new file mode 100644 index 00000000..2d6a0df5 --- /dev/null +++ b/tests/templates/kuttl/delta/01-install-vector-aggregator-discovery-configmap.yaml.j2 @@ -0,0 +1,9 @@ +{% if lookup('env', 'VECTOR_AGGREGATOR') %} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: vector-aggregator-discovery +data: + ADDRESS: {{ lookup('env', 'VECTOR_AGGREGATOR') }} +{% endif %} diff --git a/tests/templates/kuttl/delta/10-s3-secret.yaml b/tests/templates/kuttl/delta/10-s3-secret.yaml new file mode 100644 index 00000000..5c78faeb --- /dev/null +++ b/tests/templates/kuttl/delta/10-s3-secret.yaml @@ -0,0 +1,25 @@ +--- +apiVersion: v1 +kind: Secret +metadata: + name: minio-credentials + labels: + secrets.stackable.tech/class: s3-credentials-class +timeout: 240 +stringData: + accessKey: minioAccessKey + secretKey: minioSecretKey + # The following two entries are used by the Bitnami chart for MinIO to + # set up credentials for accessing buckets managed by the MinIO tenant. 
+ root-user: minioAccessKey + root-password: minioSecretKey +--- +apiVersion: secrets.stackable.tech/v1alpha1 +kind: SecretClass +metadata: + name: s3-credentials-class +spec: + backend: + k8sSearch: + searchNamespace: + pod: {} diff --git a/tests/templates/kuttl/delta/20-assert.yaml b/tests/templates/kuttl/delta/20-assert.yaml new file mode 100644 index 00000000..fbbea3bd --- /dev/null +++ b/tests/templates/kuttl/delta/20-assert.yaml @@ -0,0 +1,20 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 900 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: test-minio +status: + readyReplicas: 1 +--- +apiVersion: v1 +kind: Pod +metadata: + name: minio-client + labels: + app: minio-client +status: + phase: Running diff --git a/tests/templates/kuttl/delta/20-setup-minio.yaml b/tests/templates/kuttl/delta/20-setup-minio.yaml new file mode 100644 index 00000000..ef60c970 --- /dev/null +++ b/tests/templates/kuttl/delta/20-setup-minio.yaml @@ -0,0 +1,48 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + - script: >- + helm install test-minio + --namespace $NAMESPACE + --version 11.9.2 + -f helm-bitnami-minio-values.yaml + --repo https://charts.bitnami.com/bitnami minio + timeout: 240 +--- +apiVersion: v1 +kind: Pod +metadata: + name: minio-client + labels: + app: minio-client +spec: + selector: + matchLabels: + app: minio-client + restartPolicy: Never + containers: + - name: minio-client + image: docker.io/bitnami/minio-client:2022.8.11-debian-11-r3 + command: ["bash", "-c", "sleep infinity"] + stdin: true + tty: true + env: + - name: MINIO_SERVER_ACCESS_KEY + valueFrom: + secretKeyRef: + name: minio-credentials + key: root-user + optional: false + - name: MINIO_SERVER_SECRET_KEY + valueFrom: + secretKeyRef: + name: minio-credentials + key: root-password + optional: false + - name: MINIO_SERVER_HOST + value: test-minio + - name: MINIO_SERVER_PORT_NUMBER + value: "9000" + - name: MINIO_SERVER_SCHEME + value: http diff --git a/tests/templates/kuttl/delta/30-prepare-bucket.yaml b/tests/templates/kuttl/delta/30-prepare-bucket.yaml new file mode 100644 index 00000000..9fd9b617 --- /dev/null +++ b/tests/templates/kuttl/delta/30-prepare-bucket.yaml @@ -0,0 +1,9 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestStep +commands: + # give minio enough time to start + - command: sleep 10 + - command: kubectl exec -n $NAMESPACE minio-client -- sh -c 'mc alias set test-minio http://test-minio:9000 $$MINIO_SERVER_ACCESS_KEY $$MINIO_SERVER_SECRET_KEY' + - command: kubectl exec -n $NAMESPACE minio-client -- mc mb test-minio/my-bucket + - command: kubectl exec -n $NAMESPACE minio-client -- mc policy set public test-minio/my-bucket diff --git a/tests/templates/kuttl/delta/40-assert.yaml b/tests/templates/kuttl/delta/40-assert.yaml new file mode 100644 index 00000000..477afd3a --- /dev/null +++ b/tests/templates/kuttl/delta/40-assert.yaml @@ -0,0 +1,12 @@ +--- +apiVersion: kuttl.dev/v1beta1 +kind: TestAssert +timeout: 300 +--- +# The Job starting the whole process +apiVersion: spark.stackable.tech/v1alpha1 +kind: SparkApplication +metadata: + name: pyspark-delta +status: + phase: Succeeded diff --git a/tests/templates/kuttl/delta/40-spark-app.yaml.j2 b/tests/templates/kuttl/delta/40-spark-app.yaml.j2 new file mode 100644 index 00000000..5d351193 --- /dev/null +++ b/tests/templates/kuttl/delta/40-spark-app.yaml.j2 @@ -0,0 +1,88 @@ +--- +apiVersion: spark.stackable.tech/v1alpha1 +kind: SparkApplication +metadata: + name: pyspark-delta +spec: +{% if lookup('env', 
'VECTOR_AGGREGATOR') %} + vectorAggregatorConfigMapName: vector-aggregator-discovery +{% endif %} + sparkImage: +{% if test_scenario['values']['spark'].find(",") > 0 %} + custom: "{{ test_scenario['values']['spark'].split(',')[1] }}" + productVersion: "{{ test_scenario['values']['spark'].split(',')[0] }}" +{% else %} + productVersion: "{{ test_scenario['values']['spark'] }}" +{% endif %} + pullPolicy: IfNotPresent + mode: cluster + mainApplicationFile: "local:///stackable/spark/jobs/write-to-delta.py" + s3connection: + inline: + host: test-minio + port: 9000 + accessStyle: Path + credentials: + secretClass: s3-credentials-class + job: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + driver: + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + volumeMounts: + - name: script + mountPath: /stackable/spark/jobs + executor: + replicas: 1 + config: + logging: + enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} + volumeMounts: + - name: script + mountPath: /stackable/spark/jobs + deps: + requirements: + - importlib-metadata + - delta-spark=={{ test_scenario['values']['delta'] }} + packages: + - io.delta:delta-spark_2.12:{{ test_scenario['values']['delta'] }} + volumes: + - name: script + configMap: + name: write-to-delta +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: write-to-delta +data: + write-to-delta.py: | + from datetime import datetime, date + from pyspark.sql import Row, SparkSession + from delta import * + + def main(): + builder = SparkSession.builder.appName("MyApp") \ + .config("spark.hadoop.fs.s3a.aws.credentials.provider", "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider") \ + .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \ + .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \ + .config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") \ + .config("spark.delta.logStore.class", "org.apache.spark.sql.delta.storage.S3SingleDriverLogStore") \ + .config("spark.hadoop.delta.enableFastS3AListFrom", "true") + + spark = configure_spark_with_delta_pip(builder).getOrCreate() + + df = spark.createDataFrame([ + Row(a=1, b=2., c='string1', d=date(2000, 1, 1), e=datetime(2000, 1, 1, 12, 0)), + Row(a=2, b=3., c='string2', d=date(2000, 2, 1), e=datetime(2000, 1, 2, 12, 0)), + Row(a=4, b=5., c='string3', d=date(2000, 3, 1), e=datetime(2000, 1, 3, 12, 0)) + ]) + + location = "s3a://my-bucket/spark-delta-test" + + df.write.format("delta").mode("overwrite").save(location) + + if __name__ == "__main__": + main() diff --git a/tests/templates/kuttl/delta/helm-bitnami-minio-values.yaml b/tests/templates/kuttl/delta/helm-bitnami-minio-values.yaml new file mode 100644 index 00000000..c8891024 --- /dev/null +++ b/tests/templates/kuttl/delta/helm-bitnami-minio-values.yaml @@ -0,0 +1,23 @@ +--- +volumePermissions: + enabled: false + +podSecurityContext: + enabled: false + +containerSecurityContext: + enabled: false + +mode: standalone + +disableWebUI: true + +persistence: + enabled: false + +resources: + requests: + memory: 1Gi + +auth: + existingSecret: minio-credentials diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index d33409c7..2f4ff107 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -17,6 +17,9 @@ dimensions: values: - "false" - "true" + - name: delta + values: + - 3.1.0 tests: - name: smoke dimensions: @@ -70,6 +73,10 @@ tests: - name: iceberg 
dimensions: - spark + - name: delta + dimensions: + - spark + - delta suites: - name: nightly patch: From e82bb56dfd4335fe8a6f90215a12b9e4a6646b73 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 12 Feb 2024 15:45:16 +0100 Subject: [PATCH 02/18] fix: remove userClassPathFirst and extraClassPath properties. --- rust/crd/src/lib.rs | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index b8ba860a..bd326899 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -483,11 +483,7 @@ impl SparkApplication { format!("--conf spark.kubernetes.executor.container.image={}", spark_image.to_string()), format!("--conf spark.kubernetes.authenticate.driver.serviceAccountName={}", serviceaccount_name), format!("--conf spark.driver.defaultJavaOptions=-Dlog4j.configurationFile={VOLUME_MOUNT_PATH_LOG_CONFIG}/{LOG4J2_CONFIG_FILE}"), - format!("--conf spark.driver.extraClassPath=/stackable/spark/extra-jars/*"), - "--conf spark.driver.userClassPathFirst=true".to_string(), - format!("--conf spark.executor.defaultJavaOptions=-Dlog4j.configurationFile={VOLUME_MOUNT_PATH_LOG_CONFIG}/{LOG4J2_CONFIG_FILE}"), - format!("--conf spark.executor.extraClassPath=/stackable/spark/extra-jars/*"), - "--conf spark.executor.userClassPathFirst=true".to_string(), + format!("--conf spark.executor.defaultJavaOptions=-Dlog4j.configurationFile={VOLUME_MOUNT_PATH_LOG_CONFIG}/{LOG4J2_CONFIG_FILE}") ]); // See https://spark.apache.org/docs/latest/running-on-kubernetes.html#dependency-management From 7f65711edc626a8c2a4952e523ad4ca8dcc1b6b3 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 12 Feb 2024 17:22:42 +0100 Subject: [PATCH 03/18] feat: remove all refs to the extra-jars folder --- CHANGELOG.md | 3 +++ .../src/history/history_controller.rs | 5 ----- rust/operator-binary/src/spark_k8s_controller.rs | 3 +-- tests/templates/kuttl/delta/40-spark-app.yaml.j2 | 8 ++++---- tests/test-definition.yaml | 15 +++++++++++---- 5 files changed, 19 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 91ae7389..a9b621ee 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ All notable changes to this project will be documented in this file. - Various documentation of the CRD ([#319]). - [BREAKING] Removed version field. Several attributes have been changed to mandatory. While this change is technically breaking, existing Spark jobs would not have worked before as these attributes were necessary ([#319]). +- [BREAKING] Remove `userClassPathFirst` properties from `spark-submit` and all references to the + `/stackable/spark/extra-jars` folder. This PR is paired with the `docker-images` PR #556 which removes that folder. ([#350]) ### Fixed @@ -22,6 +24,7 @@ All notable changes to this project will be documented in this file. 
[#313]: https://github.com/stackabletech/spark-k8s-operator/pull/313 [#319]: https://github.com/stackabletech/spark-k8s-operator/pull/319 [#344]: https://github.com/stackabletech/spark-k8s-operator/pull/344 +[#350]: https://github.com/stackabletech/spark-k8s-operator/pull/350 ## [23.11.0] - 2023-11-24 diff --git a/rust/operator-binary/src/history/history_controller.rs b/rust/operator-binary/src/history/history_controller.rs index e8744c72..cc262405 100644 --- a/rust/operator-binary/src/history/history_controller.rs +++ b/rust/operator-binary/src/history/history_controller.rs @@ -671,11 +671,6 @@ fn env_vars(s3logdir: &S3LogDir) -> Vec { value: Some("true".into()), value_from: None, }); - vars.push(EnvVar { - name: "SPARK_DAEMON_CLASSPATH".to_string(), - value: Some("/stackable/spark/extra-jars/*".into()), - value_from: None, - }); let mut history_opts = vec![ format!("-Dlog4j.configurationFile={VOLUME_MOUNT_PATH_LOG_CONFIG}/{LOG4J2_CONFIG_FILE}"), diff --git a/rust/operator-binary/src/spark_k8s_controller.rs b/rust/operator-binary/src/spark_k8s_controller.rs index 8ac476a8..87c8b53b 100644 --- a/rust/operator-binary/src/spark_k8s_controller.rs +++ b/rust/operator-binary/src/spark_k8s_controller.rs @@ -762,8 +762,7 @@ fn spark_job( .add_env_var( "SPARK_SUBMIT_OPTS", format!( - "-cp /stackable/spark/extra-jars/*:/stackable/spark/jars/* \ - -Dlog4j.configurationFile={VOLUME_MOUNT_PATH_LOG_CONFIG}/{LOG4J2_CONFIG_FILE}" + "-Dlog4j.configurationFile={VOLUME_MOUNT_PATH_LOG_CONFIG}/{LOG4J2_CONFIG_FILE}" ), ) // TODO: move this to the image diff --git a/tests/templates/kuttl/delta/40-spark-app.yaml.j2 b/tests/templates/kuttl/delta/40-spark-app.yaml.j2 index 5d351193..981549f9 100644 --- a/tests/templates/kuttl/delta/40-spark-app.yaml.j2 +++ b/tests/templates/kuttl/delta/40-spark-app.yaml.j2 @@ -8,11 +8,11 @@ spec: vectorAggregatorConfigMapName: vector-aggregator-discovery {% endif %} sparkImage: -{% if test_scenario['values']['spark'].find(",") > 0 %} - custom: "{{ test_scenario['values']['spark'].split(',')[1] }}" - productVersion: "{{ test_scenario['values']['spark'].split(',')[0] }}" +{% if test_scenario['values']['spark-delta'].find(",") > 0 %} + custom: "{{ test_scenario['values']['spark-delta'].split(',')[1] }}" + productVersion: "{{ test_scenario['values']['spark-delta'].split(',')[0] }}" {% else %} - productVersion: "{{ test_scenario['values']['spark'] }}" + productVersion: "{{ test_scenario['values']['spark-delta'] }}" {% endif %} pullPolicy: IfNotPresent mode: cluster diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index 2f4ff107..0449e7ef 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -5,11 +5,18 @@ dimensions: - "false" - name: spark values: - - 3.4.1 - - 3.5.0 + #- 3.4.1 + #- 3.5.0 # Alternatively, if you want to use a custom image, append a comma and the full image name to the product version # as in the example below. 
- # - 3.4.0,docker.stackable.tech/sandbox/spark-k8s:3.4.0-stackable0.0.0-dev + - 3.4.1,docker.stackable.tech/sandbox/spark-k8s:3.4.1-stackable0.0.0-dev + - 3.5.0,docker.stackable.tech/sandbox/spark-k8s:3.5.0-stackable0.0.0-dev + - name: spark-delta + values: + #- 3.5.0 + - 3.4.1,docker.stackable.tech/sandbox/spark-k8s:3.4.1-stackable0.0.0-dev + - 3.5.0,docker.stackable.tech/sandbox/spark-k8s:3.5.0-stackable0.0.0-dev + - name: ny-tlc-report values: - 0.1.0 @@ -75,7 +82,7 @@ tests: - spark - name: delta dimensions: - - spark + - spark-delta - delta suites: - name: nightly From faef52de6d6405750e47eb7199bfe5cac83128ba Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 12 Feb 2024 17:25:23 +0100 Subject: [PATCH 04/18] fix: pr number --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a9b621ee..243f0d60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ All notable changes to this project will be documented in this file. - [BREAKING] Removed version field. Several attributes have been changed to mandatory. While this change is technically breaking, existing Spark jobs would not have worked before as these attributes were necessary ([#319]). - [BREAKING] Remove `userClassPathFirst` properties from `spark-submit` and all references to the - `/stackable/spark/extra-jars` folder. This PR is paired with the `docker-images` PR #556 which removes that folder. ([#350]) + `/stackable/spark/extra-jars` folder. This PR is paired with the `docker-images` PR #556 which removes that folder. ([#355]) ### Fixed @@ -24,7 +24,7 @@ All notable changes to this project will be documented in this file. [#313]: https://github.com/stackabletech/spark-k8s-operator/pull/313 [#319]: https://github.com/stackabletech/spark-k8s-operator/pull/319 [#344]: https://github.com/stackabletech/spark-k8s-operator/pull/344 -[#350]: https://github.com/stackabletech/spark-k8s-operator/pull/350 +[#355]: https://github.com/stackabletech/spark-k8s-operator/pull/355 ## [23.11.0] - 2023-11-24 From fa0f40fed91eddab9ff7f686b4113ea694fd4110 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Mon, 12 Feb 2024 17:34:51 +0100 Subject: [PATCH 05/18] fix lints --- rust/operator-binary/src/spark_k8s_controller.rs | 2 +- tests/test-definition.yaml | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/rust/operator-binary/src/spark_k8s_controller.rs b/rust/operator-binary/src/spark_k8s_controller.rs index 87c8b53b..e8f04f3f 100644 --- a/rust/operator-binary/src/spark_k8s_controller.rs +++ b/rust/operator-binary/src/spark_k8s_controller.rs @@ -762,7 +762,7 @@ fn spark_job( .add_env_var( "SPARK_SUBMIT_OPTS", format!( - "-Dlog4j.configurationFile={VOLUME_MOUNT_PATH_LOG_CONFIG}/{LOG4J2_CONFIG_FILE}" + "-Dlog4j.configurationFile={VOLUME_MOUNT_PATH_LOG_CONFIG}/{LOG4J2_CONFIG_FILE}" ), ) // TODO: move this to the image diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index 0449e7ef..56ca4b26 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -5,16 +5,15 @@ dimensions: - "false" - name: spark values: - #- 3.4.1 - #- 3.5.0 + # - 3.4.1 + # - 3.5.0 # Alternatively, if you want to use a custom image, append a comma and the full image name to the product version # as in the example below. 
- 3.4.1,docker.stackable.tech/sandbox/spark-k8s:3.4.1-stackable0.0.0-dev - 3.5.0,docker.stackable.tech/sandbox/spark-k8s:3.5.0-stackable0.0.0-dev - name: spark-delta values: - #- 3.5.0 - - 3.4.1,docker.stackable.tech/sandbox/spark-k8s:3.4.1-stackable0.0.0-dev + # - 3.5.0 - 3.5.0,docker.stackable.tech/sandbox/spark-k8s:3.5.0-stackable0.0.0-dev - name: ny-tlc-report From a93de66734d4e0a6fcd3be6f5233d7f6f8bfe41e Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Tue, 13 Feb 2024 11:34:53 +0100 Subject: [PATCH 06/18] Revert extra-jars removal but don't set the userClassPathFirst properties. --- rust/crd/src/lib.rs | 7 ++++++- rust/operator-binary/src/history/history_controller.rs | 5 +++++ tests/test-definition.yaml | 6 +++--- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index bd326899..2a2ff252 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -483,7 +483,12 @@ impl SparkApplication { format!("--conf spark.kubernetes.executor.container.image={}", spark_image.to_string()), format!("--conf spark.kubernetes.authenticate.driver.serviceAccountName={}", serviceaccount_name), format!("--conf spark.driver.defaultJavaOptions=-Dlog4j.configurationFile={VOLUME_MOUNT_PATH_LOG_CONFIG}/{LOG4J2_CONFIG_FILE}"), - format!("--conf spark.executor.defaultJavaOptions=-Dlog4j.configurationFile={VOLUME_MOUNT_PATH_LOG_CONFIG}/{LOG4J2_CONFIG_FILE}") + format!("--conf spark.driver.extraClassPath=/stackable/spark/extra-jars/*"), + //"--conf spark.driver.userClassPathFirst=true".to_string(), + "--conf spark.driver.userClassPathFirst=true".to_string(), + format!("--conf spark.executor.defaultJavaOptions=-Dlog4j.configurationFile={VOLUME_MOUNT_PATH_LOG_CONFIG}/{LOG4J2_CONFIG_FILE}"), + format!("--conf spark.executor.extraClassPath=/stackable/spark/extra-jars/*"), + //"--conf spark.executor.userClassPathFirst=true".to_string(), ]); // See https://spark.apache.org/docs/latest/running-on-kubernetes.html#dependency-management diff --git a/rust/operator-binary/src/history/history_controller.rs b/rust/operator-binary/src/history/history_controller.rs index cc262405..e8744c72 100644 --- a/rust/operator-binary/src/history/history_controller.rs +++ b/rust/operator-binary/src/history/history_controller.rs @@ -671,6 +671,11 @@ fn env_vars(s3logdir: &S3LogDir) -> Vec { value: Some("true".into()), value_from: None, }); + vars.push(EnvVar { + name: "SPARK_DAEMON_CLASSPATH".to_string(), + value: Some("/stackable/spark/extra-jars/*".into()), + value_from: None, + }); let mut history_opts = vec![ format!("-Dlog4j.configurationFile={VOLUME_MOUNT_PATH_LOG_CONFIG}/{LOG4J2_CONFIG_FILE}"), diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index 56ca4b26..6d0de711 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -6,11 +6,11 @@ dimensions: - name: spark values: # - 3.4.1 - # - 3.5.0 + - 3.5.0 # Alternatively, if you want to use a custom image, append a comma and the full image name to the product version # as in the example below. 
- - 3.4.1,docker.stackable.tech/sandbox/spark-k8s:3.4.1-stackable0.0.0-dev - - 3.5.0,docker.stackable.tech/sandbox/spark-k8s:3.5.0-stackable0.0.0-dev + #- 3.4.1,docker.stackable.tech/sandbox/spark-k8s:3.4.1-stackable0.0.0-dev + #- 3.5.0,docker.stackable.tech/sandbox/spark-k8s:3.5.0-stackable0.0.0-dev - name: spark-delta values: # - 3.5.0 From a76d5e1204c03f2f3ed344646f6b26c9f32c4c4e Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Tue, 13 Feb 2024 14:30:56 +0100 Subject: [PATCH 07/18] fix(tests): update Python script for logging tests --- rust/crd/src/lib.rs | 3 --- tests/templates/kuttl/logging/test_log_aggregation.py | 6 ++++++ tests/test-definition.yaml | 10 ++++------ 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 2a2ff252..83615774 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -484,11 +484,8 @@ impl SparkApplication { format!("--conf spark.kubernetes.authenticate.driver.serviceAccountName={}", serviceaccount_name), format!("--conf spark.driver.defaultJavaOptions=-Dlog4j.configurationFile={VOLUME_MOUNT_PATH_LOG_CONFIG}/{LOG4J2_CONFIG_FILE}"), format!("--conf spark.driver.extraClassPath=/stackable/spark/extra-jars/*"), - //"--conf spark.driver.userClassPathFirst=true".to_string(), - "--conf spark.driver.userClassPathFirst=true".to_string(), format!("--conf spark.executor.defaultJavaOptions=-Dlog4j.configurationFile={VOLUME_MOUNT_PATH_LOG_CONFIG}/{LOG4J2_CONFIG_FILE}"), format!("--conf spark.executor.extraClassPath=/stackable/spark/extra-jars/*"), - //"--conf spark.executor.userClassPathFirst=true".to_string(), ]); // See https://spark.apache.org/docs/latest/running-on-kubernetes.html#dependency-management diff --git a/tests/templates/kuttl/logging/test_log_aggregation.py b/tests/templates/kuttl/logging/test_log_aggregation.py index 90124086..56fd7ebd 100755 --- a/tests/templates/kuttl/logging/test_log_aggregation.py +++ b/tests/templates/kuttl/logging/test_log_aggregation.py @@ -37,6 +37,12 @@ def check_sent_events(): assert sentEvents is None or \ sentEvents['sentEventsTotal'] == 0, \ 'Invalid log events were sent.' + elif componentId.find('LogConfigSubmitSpark') > 0: + # [*LogConfigSubmitSpark] components do not log in XML format + # because spark-submit doesn't know about the /stackable/spark/extra-jars folder. + assert sentEvents is None or \ + sentEvents['sentEventsTotal'] == 0, \ + f'Found events for {componentId} but did not expect any.' else: assert sentEvents is not None and \ sentEvents['sentEventsTotal'] > 0, \ diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml index 6d0de711..d95461f4 100644 --- a/tests/test-definition.yaml +++ b/tests/test-definition.yaml @@ -5,17 +5,15 @@ dimensions: - "false" - name: spark values: - # - 3.4.1 + - 3.4.1 - 3.5.0 # Alternatively, if you want to use a custom image, append a comma and the full image name to the product version # as in the example below. 
- #- 3.4.1,docker.stackable.tech/sandbox/spark-k8s:3.4.1-stackable0.0.0-dev - #- 3.5.0,docker.stackable.tech/sandbox/spark-k8s:3.5.0-stackable0.0.0-dev + # - 3.5.0,docker.stackable.tech/sandbox/spark-k8s:3.5.0-stackable0.0.0-dev - name: spark-delta values: - # - 3.5.0 - - 3.5.0,docker.stackable.tech/sandbox/spark-k8s:3.5.0-stackable0.0.0-dev - + - 3.5.0 + # - 3.5.0,docker.stackable.tech/sandbox/spark-k8s:3.5.0-stackable0.0.0-dev - name: ny-tlc-report values: - 0.1.0 From 91cf911f7a11ddff44a3ea13138d6510d74ed851 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Tue, 13 Feb 2024 15:14:07 +0100 Subject: [PATCH 08/18] fix(docs): update changelog and add logging.adoc page --- CHANGELOG.md | 6 ++++-- docs/modules/spark-k8s/pages/usage-guide/logging.adoc | 5 +++++ 2 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 docs/modules/spark-k8s/pages/usage-guide/logging.adoc diff --git a/CHANGELOG.md b/CHANGELOG.md index 243f0d60..b0d2ad5f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,8 +13,10 @@ All notable changes to this project will be documented in this file. - Various documentation of the CRD ([#319]). - [BREAKING] Removed version field. Several attributes have been changed to mandatory. While this change is technically breaking, existing Spark jobs would not have worked before as these attributes were necessary ([#319]). -- [BREAKING] Remove `userClassPathFirst` properties from `spark-submit` and all references to the - `/stackable/spark/extra-jars` folder. This PR is paired with the `docker-images` PR #556 which removes that folder. ([#355]) +- [BREAKING] Remove `userClassPathFirst` properties from `spark-submit`. This is an experimental feature that was + introduced to support logging in xml format. The side-effect of this removal is that the vector agent cannot + aggregate output from the `spark-submit` containers. On the other side, it enables dynamic provisionining of + delta.io packages (among others) with Stackable stock images which is much more important. ([#355]) ### Fixed diff --git a/docs/modules/spark-k8s/pages/usage-guide/logging.adoc b/docs/modules/spark-k8s/pages/usage-guide/logging.adoc new file mode 100644 index 00000000..271fc99c --- /dev/null +++ b/docs/modules/spark-k8s/pages/usage-guide/logging.adoc @@ -0,0 +1,5 @@ += Logging + +Logs produced by SparkApplications are automatically collected by vector agents and can be aggregated as described in xref:concepts:logging.adoc[roles] + +NOTE: Only logs produced by the application's driver and executors are collected. Logs produced by `spark-submit` are discarded. From 8531e85ce474f2b3c8f869e9a17c5b21b783292e Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Tue, 13 Feb 2024 15:57:55 +0100 Subject: [PATCH 09/18] fix: language tool lints --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b0d2ad5f..9c375eed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ All notable changes to this project will be documented in this file. - [BREAKING] Removed version field. Several attributes have been changed to mandatory. While this change is technically breaking, existing Spark jobs would not have worked before as these attributes were necessary ([#319]). - [BREAKING] Remove `userClassPathFirst` properties from `spark-submit`. This is an experimental feature that was - introduced to support logging in xml format. 
The side-effect of this removal is that the vector agent cannot + introduced to support logging in XML format. The side effect of this removal is that the vector agent cannot aggregate output from the `spark-submit` containers. On the other side, it enables dynamic provisionining of delta.io packages (among others) with Stackable stock images which is much more important. ([#355]) From ccb667203b9c5aa2db924ec5f47ea3ac4cc9b81a Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Wed, 14 Feb 2024 13:43:36 +0100 Subject: [PATCH 10/18] fix: better logging docs --- docs/modules/spark-k8s/pages/usage-guide/logging.adoc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/modules/spark-k8s/pages/usage-guide/logging.adoc b/docs/modules/spark-k8s/pages/usage-guide/logging.adoc index 271fc99c..ae4b571e 100644 --- a/docs/modules/spark-k8s/pages/usage-guide/logging.adoc +++ b/docs/modules/spark-k8s/pages/usage-guide/logging.adoc @@ -1,5 +1,8 @@ = Logging -Logs produced by SparkApplications are automatically collected by vector agents and can be aggregated as described in xref:concepts:logging.adoc[roles] +The Spark operator installs a https://vector.dev/docs/setup/deployment/roles/#agent[vector agent] as a side-car container in every application Pod except the `job` Pod that runs `spark-submit`. It also configures the logging framework to output logs in XML format. This is the same https://logging.apache.org/log4j/2.x/manual/layouts.html#XMLLayout[format] used across all Stackable products and it enables the https://vector.dev/docs/setup/deployment/roles/#aggregator[vector aggregator] to collect logs across the entire platform. + +It is the user's responsibility to install and configure the vector aggregator, but the agents can discover the aggregator automatically using a discovery ConfigMap as described in the xref:concepts:logging.adoc[logging concepts]. NOTE: Only logs produced by the application's driver and executors are collected. Logs produced by `spark-submit` are discarded. 
+ From eba079557f4c465bc05a0ce3f2901d5c9743533c Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Wed, 14 Feb 2024 15:58:20 +0100 Subject: [PATCH 11/18] Remove the logging configuration for the Spark submit job --- rust/crd/src/lib.rs | 41 +++++++------ rust/crd/src/roles.rs | 5 +- .../src/spark_k8s_controller.rs | 61 ++----------------- ...loy-automatic-log-config-spark-app.yaml.j2 | 21 ------- ...deploy-custom-log-config-spark-app.yaml.j2 | 25 -------- ...y-automatic-log-config-pyspark-app.yaml.j2 | 21 ------- ...ploy-custom-log-config-pyspark-app.yaml.j2 | 25 -------- .../spark-vector-aggregator-values.yaml.j2 | 48 --------------- .../kuttl/logging/test_log_aggregation.py | 6 -- 9 files changed, 28 insertions(+), 225 deletions(-) diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 83615774..1f4e55f8 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -253,7 +253,7 @@ impl SparkApplication { &self, s3conn: &Option, s3logdir: &Option, - log_config_map: &str, + log_config_map: Option<&str>, ) -> Result, Error> { let mut result: Vec = self.spec.volumes.clone(); @@ -294,11 +294,13 @@ impl SparkApplication { } } - result.push( - VolumeBuilder::new(VOLUME_MOUNT_NAME_LOG_CONFIG) - .with_config_map(log_config_map) - .build(), - ); + if let Some(log_config_map) = log_config_map { + result.push( + VolumeBuilder::new(VOLUME_MOUNT_NAME_LOG_CONFIG) + .with_config_map(log_config_map) + .build(), + ); + } result.push( VolumeBuilder::new(VOLUME_MOUNT_NAME_LOG) @@ -358,7 +360,7 @@ impl SparkApplication { ..VolumeMount::default() }, ]; - self.add_common_volume_mounts(volume_mounts, s3conn, s3logdir) + self.add_common_volume_mounts(volume_mounts, s3conn, s3logdir, false) } fn add_common_volume_mounts( @@ -366,6 +368,7 @@ impl SparkApplication { mut mounts: Vec, s3conn: &Option, s3logdir: &Option, + logging_enabled: bool, ) -> Vec { if self.spec.image.is_some() { mounts.push(VolumeMount { @@ -401,17 +404,19 @@ impl SparkApplication { mounts.push(vm); } - mounts.push(VolumeMount { - name: VOLUME_MOUNT_NAME_LOG_CONFIG.into(), - mount_path: VOLUME_MOUNT_PATH_LOG_CONFIG.into(), - ..VolumeMount::default() - }); - - mounts.push(VolumeMount { - name: VOLUME_MOUNT_NAME_LOG.into(), - mount_path: VOLUME_MOUNT_PATH_LOG.into(), - ..VolumeMount::default() - }); + if logging_enabled { + mounts.push(VolumeMount { + name: VOLUME_MOUNT_NAME_LOG_CONFIG.into(), + mount_path: VOLUME_MOUNT_PATH_LOG_CONFIG.into(), + ..VolumeMount::default() + }); + + mounts.push(VolumeMount { + name: VOLUME_MOUNT_NAME_LOG.into(), + mount_path: VOLUME_MOUNT_PATH_LOG.into(), + ..VolumeMount::default() + }); + } if !self.packages().is_empty() { mounts.push(VolumeMount { diff --git a/rust/crd/src/roles.rs b/rust/crd/src/roles.rs index 9ac3241f..d1b14a6a 100644 --- a/rust/crd/src/roles.rs +++ b/rust/crd/src/roles.rs @@ -147,7 +147,7 @@ impl RoleConfig { s3logdir: &Option, ) -> Vec { let volume_mounts = self.volume_mounts.clone().unwrap_or_default().into(); - spark_application.add_common_volume_mounts(volume_mounts, s3conn, s3logdir) + spark_application.add_common_volume_mounts(volume_mounts, s3conn, s3logdir, true) } } @@ -200,8 +200,6 @@ impl Configuration for RoleConfigFragment { pub struct SubmitConfig { #[fragment_attrs(serde(default))] pub resources: Resources, - #[fragment_attrs(serde(default))] - pub logging: Logging, } impl SubmitConfig { @@ -218,7 +216,6 @@ impl SubmitConfig { }, storage: SparkStorageConfigFragment {}, }, - logging: product_logging::spec::default_logging(), } } } diff --git 
a/rust/operator-binary/src/spark_k8s_controller.rs b/rust/operator-binary/src/spark_k8s_controller.rs index e8f04f3f..8b3dc344 100644 --- a/rust/operator-binary/src/spark_k8s_controller.rs +++ b/rust/operator-binary/src/spark_k8s_controller.rs @@ -318,8 +318,6 @@ pub async fn reconcile(spark_application: Arc, ctx: Arc) let submit_job_config_map = submit_job_config_map( &spark_application, submit_product_config, - vector_aggregator_address.as_deref(), - &submit_config.logging, &resolved_product_image, )?; client @@ -580,7 +578,7 @@ fn pod_template_config_map( }; let mut volumes = spark_application - .volumes(s3conn, s3logdir, &log_config_map) + .volumes(s3conn, s3logdir, Some(&log_config_map)) .context(CreateVolumesSnafu)?; volumes.push( VolumeBuilder::new(VOLUME_MOUNT_NAME_CONFIG) @@ -657,8 +655,6 @@ fn pod_template_config_map( fn submit_job_config_map( spark_application: &SparkApplication, product_config: Option<&HashMap>>, - vector_aggregator_address: Option<&str>, - logging: &Logging, spark_image: &ResolvedProductImage, ) -> Result { let cm_name = spark_application.submit_job_config_map_name(); @@ -679,20 +675,6 @@ fn submit_job_config_map( .build(), ); - product_logging::extend_config_map( - &RoleGroupRef { - cluster: ObjectRef::from_obj(spark_application), - role: String::new(), - role_group: String::new(), - }, - vector_aggregator_address, - logging, - SparkContainer::SparkSubmit, - SparkContainer::Vector, - &mut cm_builder, - ) - .context(InvalidLoggingConfigSnafu { cm_name })?; - if let Some(product_config) = product_config { let jvm_sec_props: BTreeMap> = product_config .get(&PropertyNameKind::File( @@ -731,27 +713,7 @@ fn spark_job( let mut cb = ContainerBuilder::new(&SparkContainer::SparkSubmit.to_string()) .context(IllegalContainerNameSnafu)?; - let log_config_map = if let Some(ContainerLogConfig { - choice: - Some(ContainerLogConfigChoice::Custom(CustomContainerLogConfig { - custom: ConfigMapLogConfig { config_map }, - })), - }) = job_config - .logging - .containers - .get(&SparkContainer::SparkSubmit) - { - config_map.into() - } else { - spark_application.submit_job_config_map_name() - }; - - let mut args = vec![job_commands.join(" ")]; - if job_config.logging.enable_vector_agent { - // Wait for Vector to gather the logs. 
- args.push("sleep 10".into()); - args.push(create_vector_shutdown_file_command(VOLUME_MOUNT_PATH_LOG)); - } + let args = vec![job_commands.join(" ")]; cb.image_from_product_image(spark_image) .command(vec!["/bin/bash".to_string(), "-c".to_string()]) @@ -785,26 +747,11 @@ fn spark_job( ]; volumes.extend( spark_application - .volumes(s3conn, s3logdir, &log_config_map) + .volumes(s3conn, s3logdir, None) .context(CreateVolumesSnafu)?, ); - let mut containers = vec![cb.build()]; - - if job_config.logging.enable_vector_agent { - containers.push(vector_container( - spark_image, - VOLUME_MOUNT_NAME_CONFIG, - VOLUME_MOUNT_NAME_LOG, - job_config.logging.containers.get(&SparkContainer::Vector), - ResourceRequirementsBuilder::new() - .with_cpu_request("250m") - .with_cpu_limit("500m") - .with_memory_request("128Mi") - .with_memory_limit("128Mi") - .build(), - )); - } + let containers = vec![cb.build()]; let mut pod = PodTemplateSpec { diff --git a/tests/templates/kuttl/logging/05-deploy-automatic-log-config-spark-app.yaml.j2 b/tests/templates/kuttl/logging/05-deploy-automatic-log-config-spark-app.yaml.j2 index 1ea2e310..5d2eafcc 100644 --- a/tests/templates/kuttl/logging/05-deploy-automatic-log-config-spark-app.yaml.j2 +++ b/tests/templates/kuttl/logging/05-deploy-automatic-log-config-spark-app.yaml.j2 @@ -17,27 +17,6 @@ spec: mode: cluster mainClass: org.apache.spark.examples.SparkALS mainApplicationFile: local:///stackable/spark/examples/jars/spark-examples.jar - job: - config: - logging: - enableVectorAgent: true - containers: - spark-submit: - console: - level: INFO - file: - level: INFO - loggers: - ROOT: - level: INFO - vector: - console: - level: INFO - file: - level: INFO - loggers: - ROOT: - level: INFO driver: config: logging: diff --git a/tests/templates/kuttl/logging/06-deploy-custom-log-config-spark-app.yaml.j2 b/tests/templates/kuttl/logging/06-deploy-custom-log-config-spark-app.yaml.j2 index a5227b37..a1338195 100644 --- a/tests/templates/kuttl/logging/06-deploy-custom-log-config-spark-app.yaml.j2 +++ b/tests/templates/kuttl/logging/06-deploy-custom-log-config-spark-app.yaml.j2 @@ -16,23 +16,6 @@ data: rootLogger.appenderRefs = FILE rootLogger.appenderRef.FILE.ref = FILE --- -apiVersion: v1 -kind: ConfigMap -metadata: - name: spark-submit-log-config -data: - log4j2.properties: |- - appenders = FILE - - appender.FILE.type = File - appender.FILE.name = FILE - appender.FILE.fileName = /stackable/log/spark-submit/spark.log4j2.xml - appender.FILE.layout.type = XMLLayout - - rootLogger.level = INFO - rootLogger.appenderRefs = FILE - rootLogger.appenderRef.FILE.ref = FILE ---- apiVersion: spark.stackable.tech/v1alpha1 kind: SparkApplication metadata: @@ -51,14 +34,6 @@ spec: mode: cluster mainClass: org.apache.spark.examples.SparkALS mainApplicationFile: local:///stackable/spark/examples/jars/spark-examples.jar - job: - config: - logging: - enableVectorAgent: true - containers: - spark-submit: - custom: - configMap: spark-submit-log-config driver: config: logging: diff --git a/tests/templates/kuttl/logging/07-deploy-automatic-log-config-pyspark-app.yaml.j2 b/tests/templates/kuttl/logging/07-deploy-automatic-log-config-pyspark-app.yaml.j2 index 85d903cd..d1e70604 100644 --- a/tests/templates/kuttl/logging/07-deploy-automatic-log-config-pyspark-app.yaml.j2 +++ b/tests/templates/kuttl/logging/07-deploy-automatic-log-config-pyspark-app.yaml.j2 @@ -18,27 +18,6 @@ spec: deps: requirements: - numpy==1.24.2 - job: - config: - logging: - enableVectorAgent: true - containers: - spark-submit: - console: 
- level: INFO - file: - level: INFO - loggers: - ROOT: - level: INFO - vector: - console: - level: INFO - file: - level: INFO - loggers: - ROOT: - level: INFO driver: config: logging: diff --git a/tests/templates/kuttl/logging/08-deploy-custom-log-config-pyspark-app.yaml.j2 b/tests/templates/kuttl/logging/08-deploy-custom-log-config-pyspark-app.yaml.j2 index f85328a1..3899b0e4 100644 --- a/tests/templates/kuttl/logging/08-deploy-custom-log-config-pyspark-app.yaml.j2 +++ b/tests/templates/kuttl/logging/08-deploy-custom-log-config-pyspark-app.yaml.j2 @@ -16,23 +16,6 @@ data: rootLogger.appenderRefs = FILE rootLogger.appenderRef.FILE.ref = FILE --- -apiVersion: v1 -kind: ConfigMap -metadata: - name: pyspark-submit-log-config -data: - log4j2.properties: |- - appenders = FILE - - appender.FILE.type = File - appender.FILE.name = FILE - appender.FILE.fileName = /stackable/log/spark-submit/spark.log4j2.xml - appender.FILE.layout.type = XMLLayout - - rootLogger.level = INFO - rootLogger.appenderRefs = FILE - rootLogger.appenderRef.FILE.ref = FILE ---- apiVersion: spark.stackable.tech/v1alpha1 kind: SparkApplication metadata: @@ -52,14 +35,6 @@ spec: deps: requirements: - numpy==1.24.2 - job: - config: - logging: - enableVectorAgent: true - containers: - spark-submit: - custom: - configMap: pyspark-submit-log-config driver: config: logging: diff --git a/tests/templates/kuttl/logging/spark-vector-aggregator-values.yaml.j2 b/tests/templates/kuttl/logging/spark-vector-aggregator-values.yaml.j2 index 3911a46c..91b785db 100644 --- a/tests/templates/kuttl/logging/spark-vector-aggregator-values.yaml.j2 +++ b/tests/templates/kuttl/logging/spark-vector-aggregator-values.yaml.j2 @@ -50,18 +50,6 @@ customConfig: # SparkApplication spark-automatic-log-config - filteredSparkAutomaticLogConfigSubmitSpark: - type: filter - inputs: [vector] - condition: >- - match(string!(.pod), r'^spark-automatic-log-config-[^-]+$') && - .container == "spark-submit" - filteredSparkAutomaticLogConfigSubmitVector: - type: filter - inputs: [vector] - condition: >- - match(string!(.pod), r'^spark-automatic-log-config-[^-]+$') && - .container == "vector" filteredSparkAutomaticLogConfigDriverSpark: type: filter inputs: [vector] @@ -107,18 +95,6 @@ customConfig: # SparkApplication spark-custom-log-config - filteredSparkCustomLogConfigSubmitSpark: - type: filter - inputs: [vector] - condition: >- - match(string!(.pod), r'^spark-custom-log-config-[^-]+$') && - .container == "spark-submit" - filteredSparkCustomLogConfigSubmitVector: - type: filter - inputs: [vector] - condition: >- - match(string!(.pod), r'^spark-custom-log-config-[^-]+$') && - .container == "vector" filteredSparkCustomLogConfigDriverSpark: type: filter inputs: [vector] @@ -164,18 +140,6 @@ customConfig: # SparkApplication pyspark-automatic-log-config - filteredPysparkAutomaticLogConfigSubmitSpark: - type: filter - inputs: [vector] - condition: >- - match(string!(.pod), r'^pyspark-automatic-log-config-[^-]+$') && - .container == "spark-submit" - filteredPysparkAutomaticLogConfigSubmitVector: - type: filter - inputs: [vector] - condition: >- - match(string!(.pod), r'^pyspark-automatic-log-config-[^-]+$') && - .container == "vector" filteredPysparkAutomaticLogConfigDriverSpark: type: filter inputs: [vector] @@ -221,18 +185,6 @@ customConfig: # SparkApplication pyspark-custom-log-config - filteredPysparkCustomLogConfigSubmitSpark: - type: filter - inputs: [vector] - condition: >- - match(string!(.pod), r'^pyspark-custom-log-config-[^-]+$') && - .container == "spark-submit" - 
filteredPysparkCustomLogConfigSubmitVector: - type: filter - inputs: [vector] - condition: >- - match(string!(.pod), r'^pyspark-custom-log-config-[^-]+$') && - .container == "vector" filteredPysparkCustomLogConfigDriverSpark: type: filter inputs: [vector] diff --git a/tests/templates/kuttl/logging/test_log_aggregation.py b/tests/templates/kuttl/logging/test_log_aggregation.py index 56fd7ebd..90124086 100755 --- a/tests/templates/kuttl/logging/test_log_aggregation.py +++ b/tests/templates/kuttl/logging/test_log_aggregation.py @@ -37,12 +37,6 @@ def check_sent_events(): assert sentEvents is None or \ sentEvents['sentEventsTotal'] == 0, \ 'Invalid log events were sent.' - elif componentId.find('LogConfigSubmitSpark') > 0: - # [*LogConfigSubmitSpark] components do not log in XML format - # because spark-submit doesn't know about the /stackable/spark/extra-jars folder. - assert sentEvents is None or \ - sentEvents['sentEventsTotal'] == 0, \ - f'Found events for {componentId} but did not expect any.' else: assert sentEvents is not None and \ sentEvents['sentEventsTotal'] > 0, \ From 6e085366af98474a16c87a73978e6151f36d5a18 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Wed, 14 Feb 2024 16:53:57 +0100 Subject: [PATCH 12/18] Update CHANGELOG.md Co-authored-by: Sebastian Bernauer --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c375eed..3556d671 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,7 @@ All notable changes to this project will be documented in this file. - [BREAKING] Remove `userClassPathFirst` properties from `spark-submit`. This is an experimental feature that was introduced to support logging in XML format. The side effect of this removal is that the vector agent cannot aggregate output from the `spark-submit` containers. On the other side, it enables dynamic provisionining of - delta.io packages (among others) with Stackable stock images which is much more important. ([#355]) + java packages (such as Delta Lake) with Stackable stock images which is much more important. ([#355]) ### Fixed From 761f1dc6c9c77c4bd0c109e5eeb1a6d8b72bd17b Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Wed, 14 Feb 2024 16:54:57 +0100 Subject: [PATCH 13/18] Update docs/modules/spark-k8s/pages/usage-guide/logging.adoc Co-authored-by: Sebastian Bernauer --- docs/modules/spark-k8s/pages/usage-guide/logging.adoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/modules/spark-k8s/pages/usage-guide/logging.adoc b/docs/modules/spark-k8s/pages/usage-guide/logging.adoc index ae4b571e..768d2cba 100644 --- a/docs/modules/spark-k8s/pages/usage-guide/logging.adoc +++ b/docs/modules/spark-k8s/pages/usage-guide/logging.adoc @@ -1,6 +1,6 @@ = Logging -The Spark operator installs a https://vector.dev/docs/setup/deployment/roles/#agent[vector agent] as a side-car container in every application Pod except the `job` Pod that runs `spark-submit`. It also configures the logging framework to output logs in XML format. This is the same https://logging.apache.org/log4j/2.x/manual/layouts.html#XMLLayout[format] used across all Stackable products and it enables the https://vector.dev/docs/setup/deployment/roles/#aggregator[vector aggregator] to collect logs across the entire platform. 
+The Spark operator installs a https://vector.dev/docs/setup/deployment/roles/#agent[vector agent] as a side-car container in every application Pod except the `job` Pod that runs `spark-submit`. It also configures the logging framework to output logs in XML format. This is the same https://logging.apache.org/log4j/2.x/manual/layouts.html#XMLLayout[format] used across all Stackable products and it enables the https://vector.dev/docs/setup/deployment/roles/#aggregator[vector aggregator] to collect logs across the entire platform. It is the user's responsibility to install and configure the vector aggregator, but the agents can discover the aggregator automatically using a discovery ConfigMap as described in the xref:concepts:logging.adoc[logging concepts]. From fc86d7af81bace6e973771f5c270f2e79588a730 Mon Sep 17 00:00:00 2001 From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com> Date: Wed, 14 Feb 2024 16:58:09 +0100 Subject: [PATCH 14/18] regenerate charts --- deploy/helm/spark-k8s-operator/crds/crds.yaml | 84 ------------------- 1 file changed, 84 deletions(-) diff --git a/deploy/helm/spark-k8s-operator/crds/crds.yaml b/deploy/helm/spark-k8s-operator/crds/crds.yaml index 5d81f478..2a316d89 100644 --- a/deploy/helm/spark-k8s-operator/crds/crds.yaml +++ b/deploy/helm/spark-k8s-operator/crds/crds.yaml @@ -7061,90 +7061,6 @@ spec: config: default: {} properties: - logging: - default: - enableVectorAgent: null - containers: {} - description: Logging configuration, learn more in the [logging concept documentation](https://docs.stackable.tech/home/nightly/concepts/logging). - properties: - containers: - additionalProperties: - anyOf: - - required: - - custom - - {} - description: Log configuration of the container - properties: - console: - description: Configuration for the console appender - nullable: true - properties: - level: - description: The log level threshold. Log events with a lower log level are discarded. - enum: - - TRACE - - DEBUG - - INFO - - WARN - - ERROR - - FATAL - - NONE - nullable: true - type: string - type: object - custom: - description: Custom log configuration provided in a ConfigMap - properties: - configMap: - description: ConfigMap containing the log configuration files - nullable: true - type: string - type: object - file: - description: Configuration for the file appender - nullable: true - properties: - level: - description: The log level threshold. Log events with a lower log level are discarded. - enum: - - TRACE - - DEBUG - - INFO - - WARN - - ERROR - - FATAL - - NONE - nullable: true - type: string - type: object - loggers: - additionalProperties: - description: Configuration of a logger - properties: - level: - description: The log level threshold. Log events with a lower log level are discarded. - enum: - - TRACE - - DEBUG - - INFO - - WARN - - ERROR - - FATAL - - NONE - nullable: true - type: string - type: object - default: {} - description: Configuration per logger - type: object - type: object - description: Log configuration per container. - type: object - enableVectorAgent: - description: Wether or not to deploy a container with the Vector log agent. 
- nullable: true - type: boolean - type: object resources: default: memory: From f4ca6b554d7eb95e9367d5759d046ad551a93167 Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Wed, 14 Feb 2024 17:13:42 +0100 Subject: [PATCH 15/18] Remove unnecessary volume mount --- rust/crd/src/lib.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 1f4e55f8..a27af7d7 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -300,18 +300,18 @@ impl SparkApplication { .with_config_map(log_config_map) .build(), ); - } - result.push( - VolumeBuilder::new(VOLUME_MOUNT_NAME_LOG) - .with_empty_dir( - None::, - Some(product_logging::framework::calculate_log_volume_size_limit( - &[MAX_SPARK_LOG_FILES_SIZE, MAX_INIT_LOG_FILES_SIZE], - )), - ) - .build(), - ); + result.push( + VolumeBuilder::new(VOLUME_MOUNT_NAME_LOG) + .with_empty_dir( + None::, + Some(product_logging::framework::calculate_log_volume_size_limit( + &[MAX_SPARK_LOG_FILES_SIZE, MAX_INIT_LOG_FILES_SIZE], + )), + ) + .build(), + ); + } if !self.packages().is_empty() { result.push( From d78b3d24932aa1e56c6f4118df30a7409b30f0e0 Mon Sep 17 00:00:00 2001 From: Siegfried Weber Date: Wed, 14 Feb 2024 17:14:48 +0100 Subject: [PATCH 16/18] Remove the logging configuration for the submit job in all tests --- tests/templates/kuttl/delta/40-spark-app.yaml.j2 | 3 --- tests/templates/kuttl/iceberg/10-deploy-spark-app.yaml.j2 | 3 --- .../pyspark-ny-public-s3-image/10-deploy-spark-app.yaml.j2 | 4 ---- .../kuttl/pyspark-ny-public-s3/10-deploy-spark-app.yaml.j2 | 4 ---- tests/templates/kuttl/resources/10-deploy-spark-app.yaml.j2 | 2 -- tests/templates/kuttl/resources/12-deploy-spark-app.yaml.j2 | 4 ---- tests/templates/kuttl/smoke/10-deploy-spark-app.yaml.j2 | 4 ---- .../kuttl/spark-examples/10-deploy-spark-app.yaml.j2 | 4 ---- .../kuttl/spark-history-server/10-deploy-spark-app.yaml.j2 | 4 ---- .../kuttl/spark-history-server/12-deploy-spark-app.yaml.j2 | 4 ---- .../kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 | 4 ---- .../kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 | 4 ---- .../kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 | 4 ---- 13 files changed, 48 deletions(-) diff --git a/tests/templates/kuttl/delta/40-spark-app.yaml.j2 b/tests/templates/kuttl/delta/40-spark-app.yaml.j2 index 981549f9..74be4022 100644 --- a/tests/templates/kuttl/delta/40-spark-app.yaml.j2 +++ b/tests/templates/kuttl/delta/40-spark-app.yaml.j2 @@ -24,9 +24,6 @@ spec: accessStyle: Path credentials: secretClass: s3-credentials-class - job: - logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} driver: config: logging: diff --git a/tests/templates/kuttl/iceberg/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/iceberg/10-deploy-spark-app.yaml.j2 index ede72f0d..89143cee 100644 --- a/tests/templates/kuttl/iceberg/10-deploy-spark-app.yaml.j2 +++ b/tests/templates/kuttl/iceberg/10-deploy-spark-app.yaml.j2 @@ -24,9 +24,6 @@ spec: spark.sql.catalog.local: org.apache.iceberg.spark.SparkCatalog spark.sql.catalog.local.type: hadoop spark.sql.catalog.local.warehouse: /tmp/warehouse - job: - logging: - enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }} driver: config: logging: diff --git a/tests/templates/kuttl/pyspark-ny-public-s3-image/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/pyspark-ny-public-s3-image/10-deploy-spark-app.yaml.j2 index ce607ef2..35aa5dc6 100644 --- 
a/tests/templates/kuttl/pyspark-ny-public-s3-image/10-deploy-spark-app.yaml.j2
+++ b/tests/templates/kuttl/pyspark-ny-public-s3-image/10-deploy-spark-app.yaml.j2
@@ -29,10 +29,6 @@ spec:
       host: test-minio
       port: 9000
       accessStyle: Path
-  job:
-    config:
-      logging:
-        enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
   driver:
     config:
       logging:
diff --git a/tests/templates/kuttl/pyspark-ny-public-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/pyspark-ny-public-s3/10-deploy-spark-app.yaml.j2
index 26f9a795..90986e01 100644
--- a/tests/templates/kuttl/pyspark-ny-public-s3/10-deploy-spark-app.yaml.j2
+++ b/tests/templates/kuttl/pyspark-ny-public-s3/10-deploy-spark-app.yaml.j2
@@ -28,10 +28,6 @@ spec:
       host: test-minio
       port: 9000
       accessStyle: Path
-  job:
-    config:
-      logging:
-        enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
   driver:
     config:
       logging:
diff --git a/tests/templates/kuttl/resources/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/resources/10-deploy-spark-app.yaml.j2
index 23f885d2..cc8b9e5a 100644
--- a/tests/templates/kuttl/resources/10-deploy-spark-app.yaml.j2
+++ b/tests/templates/kuttl/resources/10-deploy-spark-app.yaml.j2
@@ -24,8 +24,6 @@ spec:
     spark.kubernetes.executor.podNamePrefix: "resources-crd"
   job:
     config:
-      logging:
-        enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
       resources:
         cpu:
           min: 250m
diff --git a/tests/templates/kuttl/resources/12-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/resources/12-deploy-spark-app.yaml.j2
index d04aec51..d2abc94b 100644
--- a/tests/templates/kuttl/resources/12-deploy-spark-app.yaml.j2
+++ b/tests/templates/kuttl/resources/12-deploy-spark-app.yaml.j2
@@ -33,10 +33,6 @@ spec:
     spark.executor.memory: "2g"
     spark.executor.memoryOverheadFactor: "0.4"
     spark.executor.instances: "1"
-  job:
-    config:
-      logging:
-        enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
   driver:
     config:
       logging:
diff --git a/tests/templates/kuttl/smoke/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/smoke/10-deploy-spark-app.yaml.j2
index b806109c..03a60a71 100644
--- a/tests/templates/kuttl/smoke/10-deploy-spark-app.yaml.j2
+++ b/tests/templates/kuttl/smoke/10-deploy-spark-app.yaml.j2
@@ -25,10 +25,6 @@ spec:
       prefix: eventlogs/
       bucket:
         reference: spark-history-s3-bucket
-  job:
-    config:
-      logging:
-        enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
   driver:
     config:
       logging:
diff --git a/tests/templates/kuttl/spark-examples/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-examples/10-deploy-spark-app.yaml.j2
index 0658c426..c8804ab8 100644
--- a/tests/templates/kuttl/spark-examples/10-deploy-spark-app.yaml.j2
+++ b/tests/templates/kuttl/spark-examples/10-deploy-spark-app.yaml.j2
@@ -18,10 +18,6 @@ spec:
   mode: cluster
   mainClass: org.apache.spark.examples.SparkALS
   mainApplicationFile: "local:///stackable/spark/examples/jars/spark-examples.jar"
-  job:
-    config:
-      logging:
-        enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
   driver:
     config:
       logging:
diff --git a/tests/templates/kuttl/spark-history-server/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-history-server/10-deploy-spark-app.yaml.j2
index 54508763..39203355 100644
--- a/tests/templates/kuttl/spark-history-server/10-deploy-spark-app.yaml.j2
+++ b/tests/templates/kuttl/spark-history-server/10-deploy-spark-app.yaml.j2
@@ -25,10 +25,6 @@ spec:
       prefix: eventlogs/
       bucket:
         reference: spark-history-s3-bucket
-  job:
-    config:
-      logging:
-        enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
   driver:
     config:
       logging:
diff --git a/tests/templates/kuttl/spark-history-server/12-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-history-server/12-deploy-spark-app.yaml.j2
index 9bc1f4fc..45aa28f9 100644
--- a/tests/templates/kuttl/spark-history-server/12-deploy-spark-app.yaml.j2
+++ b/tests/templates/kuttl/spark-history-server/12-deploy-spark-app.yaml.j2
@@ -25,10 +25,6 @@ spec:
       prefix: eventlogs/
       bucket:
         reference: spark-history-s3-bucket
-  job:
-    config:
-      logging:
-        enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
   driver:
     config:
       logging:
diff --git a/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2
index 756e13fa..4bf26655 100644
--- a/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2
+++ b/tests/templates/kuttl/spark-ny-public-s3/10-deploy-spark-app.yaml.j2
@@ -34,10 +34,6 @@ spec:
     - "--input /arguments/job-args.txt"
   s3connection:
     reference: minio
-  job:
-    config:
-      logging:
-        enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
   driver:
     config:
       logging:
diff --git a/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2
index 953f3945..a1219635 100644
--- a/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2
+++ b/tests/templates/kuttl/spark-pi-private-s3/10-deploy-spark-app.yaml.j2
@@ -25,10 +25,6 @@ spec:
       accessStyle: Path
       credentials:
         secretClass: spark-pi-private-s3-credentials-class
-  job:
-    config:
-      logging:
-        enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
   driver:
     config:
       logging:
diff --git a/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2 b/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2
index a5dcf506..3ac11bae 100644
--- a/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2
+++ b/tests/templates/kuttl/spark-pi-public-s3/10-deploy-spark-app.yaml.j2
@@ -23,10 +23,6 @@ spec:
       host: test-minio
       port: 9000
       accessStyle: Path
-  job:
-    config:
-      logging:
-        enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
   driver:
     config:
       logging:

From 61a82ff73554ff8e7bef2499820196e7e33f39dd Mon Sep 17 00:00:00 2001
From: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com>
Date: Wed, 14 Feb 2024 17:23:36 +0100
Subject: [PATCH 17/18] Implement review feedback

---
 .../kuttl/{delta => delta-lake}/01-assert.yaml.j2       |  0
 ...stall-vector-aggregator-discovery-configmap.yaml.j2  |  0
 .../kuttl/{delta => delta-lake}/10-s3-secret.yaml       |  0
 .../kuttl/{delta => delta-lake}/20-assert.yaml          |  0
 .../kuttl/{delta => delta-lake}/20-setup-minio.yaml     |  0
 .../kuttl/{delta => delta-lake}/30-prepare-bucket.yaml  |  0
 .../kuttl/{delta => delta-lake}/40-assert.yaml          |  2 +-
 .../kuttl/{delta => delta-lake}/40-spark-app.yaml.j2    | 10 +++++-----
 .../helm-bitnami-minio-values.yaml                      |  0
 tests/test-definition.yaml                              |  6 +++---
 10 files changed, 9 insertions(+), 9 deletions(-)
 rename tests/templates/kuttl/{delta => delta-lake}/01-assert.yaml.j2 (100%)
 rename tests/templates/kuttl/{delta => delta-lake}/01-install-vector-aggregator-discovery-configmap.yaml.j2 (100%)
 rename tests/templates/kuttl/{delta => delta-lake}/10-s3-secret.yaml (100%)
 rename tests/templates/kuttl/{delta => delta-lake}/20-assert.yaml (100%)
 rename tests/templates/kuttl/{delta => delta-lake}/20-setup-minio.yaml (100%)
 rename tests/templates/kuttl/{delta => delta-lake}/30-prepare-bucket.yaml (100%)
 rename tests/templates/kuttl/{delta => delta-lake}/40-assert.yaml (88%)
 rename tests/templates/kuttl/{delta => delta-lake}/40-spark-app.yaml.j2 (88%)
 rename tests/templates/kuttl/{delta => delta-lake}/helm-bitnami-minio-values.yaml (100%)

diff --git a/tests/templates/kuttl/delta/01-assert.yaml.j2 b/tests/templates/kuttl/delta-lake/01-assert.yaml.j2
similarity index 100%
rename from tests/templates/kuttl/delta/01-assert.yaml.j2
rename to tests/templates/kuttl/delta-lake/01-assert.yaml.j2
diff --git a/tests/templates/kuttl/delta/01-install-vector-aggregator-discovery-configmap.yaml.j2 b/tests/templates/kuttl/delta-lake/01-install-vector-aggregator-discovery-configmap.yaml.j2
similarity index 100%
rename from tests/templates/kuttl/delta/01-install-vector-aggregator-discovery-configmap.yaml.j2
rename to tests/templates/kuttl/delta-lake/01-install-vector-aggregator-discovery-configmap.yaml.j2
diff --git a/tests/templates/kuttl/delta/10-s3-secret.yaml b/tests/templates/kuttl/delta-lake/10-s3-secret.yaml
similarity index 100%
rename from tests/templates/kuttl/delta/10-s3-secret.yaml
rename to tests/templates/kuttl/delta-lake/10-s3-secret.yaml
diff --git a/tests/templates/kuttl/delta/20-assert.yaml b/tests/templates/kuttl/delta-lake/20-assert.yaml
similarity index 100%
rename from tests/templates/kuttl/delta/20-assert.yaml
rename to tests/templates/kuttl/delta-lake/20-assert.yaml
diff --git a/tests/templates/kuttl/delta/20-setup-minio.yaml b/tests/templates/kuttl/delta-lake/20-setup-minio.yaml
similarity index 100%
rename from tests/templates/kuttl/delta/20-setup-minio.yaml
rename to tests/templates/kuttl/delta-lake/20-setup-minio.yaml
diff --git a/tests/templates/kuttl/delta/30-prepare-bucket.yaml b/tests/templates/kuttl/delta-lake/30-prepare-bucket.yaml
similarity index 100%
rename from tests/templates/kuttl/delta/30-prepare-bucket.yaml
rename to tests/templates/kuttl/delta-lake/30-prepare-bucket.yaml
diff --git a/tests/templates/kuttl/delta/40-assert.yaml b/tests/templates/kuttl/delta-lake/40-assert.yaml
similarity index 88%
rename from tests/templates/kuttl/delta/40-assert.yaml
rename to tests/templates/kuttl/delta-lake/40-assert.yaml
index 477afd3a..f9d38189 100644
--- a/tests/templates/kuttl/delta/40-assert.yaml
+++ b/tests/templates/kuttl/delta-lake/40-assert.yaml
@@ -7,6 +7,6 @@ timeout: 300
 apiVersion: spark.stackable.tech/v1alpha1
 kind: SparkApplication
 metadata:
-  name: pyspark-delta
+  name: pyspark-delta-lake
 status:
   phase: Succeeded
diff --git a/tests/templates/kuttl/delta/40-spark-app.yaml.j2 b/tests/templates/kuttl/delta-lake/40-spark-app.yaml.j2
similarity index 88%
rename from tests/templates/kuttl/delta/40-spark-app.yaml.j2
rename to tests/templates/kuttl/delta-lake/40-spark-app.yaml.j2
index 74be4022..4ad17431 100644
--- a/tests/templates/kuttl/delta/40-spark-app.yaml.j2
+++ b/tests/templates/kuttl/delta-lake/40-spark-app.yaml.j2
@@ -2,17 +2,17 @@
 apiVersion: spark.stackable.tech/v1alpha1
 kind: SparkApplication
 metadata:
-  name: pyspark-delta
+  name: pyspark-delta-lake
 spec:
 {% if lookup('env', 'VECTOR_AGGREGATOR') %}
   vectorAggregatorConfigMapName: vector-aggregator-discovery
 {% endif %}
   sparkImage:
-{% if test_scenario['values']['spark-delta'].find(",") > 0 %}
-    custom: "{{ test_scenario['values']['spark-delta'].split(',')[1] }}"
-    productVersion: "{{ test_scenario['values']['spark-delta'].split(',')[0] }}"
+{% if test_scenario['values']['spark-delta-lake'].find(",") > 0 %}
+    custom: "{{ test_scenario['values']['spark-delta-lake'].split(',')[1] }}"
+    productVersion: "{{ test_scenario['values']['spark-delta-lake'].split(',')[0] }}"
 {% else %}
-    productVersion: "{{ test_scenario['values']['spark-delta'] }}"
+    productVersion: "{{ test_scenario['values']['spark-delta-lake'] }}"
 {% endif %}
     pullPolicy: IfNotPresent
   mode: cluster
diff --git a/tests/templates/kuttl/delta/helm-bitnami-minio-values.yaml b/tests/templates/kuttl/delta-lake/helm-bitnami-minio-values.yaml
similarity index 100%
rename from tests/templates/kuttl/delta/helm-bitnami-minio-values.yaml
rename to tests/templates/kuttl/delta-lake/helm-bitnami-minio-values.yaml
diff --git a/tests/test-definition.yaml b/tests/test-definition.yaml
index d95461f4..fb5967fb 100644
--- a/tests/test-definition.yaml
+++ b/tests/test-definition.yaml
@@ -10,7 +10,7 @@ dimensions:
       # Alternatively, if you want to use a custom image, append a comma and the full image name to the product version
       # as in the example below.
       # - 3.5.0,docker.stackable.tech/sandbox/spark-k8s:3.5.0-stackable0.0.0-dev
-  - name: spark-delta
+  - name: spark-delta-lake
     values:
       - 3.5.0
       # - 3.5.0,docker.stackable.tech/sandbox/spark-k8s:3.5.0-stackable0.0.0-dev
@@ -77,9 +77,9 @@ tests:
   - name: iceberg
     dimensions:
       - spark
-  - name: delta
+  - name: delta-lake
    dimensions:
-      - spark-delta
+      - spark-delta-lake
       - delta
 suites:
   - name: nightly

From 38152363fa7b2dd122114bcfdb6deb87e6467498 Mon Sep 17 00:00:00 2001
From: Siegfried Weber
Date: Thu, 15 Feb 2024 08:52:40 +0100
Subject: [PATCH 18/18] Fix assertions in the resources test

---
 tests/templates/kuttl/resources/10-assert.yaml.j2 | 3 ---
 tests/templates/kuttl/resources/12-assert.yaml.j2 | 3 ---
 2 files changed, 6 deletions(-)

diff --git a/tests/templates/kuttl/resources/10-assert.yaml.j2 b/tests/templates/kuttl/resources/10-assert.yaml.j2
index 5e33a5fd..eb4e706b 100644
--- a/tests/templates/kuttl/resources/10-assert.yaml.j2
+++ b/tests/templates/kuttl/resources/10-assert.yaml.j2
@@ -19,9 +19,6 @@ spec:
         requests:
           cpu: 250m
           memory: 512Mi
-{% if lookup('env', 'VECTOR_AGGREGATOR') %}
-    - name: vector
-{% endif %}
 ---
 apiVersion: v1
 kind: Pod
diff --git a/tests/templates/kuttl/resources/12-assert.yaml.j2 b/tests/templates/kuttl/resources/12-assert.yaml.j2
index ac9cd049..35ef7245 100644
--- a/tests/templates/kuttl/resources/12-assert.yaml.j2
+++ b/tests/templates/kuttl/resources/12-assert.yaml.j2
@@ -19,9 +19,6 @@ spec:
         requests:
           cpu: 100m
           memory: 512Mi
-{% if lookup('env', 'VECTOR_AGGREGATOR') %}
-    - name: vector
-{% endif %}
 ---
 apiVersion: v1
 kind: Pod