
Commit 11015b4

committed: updated changelog and updated usage doc

1 parent 8112601 commit 11015b4

File tree

4 files changed: +82 -2 lines changed


CHANGELOG.md

Lines changed: 2 additions & 0 deletions

@@ -9,6 +9,8 @@ All notable changes to this project will be documented in this file.
 - Initial commit
 - ServiceAccount, ClusterRole and RoleBinding for Spark driver ([#39])
 - S3 credentials can be provided via a Secret ([#42])
+- Job information can be passed via a configuration map ([#50])

 [#39]: https://github.com/stackabletech/spark-k8s-operator/pull/39
 [#42]: https://github.com/stackabletech/spark-k8s-operator/pull/42
+[#50]: https://github.com/stackabletech/spark-k8s-operator/pull/50
example-configmap.yaml

Lines changed: 8 additions & 0 deletions (new file)

---
apiVersion: v1
kind: ConfigMap
metadata:
  name: cm-job-arguments # <1>
data:
  job-args.txt: |
    s3a://nyc-tlc/trip data/yellow_tripdata_2021-07.csv # <2>
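The file key `job-args.txt` of this ConfigMap is what the job later reads from its mount path. As an illustration only, a minimal Python sketch of that read logic (the real job, `tech.stackable.demo.spark.NYTLCReport`, is a Scala class whose internals this commit does not show; the function name and parsing rules here are assumptions):

```python
def read_job_args(path="/arguments/job-args.txt"):
    """Return the non-empty, stripped lines of the mounted args file.

    Illustrative assumption: each line of job-args.txt is one dataset URI,
    as in the ConfigMap above. Not the operator's or the job's actual code.
    """
    with open(path, encoding="utf-8") as f:
        return [line.strip() for line in f if line.strip()]
```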
example-sparkapp-configmap.yaml

Lines changed: 42 additions & 0 deletions (new file)

---
apiVersion: spark.stackable.tech/v1alpha1
kind: SparkApplication
metadata:
  name: ny-tlc-report-configmap
  namespace: default
spec:
  version: "1.0"
  sparkImage: docker.stackable.tech/stackable/spark-k8s:3.2.1-hadoop3.2-stackable0.4.0
  mode: cluster
  mainApplicationFile: s3a://stackable-spark-k8s-jars/jobs/ny-tlc-report-1.1.0.jar # <3>
  mainClass: tech.stackable.demo.spark.NYTLCReport
  volumes:
    - name: job-deps
      persistentVolumeClaim:
        claimName: pvc-ksv
  args:
    - "--input /arguments/job-args.txt" # <4>
  sparkConf:
    "spark.hadoop.fs.s3a.aws.credentials.provider": "org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider"
    "spark.driver.extraClassPath": "/dependencies/jars/hadoop-aws-3.2.0.jar:/dependencies/jars/aws-java-sdk-bundle-1.11.375.jar"
    "spark.executor.extraClassPath": "/dependencies/jars/hadoop-aws-3.2.0.jar:/dependencies/jars/aws-java-sdk-bundle-1.11.375.jar"
  driver:
    cores: 1
    coreLimit: "1200m"
    memory: "512m"
    volumeMounts:
      - name: job-deps
        mountPath: /dependencies
    configMapMounts:
      - configMapName: cm-job-arguments # <5>
        path: /arguments # <6>
  executor:
    cores: 1
    instances: 3
    memory: "512m"
    volumeMounts:
      - name: job-deps
        mountPath: /dependencies
    configMapMounts:
      - configMapName: cm-job-arguments # <5>
        path: /arguments # <6>
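Note that `spec.args` passes `--input /arguments/job-args.txt` as one quoted string, which is split into flag and value before reaching the application's entry point. A Python sketch of how such a flag might be parsed on the job side (the parser is an illustrative assumption, not the actual NYTLCReport code):

```python
import argparse
import shlex


def parse_job_args(argv):
    """Parse the job-side command line.

    Mirrors the SparkApplication above, which supplies
    "--input /arguments/job-args.txt"; this parser is an
    illustrative assumption, not operator or job code.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", required=True,
                        help="path to the mounted file listing input URIs")
    return parser.parse_args(argv)


# A shell-style split turns the quoted YAML string into the two
# tokens argparse expects.
tokens = shlex.split("--input /arguments/job-args.txt")
ns = parse_job_args(tokens)
```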

docs/modules/ROOT/pages/usage.adoc

Lines changed: 30 additions & 2 deletions

@@ -92,15 +92,31 @@ include::example$example-sparkapp-pvc.yaml[]
 include::example$example-sparkapp-s3-private.yaml[]
 ----

-<1> Job python artifact (local)
+<1> Job Python artifact (located in S3)
 <2> Artifact class
-<3> S3 section, specifying the existing secret and S3 end-point ( in this case, Min-IO)
+<3> S3 section, specifying the existing secret and S3 end-point (in this case, MinIO)
 <4> Credentials secret
 <5> Spark dependencies: the credentials provider (the user knows what is relevant here) plus dependencies needed to access external resources...
 <6> ...in this case, in S3, accessed with the credentials defined in the secret
 <7> the name of the volume mount backed by a `PersistentVolumeClaim` that must be pre-existing
 <8> the path on the volume mount: this is referenced in the `sparkConf` section where the extra class path is defined for the driver and executors

+=== JVM (Scala): externally located artifact accessed with job arguments provided via configuration map
+
+[source,yaml]
+----
+include::example$example-configmap.yaml[]
+----
+[source,yaml]
+----
+include::example$example-sparkapp-configmap.yaml[]
+----
+<1> Name of the configuration map
+<2> Argument required by the job
+<3> Job Scala artifact that requires an input argument
+<4> The expected job argument, accessed via the mounted configuration map file
+<5> The name of the configuration map that will be mounted to the driver/executor
+<6> The mount location of the configuration map (this will contain a file `/arguments/job-args.txt`)

 == CRD argument coverage

@@ -187,6 +203,12 @@ Below are listed the CRD fields that can be defined by the user:
 |`spec.driver.volumeMounts.mountPath`
 |Volume mount path

+|`spec.driver.configMapMounts.configMapName`
+|Name of the configuration map to be mounted in the driver
+
+|`spec.driver.configMapMounts.path`
+|Mount path of the configuration map in the driver
+
 |`spec.executor.cores`
 |Number of cores for each executor

@@ -204,5 +226,11 @@ Below are listed the CRD fields that can be defined by the user:

 |`spec.executor.volumeMounts.mountPath`
 |Volume mount path
+
+|`spec.executor.configMapMounts.configMapName`
+|Name of the configuration map to be mounted in the executor
+
+|`spec.executor.configMapMounts.path`
+|Mount path of the configuration map in the executor
 |===
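The new `configMapMounts` fields documented above each require both a `configMapName` and a `path`. A minimal Python sketch of a pre-submit sanity check over a manifest dict (this helper is an illustration, not part of the operator, and the operator's own CRD validation is authoritative):

```python
def validate_config_map_mounts(role_spec):
    """Check that each configMapMounts entry carries both required keys.

    `role_spec` is the dict found under spec.driver or spec.executor in a
    SparkApplication manifest. Illustrative check only, not operator code.
    """
    errors = []
    for i, mount in enumerate(role_spec.get("configMapMounts", [])):
        for key in ("configMapName", "path"):
            if not mount.get(key):
                errors.append(f"configMapMounts[{i}]: missing {key}")
    return errors
```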
