
[Merged by Bors] - Update S3 implementation #86


Closed · 17 commits
10 changes: 9 additions & 1 deletion CHANGELOG.md
@@ -4,12 +4,20 @@ All notable changes to this project will be documented in this file.

## [Unreleased]

### Added

### Changed

- BREAKING: Use current S3 connection/bucket structs ([#86])

[#86]: https://github.com/stackabletech/spark-k8s-operator/pull/86

## [0.2.0] - 2022-06-21

### Added

- Added new fields to govern image pull policy ([#75])
- New `nodeSelector` fields for both the driver and the excutors ([#76])
- New `nodeSelector` fields for both the driver and the executors ([#76])
- Mirror driver pod status to the corresponding spark application ([#77])

[#75]: https://github.com/stackabletech/spark-k8s-operator/pull/75
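Since this is a breaking change, existing manifests need their inline S3 connection updated. A minimal before/after sketch of the connection fragment, based on the example files touched further down in this PR (the surrounding `s3bucket` nesting is omitted here):

```yaml
# Before (0.2.0): the secret class sat directly on the inline connection
inline:
  host: test-minio
  port: 9000
  secretClass: minio-credentials

# After (this PR): the access style is explicit and the secret class moves under `credentials`
inline:
  host: test-minio
  port: 9000
  accessStyle: Path
  credentials:
    secretClass: s3-credentials-class
```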
53 changes: 44 additions & 9 deletions Cargo.lock


39 changes: 36 additions & 3 deletions deploy/crd/sparkapplication.crd.yaml
@@ -299,18 +299,51 @@ spec:
inline:
description: S3 connection definition as CRD.
properties:
accessStyle:
description: "Which access style to use. Defaults to virtual hosted-style as most of the data products out there. Have a look at the official documentation on <https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html>"
enum:
- Path
- VirtualHosted
nullable: true
type: string
credentials:
description: "If the S3 uses authentication you have to specify you S3 credentials. In the most cases a SecretClass providing `accessKey` and `secretKey` is sufficient."
nullable: true
properties:
scope:
description: "[Scope](https://docs.stackable.tech/secret-operator/scope.html) of the [SecretClass](https://docs.stackable.tech/secret-operator/secretclass.html)"
nullable: true
properties:
node:
default: false
type: boolean
pod:
default: false
type: boolean
services:
default: []
items:
type: string
type: array
type: object
secretClass:
description: "[SecretClass](https://docs.stackable.tech/secret-operator/secretclass.html) containing the LDAP bind credentials"
type: string
required:
- secretClass
type: object
host:
description: Hostname of the S3 server without any protocol or port
nullable: true
type: string
port:
description: Port the S3 server listens on. If not specified the products will determine the port to use.
format: uint16
minimum: 0.0
nullable: true
type: integer
secretClass:
nullable: true
type: string
tls:
description: If you want to use TLS when talking to S3 you can enable TLS encrypted communication with this setting.
nullable: true
properties:
verification:
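To make the schema above concrete, here is a sketch of the new `credentials` block as a user would write it; the secret class name and the scope values are placeholders, and `scope` may be omitted entirely:

```yaml
credentials:
  secretClass: s3-credentials-class  # required: SecretClass providing accessKey and secretKey
  scope:                             # optional: where the secret-operator should expose the secret
    node: false
    pod: true
    services:
      - test-minio
```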
39 changes: 36 additions & 3 deletions deploy/helm/spark-k8s-operator/crds/crds.yaml
@@ -301,18 +301,51 @@ spec:
inline:
description: S3 connection definition as CRD.
properties:
accessStyle:
description: "Which access style to use. Defaults to virtual hosted-style as most of the data products out there. Have a look at the official documentation on <https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html>"
enum:
- Path
- VirtualHosted
nullable: true
type: string
credentials:
description: "If the S3 uses authentication you have to specify you S3 credentials. In the most cases a SecretClass providing `accessKey` and `secretKey` is sufficient."
nullable: true
properties:
scope:
description: "[Scope](https://docs.stackable.tech/secret-operator/scope.html) of the [SecretClass](https://docs.stackable.tech/secret-operator/secretclass.html)"
nullable: true
properties:
node:
default: false
type: boolean
pod:
default: false
type: boolean
services:
default: []
items:
type: string
type: array
type: object
secretClass:
description: "[SecretClass](https://docs.stackable.tech/secret-operator/secretclass.html) containing the LDAP bind credentials"
type: string
required:
- secretClass
type: object
host:
description: Hostname of the S3 server without any protocol or port
nullable: true
type: string
port:
description: Port the S3 server listens on. If not specified the products will determine the port to use.
format: uint16
minimum: 0.0
nullable: true
type: integer
secretClass:
nullable: true
type: string
tls:
description: If you want to use TLS when talking to S3 you can enable TLS encrypted communication with this setting.
nullable: true
properties:
verification:
39 changes: 36 additions & 3 deletions deploy/manifests/crds.yaml
@@ -302,18 +302,51 @@ spec:
inline:
description: S3 connection definition as CRD.
properties:
accessStyle:
description: "Which access style to use. Defaults to virtual hosted-style as most of the data products out there. Have a look at the official documentation on <https://docs.aws.amazon.com/AmazonS3/latest/userguide/VirtualHosting.html>"
enum:
- Path
- VirtualHosted
nullable: true
type: string
credentials:
description: "If the S3 uses authentication you have to specify you S3 credentials. In the most cases a SecretClass providing `accessKey` and `secretKey` is sufficient."
nullable: true
properties:
scope:
description: "[Scope](https://docs.stackable.tech/secret-operator/scope.html) of the [SecretClass](https://docs.stackable.tech/secret-operator/secretclass.html)"
nullable: true
properties:
node:
default: false
type: boolean
pod:
default: false
type: boolean
services:
default: []
items:
type: string
type: array
type: object
secretClass:
description: "[SecretClass](https://docs.stackable.tech/secret-operator/secretclass.html) containing the LDAP bind credentials"
type: string
required:
- secretClass
type: object
host:
description: Hostname of the S3 server without any protocol or port
nullable: true
type: string
port:
description: Port the S3 server listens on. If not specified the products will determine the port to use.
format: uint16
minimum: 0.0
nullable: true
type: integer
secretClass:
nullable: true
type: string
tls:
description: If you want to use TLS when talking to S3 you can enable TLS encrypted communication with this setting.
nullable: true
properties:
verification:
5 changes: 3 additions & 2 deletions docs/modules/ROOT/examples/example-sparkapp-s3-private.yaml
@@ -16,10 +16,11 @@ spec:
inline:
host: test-minio
port: 9000
secretClass: minio-credentials # <4>
accessStyle: Path
credentials: # <4>
secretClass: s3-credentials-class
sparkConf: # <5>
spark.hadoop.fs.s3a.aws.credentials.provider: "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider" # <6>
spark.hadoop.fs.s3a.path.style.access: "true"
spark.driver.extraClassPath: "/dependencies/jars/hadoop-aws-3.2.0.jar:/dependencies/jars/aws-java-sdk-bundle-1.11.375.jar"
spark.executor.extraClassPath: "/dependencies/jars/hadoop-aws-3.2.0.jar:/dependencies/jars/aws-java-sdk-bundle-1.11.375.jar"
volumes:
24 changes: 14 additions & 10 deletions docs/modules/ROOT/pages/usage.adoc
@@ -2,7 +2,7 @@

== Create an Apache Spark job

If you followed the installation instructions, you should now have a Stackable Operator for Apache Spark up and running and you are ready to create your first Apache Spark kubernetes cluster.
If you followed the installation instructions, you should now have a Stackable Operator for Apache Spark up and running, and you are ready to create your first Apache Spark kubernetes cluster.

The example below creates a job running on Apache Spark 3.2.1, using the spark-on-kubernetes paradigm described in the spark documentation. The application file is itself part of the spark distribution and `local` refers to the path on the driver/executors; there are no external dependencies.

@@ -64,11 +64,11 @@ include::example$example-sparkapp-external-dependencies.yaml[]
include::example$example-sparkapp-image.yaml[]
----

<1> Job image: this contains the job artifact that will retrieved from the volume mount backed by the PVC
<1> Job image: this contains the job artifact that will be retrieved from the volume mount backed by the PVC
<2> Job python artifact (local)
<3> Job argument (external)
<4> List of python job requirements: these will be installed in the pods via `pip`
<5> Spark dependencies: the credentials provider (the user knows what is relevant here) plus dependencies needed to access external resources (in this case, in s3)
<5> Spark dependencies: the credentials provider (the user knows what is relevant here) plus dependencies needed to access external resources (in this case, in an S3 store)
<6> the name of the volume mount backed by a `PersistentVolumeClaim` that must be pre-existing
<7> the path on the volume mount: this is referenced in the `sparkConf` section where the extra class path is defined for the driver and executors

@@ -81,7 +81,7 @@ include::example$example-sparkapp-pvc.yaml[]

<1> Job artifact located on S3.
<2> Job main class
<3> Spark dependencies: the credentials provider (the user knows what is relevant here) plus dependencies needed to access external resources (in this case, in s3, accessed without credentials)
<3> Spark dependencies: the credentials provider (the user knows what is relevant here) plus dependencies needed to access external resources (in this case, in an S3 store, accessed without credentials)
<4> the name of the volume mount backed by a `PersistentVolumeClaim` that must be pre-existing
<5> the path on the volume mount: this is referenced in the `sparkConf` section where the extra class path is defined for the driver and executors

@@ -92,12 +92,12 @@
include::example$example-sparkapp-s3-private.yaml[]
----

<1> Job python artifact (located in S3)
<1> Job python artifact (located in an S3 store)
<2> Artifact class
<3> S3 section, specifying the existing secret and S3 end-point (in this case, MinIO)
<4> Credentials secret
<4> Credentials referencing a secretClass (not shown in this example)
<5> Spark dependencies: the credentials provider (the user knows what is relevant here) plus dependencies needed to access external resources...
<6> ...in this case, in s3, accessed with the credentials defined in the secret
<6> ...in this case, in an S3 store, accessed with the credentials defined in the secret
<7> the name of the volume mount backed by a `PersistentVolumeClaim` that must be pre-existing
<8> the path on the volume mount: this is referenced in the `sparkConf` section where the extra class path is defined for the driver and executors

@@ -121,7 +121,7 @@ include::example$example-sparkapp-configmap.yaml[]

== S3 bucket specification

You can specify S3 connection details directly inside the `SparkApplication` specification or by refering to an external `S3Bucket` custom resource.
You can specify S3 connection details directly inside the `SparkApplication` specification or by referring to an external `S3Bucket` custom resource.

To specify S3 connection details directly as part of the `SparkApplication` resource you add an inline bucket configuration as shown below.

@@ -134,7 +134,9 @@ s3bucket: # <1>
inline:
host: test-minio # <3>
port: 9000 # <4>
secretClass: minio-credentials # <5>
accessStyle: Path
credentials:
secretClass: s3-credentials-class # <5>
----
<1> Entry point for the bucket configuration.
<2> Bucket name.
@@ -166,7 +168,9 @@ spec:
inline:
host: test-minio
port: 9000
secretClass: minio-credentials
accessStyle: Path
credentials:
secretClass: minio-credentials-class
----

This has the advantage that bucket configuration can be shared across `SparkApplication`s and reduces the cost of updating these details.
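For completeness, a sketch of how a `SparkApplication` might refer to such a shared bucket definition instead of declaring it inline; the `reference` field and the resource name `my-s3-bucket` are assumptions following the operator-rs S3 structs this PR adopts and are not shown in this diff:

```yaml
s3bucket:
  reference: my-s3-bucket  # name of an existing S3Bucket custom resource (assumed)
```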
2 changes: 1 addition & 1 deletion rust/crd/Cargo.toml
@@ -8,7 +8,7 @@ repository = "https://github.com/stackabletech/spark-k8s-operator"
version = "0.3.0-nightly"

[dependencies]
stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag="0.19.0" }
stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag="0.21.0" }

semver = "1.0"
serde = { version = "1.0", features = ["derive"] }
3 changes: 1 addition & 2 deletions rust/crd/src/constants.rs
@@ -17,7 +17,6 @@ pub const CONTAINER_NAME_DRIVER: &str = "spark-driver";
pub const CONTAINER_IMAGE_NAME_EXECUTOR: &str = "dummy-overwritten-by-command-line";
pub const CONTAINER_NAME_EXECUTOR: &str = "spark-executor";

pub const ENV_AWS_ACCESS_KEY_ID: &str = "AWS_ACCESS_KEY_ID";
pub const ENV_AWS_SECRET_ACCESS_KEY: &str = "AWS_SECRET_ACCESS_KEY";
pub const ACCESS_KEY_ID: &str = "accessKeyId";
pub const SECRET_ACCESS_KEY: &str = "secretAccessKey";
pub const S3_SECRET_DIR_NAME: &str = "/stackable/secrets";