diff --git a/CHANGELOG.md b/CHANGELOG.md index 07f5ffe6..25da2e1c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,9 +8,11 @@ All notable changes to this project will be documented in this file. - Added new fields to govern image pull policy ([#75]) - New `nodeSelector` fields for both the driver and the excutors ([#76]) +- Mirror driver pod status to the corresponding spark application ([#77]) [#75]: https://github.com/stackabletech/spark-k8s-operator/pull/75 [#76]: https://github.com/stackabletech/spark-k8s-operator/pull/76 +[#77]: https://github.com/stackabletech/spark-k8s-operator/pull/77 ### Changed diff --git a/Cargo.lock b/Cargo.lock index b0371163..755ac244 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -467,12 +467,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "futures" -version = "0.1.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a471a38ef8ed83cd6e40aa59c1ffe17db6855c18e3604d9c4ed8c08ebc28678" - [[package]] name = "futures" version = "0.3.21" @@ -550,7 +544,6 @@ version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a" dependencies = [ - "futures 0.1.31", "futures-channel", "futures-core", "futures-io", @@ -852,7 +845,7 @@ dependencies = [ "chrono", "dirs-next", "either", - "futures 0.3.21", + "futures", "http", "http-body", "hyper", @@ -918,7 +911,7 @@ dependencies = [ "ahash", "backoff", "derivative", - "futures 0.3.21", + "futures", "json-patch", "k8s-openapi", "kube-client", @@ -1680,7 +1673,7 @@ dependencies = [ "const_format", "derivative", "either", - "futures 0.3.21", + "futures", "json-patch", "k8s-openapi", "kube", @@ -1734,9 +1727,8 @@ dependencies = [ "anyhow", "built", "clap", - "fnv", - "futures 0.3.21", - "lazy_static", + "futures", + "serde", "serde_yaml", "snafu", "stackable-operator", @@ -1744,6 +1736,7 @@ dependencies = [ "strum", "tokio", "tracing", + "tracing-futures", ] [[package]] @@ -2068,6 +2061,18 @@ dependencies = [ "valuable", ] +[[package]] +name = "tracing-futures" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97d095ae15e245a057c8e8451bab9b3ee1e1f68e9ba2b4fbc18d0ac5237835f2" +dependencies = [ + "futures", + "futures-task", + "pin-project", + "tracing", +] + [[package]] name = "tracing-log" version = "0.1.3" diff --git a/deploy/config-spec/properties.yaml b/deploy/config-spec/properties.yaml index fca1a2fa..c8bdb8de 100644 --- a/deploy/config-spec/properties.yaml +++ b/deploy/config-spec/properties.yaml @@ -1,276 +1,6 @@ +--- +# Not used. Kept for compatibilty with Dockerfile. 
version: 0.1.0 spec: - units: - - unit: &unitPassword - name: "password" - regex: "^[a-zA-Z]\\w{5,20}$" - - unit: &unitDirectory - name: "directory" - regex: "^(.*)/?([^/]+)$" - examples: - - "/tmp/xyz" - - unit: &unitMemory - name: "memory" - regex: "(^\\p{N}+)(?:\\s*)((?:b|k|m|g|t|p|kb|mb|gb|tb|pb)\\b$)" - examples: - - "1024b" - - "1024kb" - - "500m" - - "1g" - -properties: - - property: &sparkWorkerCores - propertyNames: - - name: "SPARK_WORKER_CORES" - kind: - type: "file" - file: "spark-env.sh" - datatype: - type: "integer" - roles: - - name: "slave" - required: false - asOfVersion: "0.6.2" - description: "Total number of cores to allow Spark jobs to use on the machine (default: all available cores)" - - - property: &sparkWorkerMemory - propertyNames: - - name: "SPARK_WORKER_MEMORY" - kind: - type: "file" - file: "spark-env.sh" - datatype: - type: "string" - unit: *unitMemory - roles: - - name: "slave" - required: false - asOfVersion: "0.6.2" - description: "Total amount of memory to allow Spark jobs to use on the machine, e.g. 1000M, 2G (default: total memory minus 1 GB); note that each job's individual memory is configured using SPARK_MEM." - - - property: &sparkDaemonMemory - propertyNames: - - name: "SPARK_DAEMON_MEMORY" - kind: - type: "file" - file: "spark-env.sh" - datatype: - type: "string" - unit: *unitMemory - defaultValues: - - fromVersion: "0.6.2" - to_version: "1.4.1" - value: "512m" - - fromVersion: "1.5.0" - value: "1g" - roles: - - name: "master" - required: false - - name: "slave" - required: false - - name: "history-server" - required: false - asOfVersion: "0.6.2" - description: "Memory to allocate to the Spark master and worker daemons themselves (default: 512m/1g)" - - - property: &sparkEventLogEnabled - propertyNames: - - name: "spark.eventLog.enabled" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "bool" - defaultValues: - - fromVersion: "1.0.0" - value: "false" - recommendedValues: - - fromVersion: "1.0.0" - value: "true" - roles: - - name: "master" - required: false - - name: "slave" - required: false - - name: "history-server" - required: false - asOfVersion: "1.0.0" - description: "Whether to log Spark events, useful for reconstructing the Web UI after the application has finished." - - - property: &sparkEventLogDir - propertyNames: - - name: "spark.eventLog.dir" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "string" - unit: *unitDirectory - defaultValues: - - fromVersion: "1.0.0" - value: "/tmp/spark-events" - recommendedValues: - - fromVersion: "1.0.0" - value: "/stackable/log" - roles: - - name: "master" - required: false - - name: "slave" - required: false - asOfVersion: "1.0.0" - expandsTo: - - property: *sparkEventLogEnabled - value: "true" - description: "Base directory in which Spark events are logged, if spark.eventLog.enabled is true. Within this base directory, Spark creates a sub-directory for each application, and logs the events specific to the application in this directory. Users may want to set this to a unified location like an HDFS directory so history files can be read by the history server." 
- - - property: &sparkHistoryLogDirectory - propertyNames: - - name: "spark.history.fs.logDirectory" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "string" - unit: *unitDirectory - defaultValues: - - fromVersion: "1.1.0" - value: "/tmp/spark-events" - recommendedValues: - - fromVersion: "1.0.0" - value: "/stackable/log" - roles: - - name: "history-server" - required: true - expandsTo: - - property: *sparkEventLogEnabled - value: "true" - asOfVersion: "1.1.0" - description: "For the filesystem history provider, the URL to the directory containing application event logs to load. This can be a local file://path, an HDFS path hdfs://namenode/shared/spark-logs or that of an alternative filesystem supported by the Hadoop APIs." - - - property: &sparkHistoryStorePath - propertyNames: - - name: "spark.history.store.path" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "string" - unit: *unitDirectory - roles: - - name: "history-server" - required: false - asOfVersion: "2.3.0" - description: "Local directory where to cache application history data. If set, the history server will store application data on disk instead of keeping it in memory. The data written to disk will be re-used in the event of a history server restart." - - - property: &sparkAuthenticate - propertyNames: - - name: "spark.authenticate" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "bool" - defaultValues: - - fromVersion: "1.0.0" - value: "false" - recommendedValues: - - fromVersion: "1.0.0" - value: "true" - roles: - - name: "master" - required: false - - name: "slave" - required: false - - name: "history-server" - required: false - asOfVersion: "1.0.0" - description: "Whether Spark authenticates its internal connections." - - - property: &sparkAuthenticateSecret - propertyNames: - - name: "spark.authenticate.secret" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "string" - unit: *unitPassword - recommendedValues: - - fromVersion: "1.0.0" - value: "secret" - roles: - - name: "master" - required: false - - name: "slave" - required: false - - name: "history-server" - required: false - asOfVersion: "1.0.0" - expandsTo: - - property: *sparkAuthenticate - value: "true" - description: "The secret key used in the authentication. SPARK_AUTHENTICATE must be set to true." - - - property: &sparkPortMaxRetries - propertyNames: - - name: "spark.port.maxRetries" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "integer" - defaultValues: - - fromVersion: "1.1.1" - value: "16" - recommendedValues: - - fromVersion: "1.1.1" - value: "0" - roles: - - name: "master" - required: true - - name: "slave" - required: true - - name: "history-server" - required: true - asOfVersion: "1.1.1" - description: "Maximum number of retries when binding to a port before giving up. When a port is given a specific value (non 0), each subsequent retry will increment the port used in the previous attempt by 1 before retrying. This essentially allows it to try a range of ports from the start port specified to port + maxRetries." - - - property: &sparkNoDaemonize - propertyNames: - - name: "SPARK_NO_DAEMONIZE" - kind: - type: "env" - datatype: - type: "bool" - recommendedValues: - - fromVersion: "2.0.0" - value: "true" - roles: - - name: "master" - required: true - - name: "slave" - required: true - - name: "history-server" - required: true - asOfVersion: "2.0.0" - description: "Run spark processes in foreground if true. 
Useful for systemd (default: false)" - - - property: &sparkConfDir - propertyNames: - - name: "SPARK_CONF_DIR" - kind: - type: "env" - datatype: - type: "string" - recommendedValues: - - fromVersion: "1.1.1" - value: "/stackable/config" - roles: - - name: "master" - required: true - - name: "slave" - required: true - - name: "history-server" - required: true - asOfVersion: "1.1.1" - description: "To specify a different configuration directory other than the default “SPARK_HOME/conf”, you can set SPARK_CONF_DIR. Spark will use the the configuration files (spark-defaults.conf, spark-env.sh, log4j.properties, etc) from this directory." + units: [] +properties: [] diff --git a/deploy/crd/sparkapplication.crd.yaml b/deploy/crd/sparkapplication.crd.yaml index 6b7d1a1b..99b302ff 100644 --- a/deploy/crd/sparkapplication.crd.yaml +++ b/deploy/crd/sparkapplication.crd.yaml @@ -21,7 +21,6 @@ spec: description: "Auto-generated derived type for SparkApplicationSpec via `CustomResource`" properties: spec: - description: SparkApplicationStatus CommandStatus properties: args: items: @@ -1403,16 +1402,10 @@ spec: status: nullable: true properties: - finishedAt: - description: Time is a wrapper around time.Time which supports correct marshaling to YAML and JSON. Wrappers are provided for many of the factory methods that the time package offers. - format: date-time - nullable: true - type: string - startedAt: - description: Time is a wrapper around time.Time which supports correct marshaling to YAML and JSON. Wrappers are provided for many of the factory methods that the time package offers. - format: date-time - nullable: true + phase: type: string + required: + - phase type: object required: - spec diff --git a/deploy/helm/spark-k8s-operator/configs/properties.yaml b/deploy/helm/spark-k8s-operator/configs/properties.yaml index fca1a2fa..c8bdb8de 100644 --- a/deploy/helm/spark-k8s-operator/configs/properties.yaml +++ b/deploy/helm/spark-k8s-operator/configs/properties.yaml @@ -1,276 +1,6 @@ +--- +# Not used. Kept for compatibilty with Dockerfile. version: 0.1.0 spec: - units: - - unit: &unitPassword - name: "password" - regex: "^[a-zA-Z]\\w{5,20}$" - - unit: &unitDirectory - name: "directory" - regex: "^(.*)/?([^/]+)$" - examples: - - "/tmp/xyz" - - unit: &unitMemory - name: "memory" - regex: "(^\\p{N}+)(?:\\s*)((?:b|k|m|g|t|p|kb|mb|gb|tb|pb)\\b$)" - examples: - - "1024b" - - "1024kb" - - "500m" - - "1g" - -properties: - - property: &sparkWorkerCores - propertyNames: - - name: "SPARK_WORKER_CORES" - kind: - type: "file" - file: "spark-env.sh" - datatype: - type: "integer" - roles: - - name: "slave" - required: false - asOfVersion: "0.6.2" - description: "Total number of cores to allow Spark jobs to use on the machine (default: all available cores)" - - - property: &sparkWorkerMemory - propertyNames: - - name: "SPARK_WORKER_MEMORY" - kind: - type: "file" - file: "spark-env.sh" - datatype: - type: "string" - unit: *unitMemory - roles: - - name: "slave" - required: false - asOfVersion: "0.6.2" - description: "Total amount of memory to allow Spark jobs to use on the machine, e.g. 1000M, 2G (default: total memory minus 1 GB); note that each job's individual memory is configured using SPARK_MEM." 
- - - property: &sparkDaemonMemory - propertyNames: - - name: "SPARK_DAEMON_MEMORY" - kind: - type: "file" - file: "spark-env.sh" - datatype: - type: "string" - unit: *unitMemory - defaultValues: - - fromVersion: "0.6.2" - to_version: "1.4.1" - value: "512m" - - fromVersion: "1.5.0" - value: "1g" - roles: - - name: "master" - required: false - - name: "slave" - required: false - - name: "history-server" - required: false - asOfVersion: "0.6.2" - description: "Memory to allocate to the Spark master and worker daemons themselves (default: 512m/1g)" - - - property: &sparkEventLogEnabled - propertyNames: - - name: "spark.eventLog.enabled" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "bool" - defaultValues: - - fromVersion: "1.0.0" - value: "false" - recommendedValues: - - fromVersion: "1.0.0" - value: "true" - roles: - - name: "master" - required: false - - name: "slave" - required: false - - name: "history-server" - required: false - asOfVersion: "1.0.0" - description: "Whether to log Spark events, useful for reconstructing the Web UI after the application has finished." - - - property: &sparkEventLogDir - propertyNames: - - name: "spark.eventLog.dir" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "string" - unit: *unitDirectory - defaultValues: - - fromVersion: "1.0.0" - value: "/tmp/spark-events" - recommendedValues: - - fromVersion: "1.0.0" - value: "/stackable/log" - roles: - - name: "master" - required: false - - name: "slave" - required: false - asOfVersion: "1.0.0" - expandsTo: - - property: *sparkEventLogEnabled - value: "true" - description: "Base directory in which Spark events are logged, if spark.eventLog.enabled is true. Within this base directory, Spark creates a sub-directory for each application, and logs the events specific to the application in this directory. Users may want to set this to a unified location like an HDFS directory so history files can be read by the history server." - - - property: &sparkHistoryLogDirectory - propertyNames: - - name: "spark.history.fs.logDirectory" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "string" - unit: *unitDirectory - defaultValues: - - fromVersion: "1.1.0" - value: "/tmp/spark-events" - recommendedValues: - - fromVersion: "1.0.0" - value: "/stackable/log" - roles: - - name: "history-server" - required: true - expandsTo: - - property: *sparkEventLogEnabled - value: "true" - asOfVersion: "1.1.0" - description: "For the filesystem history provider, the URL to the directory containing application event logs to load. This can be a local file://path, an HDFS path hdfs://namenode/shared/spark-logs or that of an alternative filesystem supported by the Hadoop APIs." - - - property: &sparkHistoryStorePath - propertyNames: - - name: "spark.history.store.path" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "string" - unit: *unitDirectory - roles: - - name: "history-server" - required: false - asOfVersion: "2.3.0" - description: "Local directory where to cache application history data. If set, the history server will store application data on disk instead of keeping it in memory. The data written to disk will be re-used in the event of a history server restart." 
- - - property: &sparkAuthenticate - propertyNames: - - name: "spark.authenticate" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "bool" - defaultValues: - - fromVersion: "1.0.0" - value: "false" - recommendedValues: - - fromVersion: "1.0.0" - value: "true" - roles: - - name: "master" - required: false - - name: "slave" - required: false - - name: "history-server" - required: false - asOfVersion: "1.0.0" - description: "Whether Spark authenticates its internal connections." - - - property: &sparkAuthenticateSecret - propertyNames: - - name: "spark.authenticate.secret" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "string" - unit: *unitPassword - recommendedValues: - - fromVersion: "1.0.0" - value: "secret" - roles: - - name: "master" - required: false - - name: "slave" - required: false - - name: "history-server" - required: false - asOfVersion: "1.0.0" - expandsTo: - - property: *sparkAuthenticate - value: "true" - description: "The secret key used in the authentication. SPARK_AUTHENTICATE must be set to true." - - - property: &sparkPortMaxRetries - propertyNames: - - name: "spark.port.maxRetries" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "integer" - defaultValues: - - fromVersion: "1.1.1" - value: "16" - recommendedValues: - - fromVersion: "1.1.1" - value: "0" - roles: - - name: "master" - required: true - - name: "slave" - required: true - - name: "history-server" - required: true - asOfVersion: "1.1.1" - description: "Maximum number of retries when binding to a port before giving up. When a port is given a specific value (non 0), each subsequent retry will increment the port used in the previous attempt by 1 before retrying. This essentially allows it to try a range of ports from the start port specified to port + maxRetries." - - - property: &sparkNoDaemonize - propertyNames: - - name: "SPARK_NO_DAEMONIZE" - kind: - type: "env" - datatype: - type: "bool" - recommendedValues: - - fromVersion: "2.0.0" - value: "true" - roles: - - name: "master" - required: true - - name: "slave" - required: true - - name: "history-server" - required: true - asOfVersion: "2.0.0" - description: "Run spark processes in foreground if true. Useful for systemd (default: false)" - - - property: &sparkConfDir - propertyNames: - - name: "SPARK_CONF_DIR" - kind: - type: "env" - datatype: - type: "string" - recommendedValues: - - fromVersion: "1.1.1" - value: "/stackable/config" - roles: - - name: "master" - required: true - - name: "slave" - required: true - - name: "history-server" - required: true - asOfVersion: "1.1.1" - description: "To specify a different configuration directory other than the default “SPARK_HOME/conf”, you can set SPARK_CONF_DIR. Spark will use the the configuration files (spark-defaults.conf, spark-env.sh, log4j.properties, etc) from this directory." + units: [] +properties: [] diff --git a/deploy/helm/spark-k8s-operator/crds/crds.yaml b/deploy/helm/spark-k8s-operator/crds/crds.yaml index 26cc7dbf..0c31374d 100644 --- a/deploy/helm/spark-k8s-operator/crds/crds.yaml +++ b/deploy/helm/spark-k8s-operator/crds/crds.yaml @@ -23,7 +23,6 @@ spec: description: "Auto-generated derived type for SparkApplicationSpec via `CustomResource`" properties: spec: - description: SparkApplicationStatus CommandStatus properties: args: items: @@ -1405,16 +1404,10 @@ spec: status: nullable: true properties: - finishedAt: - description: Time is a wrapper around time.Time which supports correct marshaling to YAML and JSON. 
Wrappers are provided for many of the factory methods that the time package offers. - format: date-time - nullable: true - type: string - startedAt: - description: Time is a wrapper around time.Time which supports correct marshaling to YAML and JSON. Wrappers are provided for many of the factory methods that the time package offers. - format: date-time - nullable: true + phase: type: string + required: + - phase type: object required: - spec diff --git a/deploy/manifests/configmap.yaml b/deploy/manifests/configmap.yaml index bd86b1c9..4f43785f 100644 --- a/deploy/manifests/configmap.yaml +++ b/deploy/manifests/configmap.yaml @@ -2,282 +2,12 @@ apiVersion: v1 data: properties.yaml: | + --- + # Not used. Kept for compatibilty with Dockerfile. version: 0.1.0 spec: - units: - - unit: &unitPassword - name: "password" - regex: "^[a-zA-Z]\\w{5,20}$" - - unit: &unitDirectory - name: "directory" - regex: "^(.*)/?([^/]+)$" - examples: - - "/tmp/xyz" - - unit: &unitMemory - name: "memory" - regex: "(^\\p{N}+)(?:\\s*)((?:b|k|m|g|t|p|kb|mb|gb|tb|pb)\\b$)" - examples: - - "1024b" - - "1024kb" - - "500m" - - "1g" - - properties: - - property: &sparkWorkerCores - propertyNames: - - name: "SPARK_WORKER_CORES" - kind: - type: "file" - file: "spark-env.sh" - datatype: - type: "integer" - roles: - - name: "slave" - required: false - asOfVersion: "0.6.2" - description: "Total number of cores to allow Spark jobs to use on the machine (default: all available cores)" - - - property: &sparkWorkerMemory - propertyNames: - - name: "SPARK_WORKER_MEMORY" - kind: - type: "file" - file: "spark-env.sh" - datatype: - type: "string" - unit: *unitMemory - roles: - - name: "slave" - required: false - asOfVersion: "0.6.2" - description: "Total amount of memory to allow Spark jobs to use on the machine, e.g. 1000M, 2G (default: total memory minus 1 GB); note that each job's individual memory is configured using SPARK_MEM." - - - property: &sparkDaemonMemory - propertyNames: - - name: "SPARK_DAEMON_MEMORY" - kind: - type: "file" - file: "spark-env.sh" - datatype: - type: "string" - unit: *unitMemory - defaultValues: - - fromVersion: "0.6.2" - to_version: "1.4.1" - value: "512m" - - fromVersion: "1.5.0" - value: "1g" - roles: - - name: "master" - required: false - - name: "slave" - required: false - - name: "history-server" - required: false - asOfVersion: "0.6.2" - description: "Memory to allocate to the Spark master and worker daemons themselves (default: 512m/1g)" - - - property: &sparkEventLogEnabled - propertyNames: - - name: "spark.eventLog.enabled" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "bool" - defaultValues: - - fromVersion: "1.0.0" - value: "false" - recommendedValues: - - fromVersion: "1.0.0" - value: "true" - roles: - - name: "master" - required: false - - name: "slave" - required: false - - name: "history-server" - required: false - asOfVersion: "1.0.0" - description: "Whether to log Spark events, useful for reconstructing the Web UI after the application has finished." 
- - - property: &sparkEventLogDir - propertyNames: - - name: "spark.eventLog.dir" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "string" - unit: *unitDirectory - defaultValues: - - fromVersion: "1.0.0" - value: "/tmp/spark-events" - recommendedValues: - - fromVersion: "1.0.0" - value: "/stackable/log" - roles: - - name: "master" - required: false - - name: "slave" - required: false - asOfVersion: "1.0.0" - expandsTo: - - property: *sparkEventLogEnabled - value: "true" - description: "Base directory in which Spark events are logged, if spark.eventLog.enabled is true. Within this base directory, Spark creates a sub-directory for each application, and logs the events specific to the application in this directory. Users may want to set this to a unified location like an HDFS directory so history files can be read by the history server." - - - property: &sparkHistoryLogDirectory - propertyNames: - - name: "spark.history.fs.logDirectory" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "string" - unit: *unitDirectory - defaultValues: - - fromVersion: "1.1.0" - value: "/tmp/spark-events" - recommendedValues: - - fromVersion: "1.0.0" - value: "/stackable/log" - roles: - - name: "history-server" - required: true - expandsTo: - - property: *sparkEventLogEnabled - value: "true" - asOfVersion: "1.1.0" - description: "For the filesystem history provider, the URL to the directory containing application event logs to load. This can be a local file://path, an HDFS path hdfs://namenode/shared/spark-logs or that of an alternative filesystem supported by the Hadoop APIs." - - - property: &sparkHistoryStorePath - propertyNames: - - name: "spark.history.store.path" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "string" - unit: *unitDirectory - roles: - - name: "history-server" - required: false - asOfVersion: "2.3.0" - description: "Local directory where to cache application history data. If set, the history server will store application data on disk instead of keeping it in memory. The data written to disk will be re-used in the event of a history server restart." - - - property: &sparkAuthenticate - propertyNames: - - name: "spark.authenticate" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "bool" - defaultValues: - - fromVersion: "1.0.0" - value: "false" - recommendedValues: - - fromVersion: "1.0.0" - value: "true" - roles: - - name: "master" - required: false - - name: "slave" - required: false - - name: "history-server" - required: false - asOfVersion: "1.0.0" - description: "Whether Spark authenticates its internal connections." - - - property: &sparkAuthenticateSecret - propertyNames: - - name: "spark.authenticate.secret" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "string" - unit: *unitPassword - recommendedValues: - - fromVersion: "1.0.0" - value: "secret" - roles: - - name: "master" - required: false - - name: "slave" - required: false - - name: "history-server" - required: false - asOfVersion: "1.0.0" - expandsTo: - - property: *sparkAuthenticate - value: "true" - description: "The secret key used in the authentication. SPARK_AUTHENTICATE must be set to true." 
- - - property: &sparkPortMaxRetries - propertyNames: - - name: "spark.port.maxRetries" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "integer" - defaultValues: - - fromVersion: "1.1.1" - value: "16" - recommendedValues: - - fromVersion: "1.1.1" - value: "0" - roles: - - name: "master" - required: true - - name: "slave" - required: true - - name: "history-server" - required: true - asOfVersion: "1.1.1" - description: "Maximum number of retries when binding to a port before giving up. When a port is given a specific value (non 0), each subsequent retry will increment the port used in the previous attempt by 1 before retrying. This essentially allows it to try a range of ports from the start port specified to port + maxRetries." - - - property: &sparkNoDaemonize - propertyNames: - - name: "SPARK_NO_DAEMONIZE" - kind: - type: "env" - datatype: - type: "bool" - recommendedValues: - - fromVersion: "2.0.0" - value: "true" - roles: - - name: "master" - required: true - - name: "slave" - required: true - - name: "history-server" - required: true - asOfVersion: "2.0.0" - description: "Run spark processes in foreground if true. Useful for systemd (default: false)" - - - property: &sparkConfDir - propertyNames: - - name: "SPARK_CONF_DIR" - kind: - type: "env" - datatype: - type: "string" - recommendedValues: - - fromVersion: "1.1.1" - value: "/stackable/config" - roles: - - name: "master" - required: true - - name: "slave" - required: true - - name: "history-server" - required: true - asOfVersion: "1.1.1" - description: "To specify a different configuration directory other than the default “SPARK_HOME/conf”, you can set SPARK_CONF_DIR. Spark will use the the configuration files (spark-defaults.conf, spark-env.sh, log4j.properties, etc) from this directory." + units: [] + properties: [] kind: ConfigMap metadata: name: spark-k8s-operator-configmap diff --git a/deploy/manifests/crds.yaml b/deploy/manifests/crds.yaml index 20d5d011..0540e287 100644 --- a/deploy/manifests/crds.yaml +++ b/deploy/manifests/crds.yaml @@ -24,7 +24,6 @@ spec: description: "Auto-generated derived type for SparkApplicationSpec via `CustomResource`" properties: spec: - description: SparkApplicationStatus CommandStatus properties: args: items: @@ -1406,16 +1405,10 @@ spec: status: nullable: true properties: - finishedAt: - description: Time is a wrapper around time.Time which supports correct marshaling to YAML and JSON. Wrappers are provided for many of the factory methods that the time package offers. - format: date-time - nullable: true - type: string - startedAt: - description: Time is a wrapper around time.Time which supports correct marshaling to YAML and JSON. Wrappers are provided for many of the factory methods that the time package offers. 
- format: date-time - nullable: true + phase: type: string + required: + - phase type: object required: - spec diff --git a/docs/modules/ROOT/pages/commandline_args.adoc b/docs/modules/ROOT/pages/commandline_args.adoc index c3628eae..a790f307 100644 --- a/docs/modules/ROOT/pages/commandline_args.adoc +++ b/docs/modules/ROOT/pages/commandline_args.adoc @@ -1,17 +1,4 @@ -=== product-config - -*Default value*: `/etc/stackable/spark-k8s-operator/config-spec/properties.yaml` - -*Required*: false - -*Multiple values:* false - -[source] ---- -stackable-spark-k8s-operator run --product-config /foo/bar/properties.yaml ---- - === watch-namespace *Default value*: All namespaces diff --git a/docs/modules/ROOT/pages/env_var_args.adoc b/docs/modules/ROOT/pages/env_var_args.adoc index 6e33ddff..d283a70a 100644 --- a/docs/modules/ROOT/pages/env_var_args.adoc +++ b/docs/modules/ROOT/pages/env_var_args.adoc @@ -1,30 +1,4 @@ -=== PRODUCT_CONFIG - -*Default value*: `/etc/stackable/spark-k8s-operator/config-spec/properties.yaml` - -*Required*: false - -*Multiple values:* false - -[source] ---- -export PRODUCT_CONFIG=/foo/bar/properties.yaml -stackable-spark-k8s-operator run ---- - -or via docker: - ----- -docker run \ - --name spark-k8s-operator \ - --network host \ - --env KUBECONFIG=/home/stackable/.kube/config \ - --env PRODUCT_CONFIG=/my/product/config.yaml \ - --mount type=bind,source="$HOME/.kube/config",target="/home/stackable/.kube/config" \ - docker.stackable.tech/stackable/spark-k8s-operator:latest ---- - === WATCH_NAMESPACE *Default value*: All namespaces @@ -53,4 +27,3 @@ docker run \ --mount type=bind,source="$HOME/.kube/config",target="/home/stackable/.kube/config" \ docker.stackable.tech/stackable/spark-k8s-operator:latest ---- - diff --git a/docs/modules/ROOT/pages/installation.adoc b/docs/modules/ROOT/pages/installation.adoc index 2d68c955..d2bfb8f1 100644 --- a/docs/modules/ROOT/pages/installation.adoc +++ b/docs/modules/ROOT/pages/installation.adoc @@ -27,19 +27,19 @@ service. You are now ready to deploy Apache Spark in Kubernetes. == Building the operator from source -This operator is written in Rust and is developed against a recent stable Rust release. +To run it from your local machine - usually for development purposes - you need to install the required manifest files. [source,bash] ---- -cargo run -- crd | kubectl apply -f - -cargo run -- run +make regenerate-charts +kubectl create -f deploy/manifests ---- -To run it from your local machine - usually for development purposes - you need to create a `ClusterRoleBinding` : +Then, start the operator: [source,bash] ---- -kubectl create clusterrolebinding spark-role --clusterrole=spark-driver-edit-role --serviceaccount=default:default +cargo run -- run ---- == Additional/Optional components @@ -48,4 +48,4 @@ The above describes the installation of the operator alone and is sufficient for == Examples -The examples provided with the operator code show different ways of combining these elements. \ No newline at end of file +The examples provided with the operator code show different ways of combining these elements. diff --git a/docs/modules/ROOT/pages/rbac.adoc b/docs/modules/ROOT/pages/rbac.adoc index 83d890d2..7abb691a 100644 --- a/docs/modules/ROOT/pages/rbac.adoc +++ b/docs/modules/ROOT/pages/rbac.adoc @@ -6,30 +6,6 @@ The https://spark.apache.org/docs/latest/running-on-kubernetes.html#rbac[Spark-K However, to add security, each `spark-submit` job launched by the spark-k8s operator will be assigned its own service account.
-When the spark-k8s operator is launched via helm, like this: +When the spark-k8s operator is installed via helm, a cluster role named `spark-driver-edit-role` is created with pre-defined permissions. -[source,bash] ----- -helm install spark-k8s-operator stackable/spark-k8s-operator ----- - -the operator will take care of these steps automatically: - -- a cluster role will be created with pre-defined permissions -- for each `SparkApplication` job, a service account will be created -- for each `SparkApplication` job, a role binding will connect the cluster role with the service account - -If the operator is started outside the cluster, like this: - -[source,bash] ----- -cargo run -- crd | kubectl apply -f - -cargo run -- run ----- - -then the cluster-role has to be created assigned to the service account manually e.g. - -[source,bash] ----- -kubectl create clusterrolebinding spark-role --clusterrole=spark-driver-edit-role --serviceaccount=default:default ----- \ No newline at end of file +When a new Spark application is submitted, the operator creates a new service account with the same name as the application and binds this account to the cluster role `spark-driver-edit-role` created by helm. diff --git a/docs/modules/ROOT/pages/usage.adoc b/docs/modules/ROOT/pages/usage.adoc index 5822c6f6..569d9f5b 100644 --- a/docs/modules/ROOT/pages/usage.adoc +++ b/docs/modules/ROOT/pages/usage.adoc @@ -43,7 +43,7 @@ The following examples have the following `spec` fields in common: Job-specific settings are annotated below. -=== Pyspark: externally located artifact and dataset +=== Pyspark: externally located artifact and dataset [source,yaml] ---- @@ -72,7 +72,7 @@ include::example$example-sparkapp-image.yaml[] <6> the name of the volume mount backed by a `PersistentVolumeClaim` that must be pre-existing <7> the path on the volume mount: this is referenced in the `sparkConf` section where the extra class path is defined for the driver and executors -=== JVM (Scala): externally located artifact and dataset +=== JVM (Scala): externally located artifact and dataset [source,yaml] ---- @@ -283,4 +283,3 @@ Below are listed the CRD fields that can be defined by the user: |`spec.executor.nodeSelector` |A dictionary of labels to use for node selection when scheduling the executors N.B. this assumes there are no implicit node dependencies (e.g. `PVC`, `VolumeMount`) defined elsewhere. 
|=== - diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 39d8c1ad..fbe7bf18 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -14,7 +14,6 @@ use std::collections::{BTreeMap, HashMap}; use serde::{Deserialize, Serialize}; use snafu::{OptionExt, Snafu}; -use stackable_operator::k8s_openapi::apimachinery::pkg::apis::meta::v1::Time; use stackable_operator::kube::ResourceExt; use stackable_operator::labels; use stackable_operator::{ @@ -41,14 +40,20 @@ pub enum Error { #[snafu(display("application has no Spark image"))] NoSparkImage, } -/// SparkApplicationStatus CommandStatus + +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize, JsonSchema)] +#[serde(rename_all = "camelCase")] +pub struct SparkApplicationStatus { + pub phase: String, +} + #[derive(Clone, CustomResource, Debug, Default, Deserialize, JsonSchema, PartialEq, Serialize)] #[kube( group = "spark.stackable.tech", version = "v1alpha1", kind = "SparkApplication", shortname = "sc", - status = "CommandStatus", + status = "SparkApplicationStatus", namespaced, crates( kube_core = "stackable_operator::kube::core", @@ -404,14 +409,6 @@ impl SparkApplication { } } -#[derive(Clone, Default, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] -#[serde(rename_all = "camelCase")] -pub struct SparkApplicationStatus { - /// An opaque value that changes every time a discovery detail does - #[serde(default, skip_serializing_if = "Option::is_none")] - pub discovery_hash: Option, -} - #[derive(Clone, Debug, Default, Deserialize, Eq, JsonSchema, PartialEq, Serialize)] #[serde(rename_all = "camelCase")] pub struct CommonConfig { @@ -479,15 +476,6 @@ impl ExecutorConfig { } } -#[derive(Clone, Debug, Default, Deserialize, Eq, PartialEq, Serialize, JsonSchema)] -#[serde(rename_all = "camelCase")] -pub struct CommandStatus { - #[serde(skip_serializing_if = "Option::is_none")] - pub started_at: Option