diff --git a/CHANGELOG.md b/CHANGELOG.md index c90bcf7b..49b38cc9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ All notable changes to this project will be documented in this file. ### Added +- ServiceAccount, ClusterRole and RoleBinding for Spark driver ([#39]) - Initial commit -### Changed - +[#39]: https://github.com/stackabletech/spark-k8s-operator/pull/39 diff --git a/deploy/helm/spark-operator/templates/roles.yaml b/deploy/helm/spark-k8s-operator/templates/roles.yaml similarity index 86% rename from deploy/helm/spark-operator/templates/roles.yaml rename to deploy/helm/spark-k8s-operator/templates/roles.yaml index ee910ceb..5961fdd7 100644 --- a/deploy/helm/spark-operator/templates/roles.yaml +++ b/deploy/helm/spark-k8s-operator/templates/roles.yaml @@ -11,6 +11,12 @@ rules: verbs: - list - watch + - apiGroups: + - "" + resources: + - persistentvolumeclaims + verbs: + - list - apiGroups: - "" resources: @@ -75,17 +81,17 @@ rules: verbs: - create - apiGroups: - - {{ include "operator.name" . }}.stackable.tech + - spark.stackable.tech resources: - - {{ include "operator.name" . }}clusters + - sparkapplications verbs: - get - list - patch - watch - apiGroups: - - {{ include "operator.name" . }}.stackable.tech + - spark.stackable.tech resources: - - {{ include "operator.name" . }}clusters/status + - sparkapplications/status verbs: - patch diff --git a/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml b/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml new file mode 100644 index 00000000..25b80c05 --- /dev/null +++ b/deploy/helm/spark-k8s-operator/templates/spark-clusterrole.yaml @@ -0,0 +1,15 @@ +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: spark-driver-edit-role +rules: + - apiGroups: [""] + resources: ["pods", "services", "configmaps"] + verbs: ["get", "list", "watch", "create", "delete"] + - apiGroups: + - "" + resources: + - persistentvolumeclaims + verbs: + - list diff --git a/deploy/helm/spark-operator/.helmignore b/deploy/helm/spark-operator/.helmignore deleted file mode 100644 index fef44b7e..00000000 --- a/deploy/helm/spark-operator/.helmignore +++ /dev/null @@ -1,28 +0,0 @@ -# ============= -# This file is automatically generated from the templates in stackabletech/operator-templating -# DON'T MANUALLY EDIT THIS FILE -# ============= - -# Patterns to ignore when building packages. -# This supports shell glob matching, relative path matching, and -# negation (prefixed with !). Only one pattern per line. 
-.DS_Store -# Common VCS dirs -.git/ -.gitignore -.bzr/ -.bzrignore -.hg/ -.hgignore -.svn/ -# Common backup files -*.swp -*.bak -*.tmp -*.orig -*~ -# Various IDEs -.project -.idea/ -*.tmproj -.vscode/ diff --git a/deploy/helm/spark-operator/Chart.yaml b/deploy/helm/spark-operator/Chart.yaml deleted file mode 100644 index f230d066..00000000 --- a/deploy/helm/spark-operator/Chart.yaml +++ /dev/null @@ -1,10 +0,0 @@ ---- -apiVersion: v2 -name: spark-operator -version: 0.6.0-nightly -appVersion: "0.6.0-nightly" -description: The Stackable Operator for Apache Spark -home: https://github.com/stackabletech/spark-operator -maintainers: - - name: Stackable - url: https://www.stackable.tech diff --git a/deploy/helm/spark-operator/README.md b/deploy/helm/spark-operator/README.md deleted file mode 100644 index 04f2d216..00000000 --- a/deploy/helm/spark-operator/README.md +++ /dev/null @@ -1,33 +0,0 @@ -# Helm Chart for Stackable Operator for Apache Spark - -This Helm Chart can be used to install Custom Resource Definitions and the Operator for Apache Spark provided by Stackable. - - -## Requirements - -- Create a [Kubernetes Cluster](../Readme.md) -- Install [Helm](https://helm.sh/docs/intro/install/) - - -## Install the Stackable Operator for Apache Spark - -```bash -# From the root of the operator repository -make compile-chart - -helm install spark-operator deploy/helm/spark-operator -``` - - -## Usage of the CRDs - -The usage of this operator and its CRDs is described in the [documentation](https://docs.stackable.tech/spark/index.html) - -The operator has example requests included in the [`/examples`](https://github.com/stackabletech/spark/operator/tree/main/examples) directory. - - -## Links - -https://github.com/stackabletech/spark-operator - - diff --git a/deploy/helm/spark-operator/configs/properties.yaml b/deploy/helm/spark-operator/configs/properties.yaml deleted file mode 100644 index b9539d2e..00000000 --- a/deploy/helm/spark-operator/configs/properties.yaml +++ /dev/null @@ -1,276 +0,0 @@ -version: 0.1.0 -spec: - units: - - unit: &unitPassword - name: "password" - regex: "^[a-zA-Z]\\w{5,20}$" - - unit: &unitDirectory - name: "directory" - regex: "^(.+)/([^/]+)$" - examples: - - "/tmp/xyz" - - unit: &unitMemory - name: "memory" - regex: "(^\\p{N}+)(?:\\s*)((?:b|k|m|g|t|p|kb|mb|gb|tb|pb)\\b$)" - examples: - - "1024b" - - "1024kb" - - "500m" - - "1g" - -properties: - - property: &sparkWorkerCores - propertyNames: - - name: "SPARK_WORKER_CORES" - kind: - type: "file" - file: "spark-env.sh" - datatype: - type: "integer" - roles: - - name: "slave" - required: false - asOfVersion: "0.6.2" - description: "Total number of cores to allow Spark jobs to use on the machine (default: all available cores)" - - - property: &sparkWorkerMemory - propertyNames: - - name: "SPARK_WORKER_MEMORY" - kind: - type: "file" - file: "spark-env.sh" - datatype: - type: "string" - unit: *unitMemory - roles: - - name: "slave" - required: false - asOfVersion: "0.6.2" - description: "Total amount of memory to allow Spark jobs to use on the machine, e.g. 1000M, 2G (default: total memory minus 1 GB); note that each job's individual memory is configured using SPARK_MEM." 
- - - property: &sparkDaemonMemory - propertyNames: - - name: "SPARK_DAEMON_MEMORY" - kind: - type: "file" - file: "spark-env.sh" - datatype: - type: "string" - unit: *unitMemory - defaultValues: - - fromVersion: "0.6.2" - to_version: "1.4.1" - value: "512m" - - fromVersion: "1.5.0" - value: "1g" - roles: - - name: "master" - required: false - - name: "slave" - required: false - - name: "history-server" - required: false - asOfVersion: "0.6.2" - description: "Memory to allocate to the Spark master and worker daemons themselves (default: 512m/1g)" - - - property: &sparkEventLogEnabled - propertyNames: - - name: "spark.eventLog.enabled" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "bool" - defaultValues: - - fromVersion: "1.0.0" - value: "false" - recommendedValues: - - fromVersion: "1.0.0" - value: "true" - roles: - - name: "master" - required: false - - name: "slave" - required: false - - name: "history-server" - required: false - asOfVersion: "1.0.0" - description: "Whether to log Spark events, useful for reconstructing the Web UI after the application has finished." - - - property: &sparkEventLogDir - propertyNames: - - name: "spark.eventLog.dir" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "string" - unit: *unitDirectory - defaultValues: - - fromVersion: "1.0.0" - value: "/tmp/spark-events" - recommendedValues: - - fromVersion: "1.0.0" - value: "/stackable/log" - roles: - - name: "master" - required: false - - name: "slave" - required: false - asOfVersion: "1.0.0" - expandsTo: - - property: *sparkEventLogEnabled - value: "true" - description: "Base directory in which Spark events are logged, if spark.eventLog.enabled is true. Within this base directory, Spark creates a sub-directory for each application, and logs the events specific to the application in this directory. Users may want to set this to a unified location like an HDFS directory so history files can be read by the history server." - - - property: &sparkHistoryLogDirectory - propertyNames: - - name: "spark.history.fs.logDirectory" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "string" - unit: *unitDirectory - defaultValues: - - fromVersion: "1.1.0" - value: "/tmp/spark-events" - recommendedValues: - - fromVersion: "1.0.0" - value: "/stackable/log" - roles: - - name: "history-server" - required: true - expandsTo: - - property: *sparkEventLogEnabled - value: "true" - asOfVersion: "1.1.0" - description: "For the filesystem history provider, the URL to the directory containing application event logs to load. This can be a local file://path, an HDFS path hdfs://namenode/shared/spark-logs or that of an alternative filesystem supported by the Hadoop APIs." - - - property: &sparkHistoryStorePath - propertyNames: - - name: "spark.history.store.path" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "string" - unit: *unitDirectory - roles: - - name: "history-server" - required: false - asOfVersion: "2.3.0" - description: "Local directory where to cache application history data. If set, the history server will store application data on disk instead of keeping it in memory. The data written to disk will be re-used in the event of a history server restart." 
- - - property: &sparkAuthenticate - propertyNames: - - name: "spark.authenticate" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "bool" - defaultValues: - - fromVersion: "1.0.0" - value: "false" - recommendedValues: - - fromVersion: "1.0.0" - value: "true" - roles: - - name: "master" - required: false - - name: "slave" - required: false - - name: "history-server" - required: false - asOfVersion: "1.0.0" - description: "Whether Spark authenticates its internal connections." - - - property: &sparkAuthenticateSecret - propertyNames: - - name: "spark.authenticate.secret" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "string" - unit: *unitPassword - recommendedValues: - - fromVersion: "1.0.0" - value: "secret" - roles: - - name: "master" - required: false - - name: "slave" - required: false - - name: "history-server" - required: false - asOfVersion: "1.0.0" - expandsTo: - - property: *sparkAuthenticate - value: "true" - description: "The secret key used in the authentication. SPARK_AUTHENTICATE must be set to true." - - - property: &sparkPortMaxRetries - propertyNames: - - name: "spark.port.maxRetries" - kind: - type: "file" - file: "spark-defaults.conf" - datatype: - type: "integer" - defaultValues: - - fromVersion: "1.1.1" - value: "16" - recommendedValues: - - fromVersion: "1.1.1" - value: "0" - roles: - - name: "master" - required: true - - name: "slave" - required: true - - name: "history-server" - required: true - asOfVersion: "1.1.1" - description: "Maximum number of retries when binding to a port before giving up. When a port is given a specific value (non 0), each subsequent retry will increment the port used in the previous attempt by 1 before retrying. This essentially allows it to try a range of ports from the start port specified to port + maxRetries." - - - property: &sparkNoDaemonize - propertyNames: - - name: "SPARK_NO_DAEMONIZE" - kind: - type: "env" - datatype: - type: "bool" - recommendedValues: - - fromVersion: "2.0.0" - value: "true" - roles: - - name: "master" - required: true - - name: "slave" - required: true - - name: "history-server" - required: true - asOfVersion: "2.0.0" - description: "Run spark processes in foreground if true. Useful for systemd (default: false)" - - - property: &sparkConfDir - propertyNames: - - name: "SPARK_CONF_DIR" - kind: - type: "env" - datatype: - type: "string" - recommendedValues: - - fromVersion: "1.1.1" - value: "/stackable/config" - roles: - - name: "master" - required: true - - name: "slave" - required: true - - name: "history-server" - required: true - asOfVersion: "1.1.1" - description: "To specify a different configuration directory other than the default “SPARK_HOME/conf”, you can set SPARK_CONF_DIR. Spark will use the the configuration files (spark-defaults.conf, spark-env.sh, log4j.properties, etc) from this directory." 
diff --git a/deploy/helm/spark-operator/crds/crds.yaml b/deploy/helm/spark-operator/crds/crds.yaml deleted file mode 100644 index f03d537e..00000000 --- a/deploy/helm/spark-operator/crds/crds.yaml +++ /dev/null @@ -1,371 +0,0 @@ ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - name: sparkapplications.spark.stackable.tech - annotations: - helm.sh/resource-policy: keep -spec: - group: spark.stackable.tech - names: - categories: [] - kind: SparkApplication - plural: sparkapplications - shortNames: - - sc - singular: sparkapplication - scope: Namespaced - versions: - - additionalPrinterColumns: [] - name: v1alpha1 - schema: - openAPIV3Schema: - description: "Auto-generated derived type for SparkApplicationSpec via `CustomResource`" - properties: - spec: - properties: - cliOverrides: - additionalProperties: - type: string - default: {} - type: object - config: - default: {} - properties: - enableMonitoring: - nullable: true - type: boolean - logDir: - nullable: true - type: string - maxPortRetries: - format: uint - minimum: 0.0 - nullable: true - type: integer - secret: - nullable: true - type: string - type: object - configOverrides: - additionalProperties: - additionalProperties: - type: string - type: object - default: {} - type: object - envOverrides: - additionalProperties: - type: string - default: {} - type: object - historyServers: - nullable: true - properties: - cliOverrides: - additionalProperties: - type: string - default: {} - type: object - config: - default: {} - properties: - storePath: - nullable: true - type: string - type: object - configOverrides: - additionalProperties: - additionalProperties: - type: string - type: object - default: {} - type: object - envOverrides: - additionalProperties: - type: string - default: {} - type: object - roleGroups: - additionalProperties: - properties: - cliOverrides: - additionalProperties: - type: string - default: {} - type: object - config: - default: {} - properties: - storePath: - nullable: true - type: string - type: object - configOverrides: - additionalProperties: - additionalProperties: - type: string - type: object - default: {} - type: object - envOverrides: - additionalProperties: - type: string - default: {} - type: object - replicas: - format: uint16 - minimum: 0.0 - nullable: true - type: integer - selector: - description: A label selector is a label query over a set of resources. The result of matchLabels and matchExpressions are ANDed. An empty label selector matches all objects. A null label selector matches no objects. - nullable: true - properties: - matchExpressions: - description: matchExpressions is a list of label selector requirements. The requirements are ANDed. - items: - description: "A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values." - properties: - key: - description: key is the label key that the selector applies to. - type: string - operator: - description: "operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist." - type: string - values: - description: "values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch." 
- items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - description: "matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is \"key\", the operator is \"In\", and the values array contains only \"value\". The requirements are ANDed." - type: object - type: object - type: object - type: object - required: - - roleGroups - type: object - masters: - nullable: true - properties: - cliOverrides: - additionalProperties: - type: string - default: {} - type: object - config: - default: {} - type: object - configOverrides: - additionalProperties: - additionalProperties: - type: string - type: object - default: {} - type: object - envOverrides: - additionalProperties: - type: string - default: {} - type: object - roleGroups: - additionalProperties: - properties: - cliOverrides: - additionalProperties: - type: string - default: {} - type: object - config: - default: {} - type: object - configOverrides: - additionalProperties: - additionalProperties: - type: string - type: object - default: {} - type: object - envOverrides: - additionalProperties: - type: string - default: {} - type: object - replicas: - format: uint16 - minimum: 0.0 - nullable: true - type: integer - selector: - description: A label selector is a label query over a set of resources. The result of matchLabels and matchExpressions are ANDed. An empty label selector matches all objects. A null label selector matches no objects. - nullable: true - properties: - matchExpressions: - description: matchExpressions is a list of label selector requirements. The requirements are ANDed. - items: - description: "A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values." - properties: - key: - description: key is the label key that the selector applies to. - type: string - operator: - description: "operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist." - type: string - values: - description: "values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch." - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - description: "matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is \"key\", the operator is \"In\", and the values array contains only \"value\". The requirements are ANDed." 
- type: object - type: object - type: object - type: object - required: - - roleGroups - type: object - stopped: - nullable: true - type: boolean - version: - nullable: true - type: string - workers: - nullable: true - properties: - cliOverrides: - additionalProperties: - type: string - default: {} - type: object - config: - default: {} - properties: - cores: - format: uint - minimum: 0.0 - nullable: true - type: integer - memory: - nullable: true - type: string - type: object - configOverrides: - additionalProperties: - additionalProperties: - type: string - type: object - default: {} - type: object - envOverrides: - additionalProperties: - type: string - default: {} - type: object - roleGroups: - additionalProperties: - properties: - cliOverrides: - additionalProperties: - type: string - default: {} - type: object - config: - default: {} - properties: - cores: - format: uint - minimum: 0.0 - nullable: true - type: integer - memory: - nullable: true - type: string - type: object - configOverrides: - additionalProperties: - additionalProperties: - type: string - type: object - default: {} - type: object - envOverrides: - additionalProperties: - type: string - default: {} - type: object - replicas: - format: uint16 - minimum: 0.0 - nullable: true - type: integer - selector: - description: A label selector is a label query over a set of resources. The result of matchLabels and matchExpressions are ANDed. An empty label selector matches all objects. A null label selector matches no objects. - nullable: true - properties: - matchExpressions: - description: matchExpressions is a list of label selector requirements. The requirements are ANDed. - items: - description: "A label selector requirement is a selector that contains values, a key, and an operator that relates the key and values." - properties: - key: - description: key is the label key that the selector applies to. - type: string - operator: - description: "operator represents a key's relationship to a set of values. Valid operators are In, NotIn, Exists and DoesNotExist." - type: string - values: - description: "values is an array of string values. If the operator is In or NotIn, the values array must be non-empty. If the operator is Exists or DoesNotExist, the values array must be empty. This array is replaced during a strategic merge patch." - items: - type: string - type: array - required: - - key - - operator - type: object - type: array - matchLabels: - additionalProperties: - type: string - description: "matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels map is equivalent to an element of matchExpressions, whose key field is \"key\", the operator is \"In\", and the values array contains only \"value\". The requirements are ANDed." - type: object - type: object - type: object - type: object - required: - - roleGroups - type: object - type: object - status: - nullable: true - properties: - discoveryHash: - description: An opaque value that changes every time a discovery detail does - nullable: true - type: string - type: object - required: - - spec - title: SparkApplication - type: object - served: true - storage: true - subresources: - status: {} diff --git a/deploy/helm/spark-operator/templates/_helpers.tpl b/deploy/helm/spark-operator/templates/_helpers.tpl deleted file mode 100644 index 840391ac..00000000 --- a/deploy/helm/spark-operator/templates/_helpers.tpl +++ /dev/null @@ -1,76 +0,0 @@ -{{/* -Expand the name of the chart. 
-*/}} -{{- define "operator.name" -}} -{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-operator" }} -{{- end }} - -{{/* -Expand the name of the chart. -*/}} -{{- define "operator.appname" -}} -{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* -Create a default fully qualified app name. -We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). -If release name contains chart name it will be used as a full name. -*/}} -{{- define "operator.fullname" -}} -{{- if .Values.fullnameOverride }} -{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- $name := default .Chart.Name .Values.nameOverride }} -{{- if contains $name .Release.Name }} -{{- .Release.Name | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} -{{- end }} -{{- end }} -{{- end }} - -{{/* -Create chart name and version as used by the chart label. -*/}} -{{- define "operator.chart" -}} -{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* -Common labels -*/}} -{{- define "operator.labels" -}} -helm.sh/chart: {{ include "operator.chart" . }} -{{ include "operator.selectorLabels" . }} -{{- if .Chart.AppVersion }} -app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} -{{- end }} -app.kubernetes.io/managed-by: {{ .Release.Service }} -{{- end }} - -{{/* -Selector labels -*/}} -{{- define "operator.selectorLabels" -}} -app.kubernetes.io/name: {{ include "operator.appname" . }} -app.kubernetes.io/instance: {{ .Release.Name }} -{{- end }} - -{{/* -Create the name of the service account to use -*/}} -{{- define "operator.serviceAccountName" -}} -{{- if .Values.serviceAccount.create }} -{{- default (include "operator.fullname" .) .Values.serviceAccount.name }} -{{- else }} -{{- default "default" .Values.serviceAccount.name }} -{{- end }} -{{- end }} - -{{/* -Labels for Kubernetes objects created by helm test -*/}} -{{- define "operator.testLabels" -}} -helm.sh/test: {{ include "operator.chart" . }} -{{- end }} \ No newline at end of file diff --git a/deploy/helm/spark-operator/templates/configmap.yaml b/deploy/helm/spark-operator/templates/configmap.yaml deleted file mode 100644 index 7fa8613d..00000000 --- a/deploy/helm/spark-operator/templates/configmap.yaml +++ /dev/null @@ -1,9 +0,0 @@ ---- -apiVersion: v1 -data: -{{ (.Files.Glob "configs/*").AsConfig | indent 2 }} -kind: ConfigMap -metadata: - name: {{ .Release.Name }}-configmap - labels: - {{- include "operator.labels" . | nindent 4 }} diff --git a/deploy/helm/spark-operator/templates/deployment.yaml b/deploy/helm/spark-operator/templates/deployment.yaml deleted file mode 100644 index 90a210d5..00000000 --- a/deploy/helm/spark-operator/templates/deployment.yaml +++ /dev/null @@ -1,58 +0,0 @@ ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ .Release.Name }}-deployment - labels: - {{- include "operator.labels" . | nindent 4 }} -spec: - replicas: 1 - strategy: - type: Recreate - selector: - matchLabels: - {{- include "operator.selectorLabels" . | nindent 6 }} - template: - metadata: - {{- with .Values.podAnnotations }} - annotations: - checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - {{- include "operator.selectorLabels" . 
| nindent 8 }} - spec: - {{- with .Values.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - serviceAccountName: {{ .Release.Name }}-serviceaccount - securityContext: - {{- toYaml .Values.podSecurityContext | nindent 8 }} - containers: - - name: {{ include "operator.appname" . }} - securityContext: - {{- toYaml .Values.securityContext | nindent 12 }} - image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.image.pullPolicy }} - resources: - {{- toYaml .Values.resources | nindent 12 }} - volumeMounts: - - mountPath: /etc/stackable/{{ include "operator.appname" . }}/config-spec - name: config-spec - volumes: - - name: config-spec - configMap: - name: {{ .Release.Name }}-configmap - {{- with .Values.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.affinity }} - affinity: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} diff --git a/deploy/helm/spark-operator/templates/serviceaccount.yaml b/deploy/helm/spark-operator/templates/serviceaccount.yaml deleted file mode 100644 index ff2b7f8e..00000000 --- a/deploy/helm/spark-operator/templates/serviceaccount.yaml +++ /dev/null @@ -1,29 +0,0 @@ ---- -{{ if .Values.serviceAccount.create -}} -apiVersion: v1 -kind: ServiceAccount -metadata: - name: {{ .Release.Name }}-serviceaccount - labels: - {{- include "operator.labels" . | nindent 4 }} - {{- with .Values.serviceAccount.annotations }} - annotations: - {{- toYaml . | nindent 4 }} - {{- end }} ---- -apiVersion: rbac.authorization.k8s.io/v1 -# This cluster role binding allows anyone in the "manager" group to read secrets in any namespace. -kind: ClusterRoleBinding -metadata: - name: {{ .Release.Name }}-clusterrolebinding - labels: - {{- include "operator.labels" . | nindent 4 }} -subjects: - - kind: ServiceAccount - name: {{ .Release.Name }}-serviceaccount - namespace: {{ .Release.Namespace }} -roleRef: - kind: ClusterRole - name: {{ .Release.Name }}-clusterrole - apiGroup: rbac.authorization.k8s.io -{{- end }} diff --git a/deploy/helm/spark-operator/values.yaml b/deploy/helm/spark-operator/values.yaml deleted file mode 100644 index e74cf772..00000000 --- a/deploy/helm/spark-operator/values.yaml +++ /dev/null @@ -1,49 +0,0 @@ -# Default values for spark-operator. ---- -image: - repository: docker.stackable.tech/stackable/spark-operator - pullPolicy: IfNotPresent - -imagePullSecrets: [] -nameOverride: "" -fullnameOverride: "" - -serviceAccount: - # Specifies whether a service account should be created - create: true - # Annotations to add to the service account - annotations: {} - # The name of the service account to use. - # If not set and create is true, a name is generated using the fullname template - name: "" - -podAnnotations: {} - -podSecurityContext: {} - # fsGroup: 2000 - -securityContext: {} - # capabilities: - # drop: - # - ALL - # readOnlyRootFilesystem: true - # runAsNonRoot: true - # runAsUser: 1000 - -resources: {} - # We usually recommend not to specify default resources and to leave this as a conscious - # choice for the user. This also increases chances charts run on environments with little - # resources, such as Minikube. If you do want to specify resources, uncomment the following - # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
- # limits: - # cpu: 100m - # memory: 128Mi - # requests: - # cpu: 100m - # memory: 128Mi - -nodeSelector: {} - -tolerations: [] - -affinity: {} diff --git a/deploy/manifests/roles.yaml b/deploy/manifests/roles.yaml new file mode 100644 index 00000000..d3eb1986 --- /dev/null +++ b/deploy/manifests/roles.yaml @@ -0,0 +1,98 @@ +--- +# Source: spark-k8s-operator/templates/roles.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: spark-k8s-operator-clusterrole +rules: + - apiGroups: + - "" + resources: + - nodes + verbs: + - list + - watch + - apiGroups: + - "" + resources: + - persistentvolumeclaims + verbs: + - list + - apiGroups: + - "" + resources: + - pods + - configmaps + - secrets + - services + - endpoints + - serviceaccounts + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - rbac.authorization.k8s.io + resources: + - rolebindings + verbs: + - create + - delete + - get + - list + - patch + - update + - watch + - apiGroups: + - apps + resources: + - statefulsets + verbs: + - create + - delete + - list + - patch + - update + - watch + - apiGroups: + - batch + resources: + - jobs + verbs: + - create + - get + - list + - patch + - update + - watch + - apiGroups: + - apiextensions.k8s.io + resources: + - customresourcedefinitions + verbs: + - get + - apiGroups: + - events.k8s.io + resources: + - events + verbs: + - create + - apiGroups: + - spark.stackable.tech + resources: + - sparkapplications + verbs: + - get + - list + - patch + - watch + - apiGroups: + - spark.stackable.tech + resources: + - sparkapplications/status + verbs: + - patch diff --git a/deploy/manifests/spark-clusterrole.yaml b/deploy/manifests/spark-clusterrole.yaml new file mode 100644 index 00000000..5c8f7ff5 --- /dev/null +++ b/deploy/manifests/spark-clusterrole.yaml @@ -0,0 +1,16 @@ +--- +# Source: spark-k8s-operator/templates/spark-clusterrole.yaml +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: spark-driver-edit-role +rules: + - apiGroups: [""] + resources: ["pods", "services", "configmaps"] + verbs: ["get", "list", "watch", "create", "delete"] + - apiGroups: + - "" + resources: + - persistentvolumeclaims + verbs: + - list diff --git a/rust/crd/src/constants.rs b/rust/crd/src/constants.rs index bf43347b..8fdb14a6 100644 --- a/rust/crd/src/constants.rs +++ b/rust/crd/src/constants.rs @@ -1,3 +1,5 @@ +pub const APP_NAME: &str = "spark-k8s"; + pub const VOLUME_MOUNT_NAME_POD_TEMPLATES: &str = "pod-template"; pub const VOLUME_MOUNT_PATH_POD_TEMPLATES: &str = "/stackable/spark/pod-templates"; diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index d9a14374..0592c8ce 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -5,12 +5,13 @@ pub mod constants; use constants::*; use stackable_operator::k8s_openapi::api::core::v1::{EnvVar, Volume, VolumeMount}; -use std::collections::HashMap; +use std::collections::{BTreeMap, HashMap}; use serde::{Deserialize, Serialize}; use snafu::{OptionExt, Snafu}; use stackable_operator::k8s_openapi::apimachinery::pkg::apis::meta::v1::Time; use stackable_operator::kube::ResourceExt; +use stackable_operator::labels; use stackable_operator::{ kube::CustomResource, role_utils::CommonConfiguration, @@ -117,6 +118,10 @@ impl SparkApplication { self.spec.image.as_deref() } + pub fn version(&self) -> Option<&str> { + self.spec.version.as_deref() + } + pub fn application_artifact(&self) -> Option<&str> { self.spec.main_application_file.as_deref() } @@ -159,7 +164,19 @@ impl 
SparkApplication { tmp.iter().flat_map(|v| v.iter()).cloned().collect() } - pub fn build_command(&self) -> Result, Error> { + pub fn recommended_labels(&self) -> BTreeMap { + let mut ls = labels::build_common_labels_for_all_managed_resources(APP_NAME, &self.name()); + if let Some(version) = self.version() { + ls.insert(labels::APP_VERSION_LABEL.to_string(), version.to_string()); + } + ls.insert( + labels::APP_MANAGED_BY_LABEL.to_string(), + format!("{}-operator", APP_NAME), + ); + ls + } + + pub fn build_command(&self, serviceaccount_name: &str) -> Result, Error> { // mandatory properties let mode = self.mode().context(ObjectHasNoDeployModeSnafu)?; let name = self.metadata.name.clone().context(ObjectHasNoNameSnafu)?; @@ -177,6 +194,7 @@ impl SparkApplication { format!("--conf spark.kubernetes.namespace={}", self.metadata.namespace.as_ref().context(NoNamespaceSnafu)?), format!("--conf spark.kubernetes.driver.container.image={}", self.spec.spark_image.as_ref().context(NoSparkImageSnafu)?), format!("--conf spark.kubernetes.executor.container.image={}", self.spec.spark_image.as_ref().context(NoSparkImageSnafu)?), + format!("--conf spark.kubernetes.authenticate.driver.serviceAccountName={}", serviceaccount_name), //"--conf spark.kubernetes.file.upload.path=s3a://stackable-spark-k8s-jars/jobs".to_string(), //"--conf spark.hadoop.fs.s3a.impl=org.apache.hadoop.fs.s3a.S3AFileSystem".to_string(), //"--conf spark.driver.extraClassPath=/stackable/.ivy2/cache".to_string(), diff --git a/rust/operator-binary/src/spark_k8s_controller.rs b/rust/operator-binary/src/spark_k8s_controller.rs index db399153..d312b68c 100644 --- a/rust/operator-binary/src/spark_k8s_controller.rs +++ b/rust/operator-binary/src/spark_k8s_controller.rs @@ -5,19 +5,20 @@ use stackable_operator::builder::{ use stackable_operator::k8s_openapi::api::batch::v1::{Job, JobSpec}; use stackable_operator::k8s_openapi::api::core::v1::{ ConfigMap, ConfigMapVolumeSource, Container, EmptyDirVolumeSource, EnvVar, Pod, PodSpec, - PodTemplateSpec, Volume, VolumeMount, + PodTemplateSpec, ServiceAccount, Volume, VolumeMount, }; +use stackable_operator::k8s_openapi::api::rbac::v1::{ClusterRole, RoleBinding, RoleRef, Subject}; +use stackable_operator::k8s_openapi::Resource; +use stackable_operator::kube::runtime::controller::{Action, Context}; use stackable_operator::logging::controller::ReconcilerError; -use stackable_operator::{ - kube::runtime::controller::{Action, Context}, - product_config::ProductConfigManager, -}; +use stackable_operator::product_config::ProductConfigManager; use stackable_spark_k8s_crd::constants::*; use stackable_spark_k8s_crd::SparkApplication; use std::{sync::Arc, time::Duration}; use strum::{EnumDiscriminants, IntoStaticStr}; const FIELD_MANAGER_SCOPE: &str = "sparkapplication"; +const SPARK_CLUSTER_ROLE: &str = "spark-driver-edit-role"; pub struct Ctx { pub client: stackable_operator::client::Client, @@ -34,6 +35,14 @@ pub enum Error { ObjectMissingMetadataForOwnerRef { source: stackable_operator::error::Error, }, + #[snafu(display("failed to apply role ServiceAccount"))] + ApplyServiceAccount { + source: stackable_operator::error::Error, + }, + #[snafu(display("failed to apply global RoleBinding"))] + ApplyRoleBinding { + source: stackable_operator::error::Error, + }, #[snafu(display("failed to apply Job"))] ApplyApplication { source: stackable_operator::error::Error, @@ -80,6 +89,16 @@ pub async fn reconcile( let client = &ctx.get_ref().client; + let (serviceaccount, rolebinding) = 
build_spark_role_serviceaccount(&spark_application)?; + client + .apply_patch(FIELD_MANAGER_SCOPE, &serviceaccount, &serviceaccount) + .await + .context(ApplyServiceAccountSnafu)?; + client + .apply_patch(FIELD_MANAGER_SCOPE, &rolebinding, &rolebinding) + .await + .context(ApplyRoleBindingSnafu)?; + let spark_image = spark_application .spec .spark_image @@ -123,7 +142,12 @@ pub async fn reconcile( .await .context(ApplyApplicationSnafu)?; - let job = spark_job(&spark_application, spark_image, &job_container)?; + let job = spark_job( + &spark_application, + spark_image, + &serviceaccount, + &job_container, + )?; client .apply_patch(FIELD_MANAGER_SCOPE, &job, &job) .await @@ -213,6 +237,7 @@ fn pod_template_config_map( .name(spark_application.pod_template_config_map_name()) .ownerreference_from_resource(spark_application, None, Some(true)) .context(ObjectMissingMetadataForOwnerRefSnafu)? + .with_labels(spark_application.recommended_labels()) .build(), ) .add_data( @@ -230,6 +255,7 @@ fn pod_template_config_map( fn spark_job( spark_application: &SparkApplication, spark_image: &str, + serviceaccount: &ServiceAccount, job_container: &Option, ) -> Result { let mut volume_mounts = vec![VolumeMount { @@ -247,7 +273,7 @@ fn spark_job( } let commands = spark_application - .build_command() + .build_command(serviceaccount.metadata.name.as_ref().unwrap()) .context(BuildCommandSnafu)?; let mut container = ContainerBuilder::new("spark-submit"); @@ -282,11 +308,17 @@ fn spark_job( } let pod = PodTemplateSpec { - metadata: Some(ObjectMetaBuilder::new().name("spark-submit").build()), + metadata: Some( + ObjectMetaBuilder::new() + .name("spark-submit") + .with_labels(spark_application.recommended_labels()) + .build(), + ), spec: Some(PodSpec { containers: vec![container.build()], init_containers: job_container.as_ref().map(|c| vec![c.clone()]), restart_policy: Some("Never".to_string()), + service_account_name: serviceaccount.metadata.name.clone(), volumes: Some(volumes), ..PodSpec::default() }), @@ -297,6 +329,7 @@ fn spark_job( .name_and_namespace(spark_application) .ownerreference_from_resource(spark_application, None, Some(true)) .context(ObjectMissingMetadataForOwnerRefSnafu)? + .with_labels(spark_application.recommended_labels()) .build(), spec: Some(JobSpec { template: pod, @@ -309,14 +342,56 @@ fn spark_job( Ok(job) } +/// For a given SparkApplication, we create a ServiceAccount with a RoleBinding to the ClusterRole +/// that allows the driver to create pods etc. +/// Both objects have an owner reference to the SparkApplication, as well as the same name as the app. +/// They are deleted when the job is deleted. +fn build_spark_role_serviceaccount( + spark_app: &SparkApplication, +) -> Result<(ServiceAccount, RoleBinding)> { + let sa_name = spark_app.metadata.name.as_ref().unwrap().to_string(); + let sa = ServiceAccount { + metadata: ObjectMetaBuilder::new() + .name_and_namespace(spark_app) + .name(&sa_name) + .ownerreference_from_resource(spark_app, None, Some(true)) + .context(ObjectMissingMetadataForOwnerRefSnafu)? + .with_labels(spark_app.recommended_labels()) + .build(), + ..ServiceAccount::default() + }; + let binding_name = &sa_name; + let binding = RoleBinding { + metadata: ObjectMetaBuilder::new() + .name_and_namespace(spark_app) + .name(binding_name) + .ownerreference_from_resource(spark_app, None, Some(true)) + .context(ObjectMissingMetadataForOwnerRefSnafu)? 
+            .with_labels(spark_app.recommended_labels())
+            .build(),
+        role_ref: RoleRef {
+            api_group: ClusterRole::GROUP.to_string(),
+            kind: ClusterRole::KIND.to_string(),
+            name: SPARK_CLUSTER_ROLE.to_string(),
+        },
+        subjects: Some(vec![Subject {
+            api_group: Some(ServiceAccount::GROUP.to_string()),
+            kind: ServiceAccount::KIND.to_string(),
+            name: sa_name,
+            namespace: sa.metadata.namespace.clone(),
+        }]),
+    };
+    Ok((sa, binding))
+}
+
 pub fn error_policy(_error: &Error, _ctx: Context<Ctx>) -> Action {
     Action::requeue(Duration::from_secs(5))
 }
 
 #[cfg(test)]
 mod tests {
-    use crate::spark_k8s_controller::pod_template_config_map;
     use crate::spark_k8s_controller::spark_job;
+    use crate::spark_k8s_controller::{build_spark_role_serviceaccount, pod_template_config_map};
     use crate::SparkApplication;
 
     #[test]
@@ -398,8 +473,10 @@ spec:
         memory: "512m"
 "#).unwrap();
 
+        let (serviceaccount, _rolebinding) =
+            build_spark_role_serviceaccount(&spark_application).unwrap();
         let spark_image = spark_application.spec.spark_image.as_ref().unwrap();
-        let job = spark_job(&spark_application, spark_image, &None).unwrap();
+        let job = spark_job(&spark_application, spark_image, &serviceaccount, &None).unwrap();
         let job_containers = &job
            .clone()
            .spec
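
To illustrate the per-application RBAC wiring this change introduces, here is a rough sketch of the two objects `build_spark_role_serviceaccount` would render for a hypothetical `SparkApplication` named `spark-pi` in namespace `default` (the name and namespace are illustrative, and the labels from `recommended_labels` plus the owner reference to the SparkApplication are omitted for brevity):

```yaml
# Sketch only: objects the controller creates per SparkApplication.
# Both share the application's name and are garbage-collected with it
# via an ownerReference (not shown here).
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: spark-pi            # same name as the SparkApplication
  namespace: default
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: spark-pi            # same name as the SparkApplication
  namespace: default
subjects:
  - kind: ServiceAccount
    name: spark-pi
    namespace: default
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: spark-driver-edit-role   # static ClusterRole shipped with the Helm chart / manifests
```

The driver is then pointed at this account by `build_command`, which adds `--conf spark.kubernetes.authenticate.driver.serviceAccountName=<name>` to the generated spark-submit invocation.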