
[Merged by Bors] - Spark history server #187


Closed
wants to merge 61 commits into from
91f5936
Remove zombie test definition.
razvan Jan 2, 2023
0fc5125
wip : history server
razvan Jan 2, 2023
e5044f4
Consolidate constants.
razvan Jan 3, 2023
057f37c
wip: create deployment, service and config map
razvan Jan 3, 2023
a5abf76
Update changelog
razvan Jan 3, 2023
34c3dd5
Use framework image struct, update crds and more.
razvan Jan 4, 2023
e587200
Use roles and role groups.
razvan Jan 4, 2023
79ea167
Kuttl tests almost green
razvan Jan 5, 2023
8362df5
Populate spark config automatically.
razvan Jan 5, 2023
8f593c2
Use S3 secrets for the logs bucket
razvan Jan 6, 2023
9d2c661
Successfully started history server, cleanups, replicas.
razvan Jan 6, 2023
c82335c
collect all three controllers
adwk67 Jan 10, 2023
dfb7d7b
write logs to history server and check results
adwk67 Jan 12, 2023
16ff530
merged main
adwk67 Jan 12, 2023
0027f45
re-format
adwk67 Jan 12, 2023
a9622f7
added resources using fragments
adwk67 Jan 13, 2023
6e26cb7
service account
adwk67 Jan 13, 2023
2895ab4
added sleep in tests for minio
adwk67 Jan 13, 2023
b2765f8
regenerate charts
adwk67 Jan 13, 2023
3c99f0d
use same clusterrole for history server, and add pvc permissions
adwk67 Jan 13, 2023
935d9bf
documentation
adwk67 Jan 13, 2023
21a20c3
added operator-rs update to changelog
adwk67 Jan 13, 2023
6cec93c
parse cleaner config
adwk67 Jan 16, 2023
9b382ba
use history api for test
adwk67 Jan 16, 2023
977eb05
linting
adwk67 Jan 16, 2023
bfbc8dd
Extract cleaner settings into their own function.
razvan Jan 16, 2023
3b04da8
The operator chooses the appropriate s3 credentials provider
razvan Jan 16, 2023
d14788a
Extract S3LogDir to its own module.
razvan Jan 16, 2023
d9c76c4
Automatically configure event logs for applications that require it.
razvan Jan 16, 2023
ad26663
removed unused dependency
adwk67 Jan 16, 2023
918a2dd
Clean up any orphaned resources.
razvan Jan 17, 2023
2d20012
Update docs.
razvan Jan 17, 2023
a21bc75
Update docs/modules/ROOT/pages/history_server.adoc
razvan Jan 17, 2023
6af52e6
Update docs/modules/ROOT/examples/example-history-server.yaml
razvan Jan 17, 2023
3070945
Update docs/modules/ROOT/pages/history_server.adoc
razvan Jan 17, 2023
ae9e5d4
Update docs/modules/ROOT/pages/history_server.adoc
razvan Jan 17, 2023
e211669
Update docs/modules/ROOT/pages/history_server.adoc
razvan Jan 17, 2023
213e223
Update docs/modules/ROOT/pages/history_server.adoc
razvan Jan 17, 2023
1650f09
Mount credentials in separate folders and configure bucket specific p…
razvan Jan 17, 2023
b3158b1
Use different endpoints for data and event logs.
razvan Jan 18, 2023
664e6ec
Implement fix for "S3 reference inconsistency #162"
razvan Jan 18, 2023
f308142
main merge
razvan Jan 18, 2023
1fd5ebb
Update docs/modules/ROOT/pages/history_server.adoc
razvan Jan 19, 2023
bd49457
Update docs/modules/ROOT/pages/history_server.adoc
razvan Jan 19, 2023
30479f5
Update docs/modules/ROOT/pages/history_server.adoc
razvan Jan 19, 2023
149bc1f
Update CHANGELOG, docs and clean up
razvan Jan 19, 2023
91bb1df
Remove alternative Dockerfile
razvan Jan 19, 2023
6ab2278
Merge branch 'main' into 124-implement-adr-22-spark-history-server
razvan Jan 19, 2023
685b292
Use references to S3 objects in tests.
razvan Jan 19, 2023
2c843b7
Update rust/operator-binary/src/spark_k8s_controller.rs
razvan Jan 19, 2023
9037986
Update rust/operator-binary/src/history_controller.rs
razvan Jan 19, 2023
3f95292
Update rust/operator-binary/src/history_controller.rs
razvan Jan 19, 2023
e0c7d8c
Update rust/operator-binary/src/history_controller.rs
razvan Jan 19, 2023
2de8ee4
Update rust/crd/src/history.rs
razvan Jan 19, 2023
fe169a1
Update docs/modules/ROOT/pages/usage.adoc
razvan Jan 19, 2023
702b828
Update Rust code with review feedback.
razvan Jan 19, 2023
5bc89dd
Update rust/crd/src/s3logdir.rs
sbernauer Jan 19, 2023
78210d5
Update rust/crd/src/s3logdir.rs
sbernauer Jan 19, 2023
3e5d845
Update rust/operator-binary/src/history_controller.rs
sbernauer Jan 19, 2023
12cb8d9
Update rust/operator-binary/src/history_controller.rs
sbernauer Jan 19, 2023
73aad65
Fix services and watch more objects.
razvan Jan 19, 2023
8 changes: 7 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file.

## [Unreleased]

### Added

- Create and manage history servers ([#187])
- `operator-rs` `0.27.1` -> `0.30.2` ([#187])

[#187]: https://github.com/stackabletech/spark-k8s-operator/pull/187

### Changed

- Updated stackable image versions ([#176])
Expand Down Expand Up @@ -43,7 +50,6 @@ All notable changes to this project will be documented in this file.
- Update RBAC properties for OpenShift compatibility ([#126]).

[#112]: https://github.com/stackabletech/spark-k8s-operator/pull/112
[#114]: https://github.com/stackabletech/spark-k8s-operator/pull/114
[#126]: https://github.com/stackabletech/spark-k8s-operator/pull/126

## [0.4.0] - 2022-08-03
Expand Down
9 changes: 5 additions & 4 deletions Cargo.lock


4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ SHELL=/usr/bin/env bash -euo pipefail
render-readme:
scripts/render_readme.sh

## Alternative Dockerfile that uses cargo chef to speed up dev builds.
docker-build-alt:
docker build --build-arg VERSION=${VERSION} -t "docker.stackable.tech/stackable/spark-k8s-operator:${VERSION}" -f docker/Dockerfile.alternative .

## Docker related targets
docker-build:
docker build --force-rm --build-arg VERSION=${VERSION} -t "docker.stackable.tech/stackable/spark-k8s-operator:${VERSION}" -f docker/Dockerfile .
Expand Down
700 changes: 700 additions & 0 deletions deploy/helm/spark-k8s-operator/crds/crds.yaml


3 changes: 3 additions & 0 deletions deploy/helm/spark-k8s-operator/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ spec:
{{- toYaml .Values.podSecurityContext | nindent 8 }}
containers:
- name: {{ include "operator.appname" . }}
env:
- name: SPARK_K8S_OPERATOR_LOG
value: debug
securityContext:
{{- toYaml .Values.securityContext | nindent 12 }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
Expand Down
1 change: 1 addition & 0 deletions deploy/helm/spark-k8s-operator/templates/roles.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ rules:
- spark.stackable.tech
resources:
- sparkapplications
- sparkhistoryservers
verbs:
- get
- list
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ rules:
- ""
resources:
- configmaps
- persistentvolumeclaims
(Reviewer comment on `persistentvolumeclaims`: "Seems weird but it is what it is")

- pods
- secrets
- serviceaccounts
Expand Down
700 changes: 700 additions & 0 deletions deploy/manifests/crds.yaml


3 changes: 3 additions & 0 deletions deploy/manifests/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ spec:
securityContext: {}
containers:
- name: spark-k8s-operator
env:
- name: SPARK_K8S_OPERATOR_LOG
value: debug
securityContext:
allowPrivilegeEscalation: false
capabilities:
Expand Down
1 change: 1 addition & 0 deletions deploy/manifests/roles.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ rules:
- spark.stackable.tech
resources:
- sparkapplications
- sparkhistoryservers
verbs:
- get
- list
Expand Down
1 change: 1 addition & 0 deletions deploy/manifests/spark-clusterrole.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ rules:
- ""
resources:
- configmaps
- persistentvolumeclaims
- pods
- secrets
- serviceaccounts
Expand Down
93 changes: 93 additions & 0 deletions docker/Dockerfile.alternative
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
FROM registry.access.redhat.com/ubi8/ubi-minimal:8.6@sha256:c5ffdf5938d73283cec018f2adf59f0ed9f8c376d93e415a27b16c3c6aad6f45 AS chef
LABEL maintainer="Stackable GmbH"

# https://github.com/hadolint/hadolint/wiki/DL4006
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Update image and install everything needed for Rustup & Rust
RUN microdnf update --disablerepo=* --enablerepo=ubi-8-appstream-rpms --enablerepo=ubi-8-baseos-rpms -y \
&& rm -rf /var/cache/yum \
&& microdnf install --disablerepo=* --enablerepo=ubi-8-appstream-rpms --enablerepo=ubi-8-baseos-rpms curl findutils gcc gcc-c++ make cmake openssl-devel pkg-config systemd-devel unzip -y \
&& rm -rf /var/cache/yum

WORKDIR /opt/protoc
RUN PROTOC_VERSION=21.5 \
ARCH=$(arch | sed 's/^aarch64$/aarch_64/') \
&& curl --location --output protoc.zip "https://repo.stackable.tech/repository/packages/protoc/protoc-${PROTOC_VERSION}-linux-${ARCH}.zip" \
&& unzip protoc.zip \
&& rm protoc.zip
ENV PROTOC=/opt/protoc/bin/protoc
WORKDIR /

# IMPORTANT
# If you change the toolchain version here, make sure to also change the "rust_version"
# property in operator-templating/repositories.yaml
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain 1.63.0 \
&& . $HOME/.cargo/env \
&& cargo install cargo-chef --locked

WORKDIR /src

FROM chef AS planner

COPY . .
RUN . $HOME/.cargo/env && cargo chef prepare --recipe-path recipe.json

FROM chef AS builder

COPY --from=planner /src/recipe.json recipe.json

# Build dependencies - this is the caching Docker layer!
RUN . $HOME/.cargo/env && cargo chef cook --release --recipe-path recipe.json

# Build application
COPY . .
RUN . $HOME/.cargo/env && cargo build --release

WORKDIR /app

# Copy the "interesting" files into /app.
RUN find /src/target/release \
-regextype egrep \
# The interesting binaries are all directly in ${BUILD_DIR}.
-maxdepth 1 \
# Well, binaries are executable.
-executable \
# Well, binaries are files.
-type f \
# Filter out tests.
! -regex ".*\-[a-fA-F0-9]{16,16}$" \
# Copy the matching files into /app.
-exec cp {} /app \;

RUN echo "The following files will be copied to the runtime image: $(ls /app)"

FROM registry.access.redhat.com/ubi8/ubi-minimal AS operator

ARG VERSION
ARG RELEASE="1"

LABEL name="Stackable Operator for Apache Spark-on-Kubernetes" \
maintainer="[email protected]" \
vendor="Stackable GmbH" \
version="${VERSION}" \
release="${RELEASE}" \
summary="Deploy and manage Apache Spark-on-Kubernetes clusters." \
description="Deploy and manage Apache Spark-on-Kubernetes clusters."

RUN microdnf install -y yum \
&& yum -y update-minimal --security --sec-severity=Important --sec-severity=Critical \
&& yum clean all \
&& microdnf clean all

COPY LICENSE /licenses/LICENSE

COPY --from=builder /app/stackable-spark-k8s-operator /
COPY deploy/config-spec/properties.yaml /etc/stackable/spark-k8s-operator/config-spec/properties.yaml

RUN groupadd -g 1000 stackable && adduser -u 1000 -g stackable -c 'Stackable Operator' stackable

USER stackable:stackable

ENTRYPOINT ["/stackable-spark-k8s-operator"]
CMD ["run"]
37 changes: 37 additions & 0 deletions docs/modules/ROOT/examples/example-history-app.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
---
apiVersion: spark.stackable.tech/v1alpha1
kind: SparkApplication
metadata:
name: spark-pi-s3-1
spec:
version: "1.0"
sparkImage: docker.stackable.tech/stackable/spark-k8s:3.3.0-stackable0.3.0
sparkImagePullPolicy: IfNotPresent
mode: cluster
mainClass: org.apache.spark.examples.SparkPi
mainApplicationFile: s3a://my-bucket/spark-examples_2.12-3.3.0.jar
s3bucket: # <1>
inline:
bucketName: my-bucket
connection:
inline:
host: test-minio
port: 9000
accessStyle: Path
credentials:
secretClass: s3-credentials-class # <2>
logFileDirectory: # <3>
s3:
prefix: eventlogs/ # <4>
bucket:
inline:
bucketName: spark-logs # <5>
connection:
inline:
host: test-minio
port: 9000
accessStyle: Path
credentials:
secretClass: history-credentials-class # <6>
executor:
instances: 1
29 changes: 29 additions & 0 deletions docs/modules/ROOT/examples/example-history-server.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
---
apiVersion: spark.stackable.tech/v1alpha1
kind: SparkHistoryServer
metadata:
name: spark-history
spec:
image:
productVersion: 3.3.0
stackableVersion: 0.3.0
logFileDirectory: # <1>
s3:
prefix: eventlogs/ # <2>
bucket: # <3>
inline:
bucketName: spark-logs
connection:
inline:
host: test-minio
port: 9000
accessStyle: Path
credentials:
secretClass: s3-credentials-class
sparkConf: # <4>
nodes:
roleGroups:
cleaner:
replicas: 1 # <5>
config:
cleaner: true # <6>
Binary file added docs/modules/ROOT/images/history-server-ui.png
1 change: 1 addition & 0 deletions docs/modules/ROOT/nav.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
* xref:usage.adoc[]
* xref:job_dependencies.adoc[]
* xref:rbac.adoc[]
* xref:history_server.adoc[]
50 changes: 50 additions & 0 deletions docs/modules/ROOT/pages/history_server.adoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
= Spark History Server

== Overview

The Stackable Spark-on-Kubernetes operator runs Apache Spark workloads in a Kubernetes cluster, whereby driver and executor pods are created for the duration of the job and then terminated. One or more Spark History Server instances can be deployed independently of `SparkApplication` jobs and used as an endpoint for Spark logging, so that job information can be viewed once the job pods are no longer available.
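
For orientation, this mechanism rests on Spark's standard event-log properties: the operator injects the event-log settings into submitted applications, and the history server reads from the same location. A sketch of the properties involved (the exact keys the operator writes may differ; the `spark-logs` bucket and `eventlogs/` prefix are taken from the examples below):

[source,properties]
----
# Written into the application's spark-defaults (sketch, assumed values)
spark.eventLog.enabled          true
spark.eventLog.dir              s3a://spark-logs/eventlogs/
# Read by the history server
spark.history.fs.logDirectory   s3a://spark-logs/eventlogs/
----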

== Deployment

The example below demonstrates how to set up the history server running in one Pod with scheduled cleanups of the event logs. The event logs are loaded from an S3 bucket named `spark-logs` and the folder `eventlogs/`. The credentials for this bucket are provided by the secret class `s3-credentials-class`. For more details on how the Stackable Data Platform manages S3 resources see the xref:home:concepts:s3.adoc[S3 resources] page.


[source,yaml]
----
include::example$example-history-server.yaml[]
----

<1> The location of the event logs. Must be an S3 bucket. Future implementations might add support for other shared filesystems such as HDFS.
<2> Folder within the S3 bucket where the log files are located. This folder is required and must exist before setting up the history server.
<3> The S3 bucket definition, here provided in-line.
<4> Additional history server configuration properties can be provided here as a map. For possible properties see: https://spark.apache.org/docs/latest/monitoring.html#spark-history-server-configuration-options
<5> This deployment has only one Pod. Multiple history servers can be started, all reading the same event logs, by increasing the replica count.
<6> This history server will automatically clean up old log files by using default properties. You can change any of these by using the `sparkConf` map.

NOTE: Only one role group can have scheduled cleanups enabled (`cleaner: true`), and this role group can have at most one replica.
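
The cleaner defaults can be overridden through the `sparkConf` map using Spark's standard cleaner properties. An illustrative sketch (the values shown are examples, not operator defaults):

[source,yaml]
----
sparkConf:
  spark.history.fs.cleaner.interval: "1d"   # how often to check for logs to delete
  spark.history.fs.cleaner.maxAge: "7d"     # delete event logs older than this
----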

== Application configuration


The example below demonstrates how to configure Spark applications to store log events in an S3 bucket.

[source,yaml]
----
include::example$example-history-app.yaml[]
----

<1> Location of the data that is being processed by the application.
<2> Credentials used to access the data above.
<3> Instruct the operator to configure the application with logging enabled.
<4> Folder to store logs. This must match the prefix used by the history server.
<5> Bucket to store logs. This must match the bucket used by the history server.
<6> Not used by the application! The operator will ignore this and use the credentials from the `s3bucket` to store event logs.



== History Web UI

The history server exposes a user console on port 18080. By setting up port-forwarding on port 18080, this UI can be opened in a browser to show running and completed jobs:
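
For a quick look during development, `kubectl port-forward` is enough. The service name below is an assumption derived from the resource name `spark-history` and the `node` role in the example above; check `kubectl get svc` for the actual name:

[source,bash]
----
# Forward the history server UI to localhost (service name is an assumption)
kubectl port-forward svc/spark-history-node 18080:18080
----

Then open http://localhost:18080 in a browser.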

image::history-server-ui.png[History Server Console]

5 changes: 3 additions & 2 deletions rust/crd/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@ version = "0.7.0-nightly"
publish = false

[dependencies]
stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag="0.27.1" }
stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag="0.30.2" }

semver = "1.0"
serde = { version = "1.0", features = ["derive"] }
serde = "1.0"
serde_json = "1.0"
serde_yaml = "0.8"
snafu = "0.7"
strum = { version = "0.24", features = ["derive"] }
tracing = "0.1"

[dev-dependencies]
rstest = "0.16.0"
17 changes: 17 additions & 0 deletions rust/crd/src/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,20 @@ pub const S3_SECRET_DIR_NAME: &str = "/stackable/secrets";
pub const MIN_MEMORY_OVERHEAD: u32 = 384;
pub const JVM_OVERHEAD_FACTOR: f32 = 0.1;
pub const NON_JVM_OVERHEAD_FACTOR: f32 = 0.4;

pub const OPERATOR_NAME: &str = "spark.stackable.tech";
pub const CONTROLLER_NAME: &str = "sparkapplication";
pub const POD_DRIVER_CONTROLLER_NAME: &str = "pod-driver";
pub const HISTORY_CONTROLLER_NAME: &str = "history";

pub const HISTORY_ROLE_NAME: &str = "node";

pub const HISTORY_IMAGE_BASE_NAME: &str = "spark-k8s";

pub const HISTORY_CONFIG_FILE_NAME: &str = "spark-defaults.conf";
pub const HISTORY_CONFIG_FILE_NAME_FULL: &str = "/stackable/spark/conf/spark-defaults.conf";

pub const LABEL_NAME_INSTANCE: &str = "app.kubernetes.io/instance";

pub const VOLUME_NAME_S3_CREDENTIALS: &str = "s3-credentials";
pub const SPARK_CLUSTER_ROLE: &str = "spark-k8s-clusterrole";