Skip to content

Commit 4ff94bc

Browse files
authored
Merge pull request #36 from stackabletech/pvc-support
Pvc support
2 parents 8054c54 + a51f013 commit 4ff94bc

12 files changed

+3754
-103
lines changed

deploy/crd/sparkapplication.crd.yaml

Lines changed: 1142 additions & 0 deletions
Large diffs are not rendered by default.

deploy/helm/spark-k8s-operator/crds/crds.yaml

Lines changed: 1142 additions & 0 deletions
Large diffs are not rendered by default.

deploy/manifests/crds.yaml

Lines changed: 1142 additions & 0 deletions
Large diffs are not rendered by default.

examples/ny-tlc-report-external-dependencies.yaml

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ metadata:
66
namespace: default
77
spec:
88
version: "1.0"
9-
sparkImage: docker.stackable.tech/stackable/spark-k8s:3.2.1-hadoop3.2-python39-aws1.11.375-stackable0.3.0
9+
sparkImage: docker.stackable.tech/stackable/pyspark-k8s:3.2.1-hadoop3.2-python39-stackable0.1.0
1010
mode: cluster
1111
mainApplicationFile: s3a://stackable-spark-k8s-jars/jobs/ny_tlc_report.py
1212
args:
@@ -16,11 +16,23 @@ spec:
1616
- tabulate==0.8.9
1717
sparkConf:
1818
"spark.hadoop.fs.s3a.aws.credentials.provider": "org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider"
19+
"spark.driver.extraClassPath": "/dependencies/jars/hadoop-aws-3.2.0.jar:/dependencies/jars/aws-java-sdk-bundle-1.11.375.jar"
20+
"spark.executor.extraClassPath": "/dependencies/jars/hadoop-aws-3.2.0.jar:/dependencies/jars/aws-java-sdk-bundle-1.11.375.jar"
21+
volumes:
22+
- name: job-deps
23+
persistentVolumeClaim:
24+
claimName: pvc-ksv
1925
driver:
2026
cores: 1
2127
coreLimit: "1200m"
2228
memory: "512m"
29+
volumeMounts:
30+
- name: job-deps
31+
mountPath: /dependencies
2332
executor:
2433
cores: 1
2534
instances: 3
2635
memory: "512m"
36+
volumeMounts:
37+
- name: job-deps
38+
mountPath: /dependencies

examples/ny-tlc-report-image.yaml

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ metadata:
77
spec:
88
version: "1.0"
99
image: docker.stackable.tech/stackable/ny-tlc-report:0.1.0
10-
sparkImage: docker.stackable.tech/stackable/spark-k8s:3.2.1-hadoop3.2-python39-aws1.11.375-stackable0.3.0
10+
sparkImage: docker.stackable.tech/stackable/pyspark-k8s:3.2.1-hadoop3.2-python39-stackable0.1.0
1111
mode: cluster
1212
mainApplicationFile: local:///stackable/spark/jobs/ny_tlc_report.py
1313
args:
@@ -17,11 +17,23 @@ spec:
1717
- tabulate==0.8.9
1818
sparkConf:
1919
"spark.hadoop.fs.s3a.aws.credentials.provider": "org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider"
20+
"spark.driver.extraClassPath": "/dependencies/jars/hadoop-aws-3.2.0.jar:/dependencies/jars/aws-java-sdk-bundle-1.11.375.jar"
21+
"spark.executor.extraClassPath": "/dependencies/jars/hadoop-aws-3.2.0.jar:/dependencies/jars/aws-java-sdk-bundle-1.11.375.jar"
22+
volumes:
23+
- name: job-deps
24+
persistentVolumeClaim:
25+
claimName: pvc-ksv
2026
driver:
2127
cores: 1
2228
coreLimit: "1200m"
2329
memory: "512m"
30+
volumeMounts:
31+
- name: job-deps
32+
mountPath: /dependencies
2433
executor:
2534
cores: 1
2635
instances: 3
2736
memory: "512m"
37+
volumeMounts:
38+
- name: job-deps
39+
mountPath: /dependencies

examples/ny-tlc-report-pvc.yaml

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
---
2+
apiVersion: spark.stackable.tech/v1alpha1
3+
kind: SparkApplication
4+
metadata:
5+
name: ny-tlc-report-pvc
6+
namespace: default
7+
spec:
8+
version: "1.0"
9+
sparkImage: docker.stackable.tech/stackable/spark-k8s:3.2.1-hadoop3.2-stackable0.4.0
10+
mode: cluster
11+
mainApplicationFile: s3a://stackable-spark-k8s-jars/jobs/ny-tlc-report-1.0-SNAPSHOT.jar
12+
mainClass: org.example.App
13+
volumes:
14+
- name: job-deps
15+
persistentVolumeClaim:
16+
claimName: pvc-ksv
17+
args:
18+
- "'s3a://nyc-tlc/trip data/yellow_tripdata_2021-07.csv'"
19+
sparkConf:
20+
"spark.hadoop.fs.s3a.aws.credentials.provider": "org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider"
21+
"spark.driver.extraClassPath": "/dependencies/jars/hadoop-aws-3.2.0.jar:/dependencies/jars/aws-java-sdk-bundle-1.11.375.jar"
22+
"spark.executor.extraClassPath": "/dependencies/jars/hadoop-aws-3.2.0.jar:/dependencies/jars/aws-java-sdk-bundle-1.11.375.jar"
23+
driver:
24+
cores: 1
25+
coreLimit: "1200m"
26+
memory: "512m"
27+
volumeMounts:
28+
- name: job-deps
29+
mountPath: /dependencies
30+
executor:
31+
cores: 1
32+
instances: 3
33+
memory: "512m"
34+
volumeMounts:
35+
- name: job-deps
36+
mountPath: /dependencies

examples/spark-examples-s3.yaml

Lines changed: 0 additions & 23 deletions
This file was deleted.

kind/assert-pvc-jars.yaml

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
---
2+
apiVersion: batch/v1
3+
kind: Job
4+
metadata:
5+
name: assert-pvc-jars
6+
spec:
7+
template:
8+
spec:
9+
restartPolicy: Never
10+
volumes:
11+
- name: job-deps
12+
persistentVolumeClaim:
13+
claimName: pvc-ksv
14+
containers:
15+
- name: assert-pvc-jars
16+
image: docker.stackable.tech/stackable/tools:0.2.0-stackable0
17+
env:
18+
- name: DEST_DIR
19+
value: "/dependencies/jars"
20+
command: ["bash", "-x", "-c", "ls -ltr $DEST_DIR"]
21+
securityContext:
22+
runAsUser: 0
23+
volumeMounts:
24+
- name: job-deps
25+
mountPath: /dependencies

kind/kind-pvc.yaml

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
---
2+
apiVersion: v1
3+
kind: PersistentVolumeClaim
4+
metadata:
5+
name: pvc-ksv
6+
spec:
7+
accessModes:
8+
- ReadWriteOnce
9+
resources:
10+
requests:
11+
storage: 1Gi
12+
---
13+
apiVersion: batch/v1
14+
kind: Job
15+
metadata:
16+
name: aws-deps
17+
spec:
18+
template:
19+
spec:
20+
restartPolicy: Never
21+
volumes:
22+
- name: job-deps
23+
persistentVolumeClaim:
24+
claimName: pvc-ksv
25+
initContainers:
26+
- name: dest-dir
27+
image: docker.stackable.tech/stackable/tools:0.2.0-stackable0
28+
env:
29+
- name: DEST_DIR
30+
value: "/stackable/spark/dependencies/jars"
31+
command:
32+
[
33+
"bash",
34+
"-x",
35+
"-c",
36+
"mkdir -p $DEST_DIR && chown stackable:stackable ${DEST_DIR} && chmod -R a=,u=rwX ${DEST_DIR}",
37+
]
38+
securityContext:
39+
runAsUser: 0
40+
volumeMounts:
41+
- name: job-deps
42+
mountPath: /stackable/spark/dependencies
43+
containers:
44+
- name: aws-deps
45+
image: docker.stackable.tech/stackable/tools:0.2.0-stackable0
46+
env:
47+
- name: DEST_DIR
48+
value: "/stackable/spark/dependencies/jars"
49+
- name: AWS
50+
value: "1.11.375"
51+
- name: HADOOP
52+
value: "3.2"
53+
command:
54+
[
55+
"bash",
56+
"-x",
57+
"-c",
58+
"curl -L https://search.maven.org/remotecontent?filepath=org/apache/hadoop/hadoop-aws/${HADOOP}.0/hadoop-aws-${HADOOP}.0.jar -o ${DEST_DIR}/hadoop-aws-${HADOOP}.0.jar && curl -L https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/${AWS}/aws-java-sdk-bundle-${AWS}.jar -o ${DEST_DIR}/aws-java-sdk-bundle-${AWS}.jar",
59+
]
60+
volumeMounts:
61+
- name: job-deps
62+
mountPath: /stackable/spark/dependencies

kind/spark-k8s-kind-cluster.yaml

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
---
2+
kind: Cluster
3+
apiVersion: kind.x-k8s.io/v1alpha4
4+
nodes:
5+
- role: control-plane
6+
extraMounts:
7+
- hostPath: /tmp/kind-shared-volume
8+
containerPath: /kind-shared-volume
9+
- role: worker
10+
kubeadmConfigPatches:
11+
- |
12+
kind: JoinConfiguration
13+
nodeRegistration:
14+
kubeletExtraArgs:
15+
node-labels: node=1
16+
extraMounts:
17+
- hostPath: /tmp/kind-shared-volume
18+
containerPath: /kind-shared-volume
19+
- role: worker
20+
kubeadmConfigPatches:
21+
- |
22+
kind: JoinConfiguration
23+
nodeRegistration:
24+
kubeletExtraArgs:
25+
node-labels: node=2
26+
extraMounts:
27+
- hostPath: /tmp/kind-shared-volume
28+
containerPath: /kind-shared-volume
29+
- role: worker
30+
kubeadmConfigPatches:
31+
- |
32+
kind: JoinConfiguration
33+
nodeRegistration:
34+
kubeletExtraArgs:
35+
node-labels: node=3
36+
extraMounts:
37+
- hostPath: /tmp/kind-shared-volume
38+
containerPath: /kind-shared-volume
39+

rust/crd/src/lib.rs

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
pub mod constants;
44

55
use constants::*;
6+
use stackable_operator::k8s_openapi::api::core::v1::{EnvVar, Volume, VolumeMount};
67

78
use std::collections::HashMap;
89

@@ -76,6 +77,10 @@ pub struct SparkApplicationSpec {
7677
pub deps: Option<JobDependencies>,
7778
#[serde(default, skip_serializing_if = "Option::is_none")]
7879
pub args: Option<Vec<String>>,
80+
#[serde(default, skip_serializing_if = "Option::is_none")]
81+
pub volumes: Option<Vec<Volume>>,
82+
#[serde(default, skip_serializing_if = "Option::is_none")]
83+
pub env: Option<Vec<EnvVar>>,
7984
}
8085

8186
#[derive(Clone, Debug, Default, Deserialize, JsonSchema, PartialEq, Serialize)]
@@ -124,6 +129,36 @@ impl SparkApplication {
124129
.map(|req| req.join(" "))
125130
}
126131

132+
pub fn env(&self) -> Vec<EnvVar> {
133+
let tmp = self.spec.env.as_ref();
134+
tmp.iter().flat_map(|e| e.iter()).cloned().collect()
135+
}
136+
137+
pub fn volumes(&self) -> Vec<Volume> {
138+
let tmp = self.spec.volumes.as_ref();
139+
tmp.iter().flat_map(|v| v.iter()).cloned().collect()
140+
}
141+
142+
pub fn executor_volume_mounts(&self) -> Vec<VolumeMount> {
143+
let tmp = self
144+
.spec
145+
.executor
146+
.as_ref()
147+
.and_then(|executor_conf| executor_conf.volume_mounts.clone());
148+
149+
tmp.iter().flat_map(|v| v.iter()).cloned().collect()
150+
}
151+
152+
pub fn driver_volume_mounts(&self) -> Vec<VolumeMount> {
153+
let tmp = self
154+
.spec
155+
.driver
156+
.as_ref()
157+
.and_then(|driver_conf| driver_conf.volume_mounts.clone());
158+
159+
tmp.iter().flat_map(|v| v.iter()).cloned().collect()
160+
}
161+
127162
pub fn build_command(&self) -> Result<Vec<String>, Error> {
128163
// mandatory properties
129164
let mode = self.mode().context(ObjectHasNoDeployModeSnafu)?;
@@ -209,12 +244,14 @@ pub struct CommonConfig {
209244
pub enable_monitoring: Option<bool>,
210245
}
211246

212-
#[derive(Clone, Debug, Default, Deserialize, Eq, JsonSchema, PartialEq, Serialize)]
247+
#[derive(Clone, Debug, Default, Deserialize, JsonSchema, PartialEq, Serialize)]
213248
#[serde(rename_all = "camelCase")]
214249
pub struct DriverConfig {
215250
pub cores: Option<usize>,
216251
pub core_limit: Option<String>,
217252
pub memory: Option<String>,
253+
#[serde(default, skip_serializing_if = "Option::is_none")]
254+
pub volume_mounts: Option<Vec<VolumeMount>>,
218255
}
219256

220257
impl DriverConfig {
@@ -235,12 +272,14 @@ impl DriverConfig {
235272
}
236273
}
237274

238-
#[derive(Clone, Debug, Default, Deserialize, Eq, JsonSchema, PartialEq, Serialize)]
275+
#[derive(Clone, Debug, Default, Deserialize, JsonSchema, PartialEq, Serialize)]
239276
#[serde(rename_all = "camelCase")]
240277
pub struct ExecutorConfig {
241278
pub cores: Option<usize>,
242279
pub instances: Option<usize>,
243280
pub memory: Option<String>,
281+
#[serde(default, skip_serializing_if = "Option::is_none")]
282+
pub volume_mounts: Option<Vec<VolumeMount>>,
244283
}
245284

246285
impl ExecutorConfig {

0 commit comments

Comments (0)