Commit 55868fe

Merge pull request #107 from stackabletech/106-duplicate-entries-for-key-name=job-files
Added test for PySpark application published as a Docker image.
2 parents f146839 + 5141c5c commit 55868fe

11 files changed: +5219 -0 lines

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
@@ -10,11 +10,13 @@ All notable changes to this project will be documented in this file.
 - Pinned MinIO version for tests ([#100])
 - `operator-rs` `0.21.0` → `0.22.0` ([#102]).
 - Added owner-reference to pod templates ([#104])
+- Added kuttl test for the case when pyspark jobs are provisioned using the `image` property of the `SparkApplication` definition ([#107])

 [#97]: https://github.com/stackabletech/spark-k8s-operator/pull/92
 [#100]: https://github.com/stackabletech/spark-k8s-operator/pull/100
 [#102]: https://github.com/stackabletech/spark-k8s-operator/pull/102
 [#104]: https://github.com/stackabletech/spark-k8s-operator/pull/104
+[#107]: https://github.com/stackabletech/spark-k8s-operator/pull/107

 ## [0.3.0] - 2022-06-30

Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
metadata:
  name: minio
timeout: 900
---
apiVersion: v1
kind: Service
metadata:
  name: test-minio
  labels:
    app: minio
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: minio-mc
status:
  readyReplicas: 1
  replicas: 1
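
Note: kuttl evaluates a TestAssert file by polling the cluster until every listed resource matches the given state or the timeout (here 900 seconds) expires, so this assert passes once the test-minio Service exists and the minio-mc StatefulSet reports one ready replica. A minimal sketch of running such a test case locally, assuming the kuttl kubectl plugin is installed (path and test name are placeholders):

  kubectl kuttl test <path-to-test-suite> --test <test-case-name>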
Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
---
apiVersion: v1
kind: Service
metadata:
  name: minio-mc
  labels:
    app: minio-mc
timeout: 240
spec:
  clusterIP: None
  selector:
    app: minio-mc
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: minio-mc
  labels:
    app: minio-mc
timeout: 240
spec:
  replicas: 1
  serviceName: "minio-mc"
  selector:
    matchLabels:
      app: minio-mc
  template:
    metadata:
      labels:
        app: minio-mc
    spec:
      containers:
        - name: minio-mc
          image: bitnami/minio:2022-debian-10
          stdin: true
          tty: true
Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
---
apiVersion: kuttl.dev/v1beta1
kind: TestStep
commands:
  - script: >-
      helm install test-minio
      --namespace $NAMESPACE
      --version 4.0.2
      --set mode=standalone
      --set replicas=1
      --set persistence.enabled=false
      --set buckets[0].name=my-bucket,buckets[0].policy=public
      --set resources.requests.memory=1Gi
      --repo https://charts.min.io/ minio
    timeout: 240
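
This step installs a single-node MinIO release (chart version 4.0.2, persistence disabled, one public bucket named my-bucket) into the test namespace. A quick manual check, assuming helm and kubectl point at the same cluster and namespace as the test run (commands are illustrative, not part of the test):

  helm list -n $NAMESPACE                      # the test-minio release should be listed
  kubectl get pods -n $NAMESPACE -l app=minio  # the MinIO pod should reach Running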
Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
---
apiVersion: kuttl.dev/v1beta1
kind: TestStep
commands:
  - script: >-
      kubectl exec -n $NAMESPACE minio-mc-0 --
      sh -c 'mc alias set test-minio http://test-minio:9000/'
  - script: kubectl cp -n $NAMESPACE yellow_tripdata_2021-07.csv minio-mc-0:/tmp
  - script: >-
      kubectl exec -n $NAMESPACE minio-mc-0 --
      mc cp /tmp/yellow_tripdata_2021-07.csv test-minio/my-bucket
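
The step above registers an mc alias for the test-minio service inside the minio-mc pod and copies the taxi-data CSV into the public bucket. One way to confirm the upload by hand, reusing the same alias (illustrative):

  kubectl exec -n $NAMESPACE minio-mc-0 -- mc ls test-minio/my-bucket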
Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
metadata:
  name: pyspark-ny-deps-job
timeout: 900
---
apiVersion: batch/v1
kind: Job
metadata:
  name: pyspark-ny-deps-job
status:
  succeeded: 1
Lines changed: 50 additions & 0 deletions
@@ -0,0 +1,50 @@
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: pyspark-ny-pvc
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
---
apiVersion: batch/v1
kind: Job
metadata:
  name: pyspark-ny-deps-job
spec:
  template:
    spec:
      nodeSelector:
        node: "1"
      restartPolicy: Never
      volumes:
        - name: job-deps
          persistentVolumeClaim:
            claimName: pyspark-ny-pvc
      containers:
        - name: aws-deps
          image: docker.stackable.tech/stackable/tools:0.2.0-stackable0
          env:
            - name: DEST_DIR
              value: "/dependencies/jars"
            - name: AWS
              value: "1.11.1026"
            - name: HADOOP
              value: "3.3.3"
          command:
            [
              "bash",
              "-x",
              "-o",
              "pipefail",
              "-c",
              "mkdir -p ${DEST_DIR} && curl -L https://search.maven.org/remotecontent?filepath=org/apache/hadoop/hadoop-aws/${HADOOP}/hadoop-aws-${HADOOP}.jar -o ${DEST_DIR}/hadoop-aws-${HADOOP}.jar && curl -L https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/${AWS}/aws-java-sdk-bundle-${AWS}.jar -o ${DEST_DIR}/aws-java-sdk-bundle-${AWS}.jar && chown -R stackable:stackable ${DEST_DIR} && chmod -R a=,u=rwX ${DEST_DIR}",
            ]
          volumeMounts:
            - name: job-deps
              mountPath: /dependencies
          securityContext:
            runAsUser: 0
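
The single-line bash command in the Job above is easier to read unfolded; the following sketch shows roughly equivalent steps (same URLs and environment variables, formatting only):

  mkdir -p ${DEST_DIR}
  # fetch the hadoop-aws module and the AWS SDK bundle that Spark needs for s3a:// access
  curl -L "https://search.maven.org/remotecontent?filepath=org/apache/hadoop/hadoop-aws/${HADOOP}/hadoop-aws-${HADOOP}.jar" -o "${DEST_DIR}/hadoop-aws-${HADOOP}.jar"
  curl -L "https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/${AWS}/aws-java-sdk-bundle-${AWS}.jar" -o "${DEST_DIR}/aws-java-sdk-bundle-${AWS}.jar"
  # hand the jars to the stackable user and drop all permissions except the owner's
  chown -R stackable:stackable ${DEST_DIR}
  chmod -R a=,u=rwX ${DEST_DIR}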
Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
---
apiVersion: kuttl.dev/v1beta1
kind: TestAssert
metadata:
  name: pyspark-ny-public-s3-image
timeout: 900
---
# The Job starting the whole process
apiVersion: spark.stackable.tech/v1alpha1
kind: SparkApplication
metadata:
  name: pyspark-ny-public-s3-image
status:
  phase: Succeeded
Lines changed: 48 additions & 0 deletions
@@ -0,0 +1,48 @@
---
apiVersion: spark.stackable.tech/v1alpha1
kind: SparkApplication
metadata:
  name: pyspark-ny-public-s3-image
spec:
  version: "1.0"
  # everything under /jobs will be copied to /stackable/spark/jobs
  image: docker.stackable.tech/stackable/ny-tlc-report:{{ test_scenario['values']['ny-tlc-report'] }}
  sparkImage: docker.stackable.tech/stackable/pyspark-k8s:{{ test_scenario['values']['spark'] }}-stackable{{ test_scenario['values']['stackable'] }}
  sparkImagePullPolicy: IfNotPresent
  mode: cluster
  mainApplicationFile: local:///stackable/spark/jobs/ny_tlc_report.py
  args:
    - "--input 's3a://my-bucket/yellow_tripdata_2021-07.csv'"
  deps:
    requirements:
      - tabulate==0.8.9
  s3bucket:
    inline:
      bucketName: my-bucket
      connection:
        inline:
          host: test-minio
          port: 9000
          accessStyle: Path
  sparkConf:
    spark.hadoop.fs.s3a.aws.credentials.provider: "org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider"
    spark.driver.extraClassPath: "/dependencies/jars/*"
    spark.executor.extraClassPath: "/dependencies/jars/*"
  volumes:
    - name: job-deps
      persistentVolumeClaim:
        claimName: pyspark-ny-pvc
  driver:
    cores: 1
    coreLimit: "1200m"
    memory: "512m"
    volumeMounts:
      - name: job-deps
        mountPath: /dependencies
  executor:
    cores: 1
    instances: 3
    memory: "512m"
    volumeMounts:
      - name: job-deps
        mountPath: /dependencies
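
Outside the kuttl harness, the same SparkApplication could be submitted and watched by hand; a sketch assuming the manifest (with the templated image tags resolved) is saved as app.yaml — filename illustrative — and the Spark operator is running in the cluster:

  kubectl apply -n $NAMESPACE -f app.yaml
  kubectl get sparkapplication pyspark-ny-public-s3-image -n $NAMESPACE -o jsonpath='{.status.phase}'
  # the kuttl assert above waits for this phase to become Succeeded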
