Skip to content

Commit 8e1f237

Browse files
committed
feat(test): added test for delta.io
This fails with: ``` spark org/apache/spark/sql/delta/stats/StatisticsCollection$SqlParser$$anon$1.visitMultipartIdentifierList(Lorg/apache/spark/sql/catalyst/parser/SqlBaseParser$MultipartIdentifierListContext;)Lscala/collection/Seq; @17: invokevirtual spark Reason: spark Type 'org/apache/spark/sql/catalyst/parser/SqlBaseParser$MultipartIdentifierListContext' (current frame, stack[1]) is not assignable to 'org/antlr/v4/runtime/ParserRuleContext' spark Current Frame: spark bci: @17 spark flags: { } spark locals: { 'org/apache/spark/sql/delta/stats/StatisticsCollection$SqlParser$$anon$1', 'org/apache/spark/sql/catalyst/parser/SqlBaseParser$MultipartIdentifierListContext' } spark stack: { 'org/apache/spark/sql/catalyst/parser/ParserUtils$', 'org/apache/spark/sql/catalyst/parser/SqlBaseParser$MultipartIdentifierListContext', 'scala/Option', 'scala/Function0' } spark Bytecode: spark 0000000: b200 232b b200 23b6 0027 2a2b ba00 3f00 spark 0000010: 00b6 0043 c000 45b0 spark spark at org.apache.spark.sql.delta.stats.StatisticsCollection$SqlParser.<init>(StatisticsCollection.scala:409) spark at org.apache.spark.sql.delta.stats.StatisticsCollection$.<init>(StatisticsCollection.scala:422) spark at org.apache.spark.sql.delta.stats.StatisticsCollection$.<clinit>(StatisticsCollection.scala) spark at org.apache.spark.sql.delta.OptimisticTransactionImpl.updateMetadataInternal(OptimisticTransaction.scala:429) spark at org.apache.spark.sql.delta.OptimisticTransactionImpl.updateMetadataInternal$(OptimisticTransaction.scala:424) spark at org.apache.spark.sql.delta.OptimisticTransaction.updateMetadataInternal(OptimisticTransaction.scala:142) spark at org.apache.spark.sql.delta.OptimisticTransactionImpl.updateMetadata(OptimisticTransaction.scala:400) spark at org.apache.spark.sql.delta.OptimisticTransactionImpl.updateMetadata$(OptimisticTransaction.scala:393) spark at org.apache.spark.sql.delta.OptimisticTransaction.updateMetadata(OptimisticTransaction.scala:142) spark at 
org.apache.spark.sql.delta.schema.ImplicitMetadataOperation.updateMetadata(ImplicitMetadataOperation.scala:97) spark at org.apache.spark.sql.delta.schema.ImplicitMetadataOperation.updateMetadata$(ImplicitMetadataOperation.scala:56) spark at org.apache.spark.sql.delta.commands.WriteIntoDelta.updateMetadata(WriteIntoDelta.scala:76) spark at org.apache.spark.sql.delta.commands.WriteIntoDelta.write(WriteIntoDelta.scala:162) spark at org.apache.spark.sql.delta.commands.WriteIntoDelta.$anonfun$run$1(WriteIntoDelta.scala:105) ```
1 parent e8991da commit 8e1f237

10 files changed

+251
-0
lines changed
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
---
2+
apiVersion: kuttl.dev/v1beta1
3+
kind: TestAssert
4+
{% if lookup('env', 'VECTOR_AGGREGATOR') %}
5+
---
6+
apiVersion: v1
7+
kind: ConfigMap
8+
metadata:
9+
name: vector-aggregator-discovery
10+
{% endif %}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{% if lookup('env', 'VECTOR_AGGREGATOR') %}
2+
---
3+
apiVersion: v1
4+
kind: ConfigMap
5+
metadata:
6+
name: vector-aggregator-discovery
7+
data:
8+
ADDRESS: {{ lookup('env', 'VECTOR_AGGREGATOR') }}
9+
{% endif %}
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
---
2+
apiVersion: v1
3+
kind: Secret
4+
metadata:
5+
name: minio-credentials
6+
labels:
7+
secrets.stackable.tech/class: s3-credentials-class
8+
timeout: 240
9+
stringData:
10+
accessKey: minioAccessKey
11+
secretKey: minioSecretKey
12+
# The following two entries are used by the Bitnami chart for MinIO to
13+
# set up credentials for accessing buckets managed by the MinIO tenant.
14+
root-user: minioAccessKey
15+
root-password: minioSecretKey
16+
---
17+
apiVersion: secrets.stackable.tech/v1alpha1
18+
kind: SecretClass
19+
metadata:
20+
name: s3-credentials-class
21+
spec:
22+
backend:
23+
k8sSearch:
24+
searchNamespace:
25+
pod: {}
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
---
2+
apiVersion: kuttl.dev/v1beta1
3+
kind: TestAssert
4+
timeout: 900
5+
---
6+
apiVersion: apps/v1
7+
kind: Deployment
8+
metadata:
9+
name: test-minio
10+
status:
11+
readyReplicas: 1
12+
---
13+
apiVersion: v1
14+
kind: Pod
15+
metadata:
16+
name: minio-client
17+
labels:
18+
app: minio-client
19+
status:
20+
phase: Running
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
---
2+
apiVersion: kuttl.dev/v1beta1
3+
kind: TestStep
4+
commands:
5+
- script: >-
6+
helm install test-minio
7+
--namespace $NAMESPACE
8+
--version 11.9.2
9+
-f helm-bitnami-minio-values.yaml
10+
--repo https://charts.bitnami.com/bitnami minio
11+
timeout: 240
12+
---
13+
apiVersion: v1
14+
kind: Pod
15+
metadata:
16+
name: minio-client
17+
labels:
18+
app: minio-client
19+
spec:
20+
selector:
21+
matchLabels:
22+
app: minio-client
23+
restartPolicy: Never
24+
containers:
25+
- name: minio-client
26+
image: docker.io/bitnami/minio-client:2022.8.11-debian-11-r3
27+
command: ["bash", "-c", "sleep infinity"]
28+
stdin: true
29+
tty: true
30+
env:
31+
- name: MINIO_SERVER_ACCESS_KEY
32+
valueFrom:
33+
secretKeyRef:
34+
name: minio-credentials
35+
key: root-user
36+
optional: false
37+
- name: MINIO_SERVER_SECRET_KEY
38+
valueFrom:
39+
secretKeyRef:
40+
name: minio-credentials
41+
key: root-password
42+
optional: false
43+
- name: MINIO_SERVER_HOST
44+
value: test-minio
45+
- name: MINIO_SERVER_PORT_NUMBER
46+
value: "9000"
47+
- name: MINIO_SERVER_SCHEME
48+
value: http
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
---
2+
apiVersion: kuttl.dev/v1beta1
3+
kind: TestStep
4+
commands:
5+
# give minio enough time to start
6+
- command: sleep 10
7+
- command: kubectl exec -n $NAMESPACE minio-client -- sh -c 'mc alias set test-minio http://test-minio:9000 $$MINIO_SERVER_ACCESS_KEY $$MINIO_SERVER_SECRET_KEY'
8+
- command: kubectl exec -n $NAMESPACE minio-client -- mc mb test-minio/my-bucket
9+
- command: kubectl exec -n $NAMESPACE minio-client -- mc policy set public test-minio/my-bucket
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
---
2+
apiVersion: kuttl.dev/v1beta1
3+
kind: TestAssert
4+
timeout: 300
5+
---
6+
# The Job starting the whole process
7+
apiVersion: spark.stackable.tech/v1alpha1
8+
kind: SparkApplication
9+
metadata:
10+
name: pyspark-delta
11+
status:
12+
phase: Succeeded
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
---
2+
apiVersion: spark.stackable.tech/v1alpha1
3+
kind: SparkApplication
4+
metadata:
5+
name: pyspark-delta
6+
spec:
7+
{% if lookup('env', 'VECTOR_AGGREGATOR') %}
8+
vectorAggregatorConfigMapName: vector-aggregator-discovery
9+
{% endif %}
10+
sparkImage:
11+
{% if test_scenario['values']['spark'].find(",") > 0 %}
12+
custom: "{{ test_scenario['values']['spark'].split(',')[1] }}"
13+
productVersion: "{{ test_scenario['values']['spark'].split(',')[0] }}"
14+
{% else %}
15+
productVersion: "{{ test_scenario['values']['spark'] }}"
16+
{% endif %}
17+
pullPolicy: IfNotPresent
18+
mode: cluster
19+
mainApplicationFile: "local:///stackable/spark/jobs/write-to-delta.py"
20+
s3connection:
21+
inline:
22+
host: test-minio
23+
port: 9000
24+
accessStyle: Path
25+
credentials:
26+
secretClass: s3-credentials-class
27+
job:
28+
logging:
29+
enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
30+
driver:
31+
config:
32+
logging:
33+
enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
34+
volumeMounts:
35+
- name: script
36+
mountPath: /stackable/spark/jobs
37+
executor:
38+
replicas: 1
39+
config:
40+
logging:
41+
enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
42+
volumeMounts:
43+
- name: script
44+
mountPath: /stackable/spark/jobs
45+
deps:
46+
requirements:
47+
- importlib-metadata
48+
- delta-spark=={{ test_scenario['values']['delta'] }}
49+
packages:
50+
- io.delta:delta-spark_2.12:{{ test_scenario['values']['delta'] }}
51+
volumes:
52+
- name: script
53+
configMap:
54+
name: write-to-delta
55+
---
56+
apiVersion: v1
57+
kind: ConfigMap
58+
metadata:
59+
name: write-to-delta
60+
data:
61+
write-to-delta.py: |
62+
from datetime import datetime, date
63+
from pyspark.sql import Row, SparkSession
64+
from delta import *
65+
66+
def main():
67+
builder = SparkSession.builder.appName("MyApp") \
68+
.config("spark.hadoop.fs.s3a.aws.credentials.provider", "org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider") \
69+
.config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \
70+
.config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \
71+
.config("spark.hadoop.fs.s3a.impl", "org.apache.hadoop.fs.s3a.S3AFileSystem") \
72+
.config("spark.delta.logStore.class", "org.apache.spark.sql.delta.storage.S3SingleDriverLogStore") \
73+
.config("spark.hadoop.delta.enableFastS3AListFrom", "true")
74+
75+
spark = configure_spark_with_delta_pip(builder).getOrCreate()
76+
77+
df = spark.createDataFrame([
78+
Row(a=1, b=2., c='string1', d=date(2000, 1, 1), e=datetime(2000, 1, 1, 12, 0)),
79+
Row(a=2, b=3., c='string2', d=date(2000, 2, 1), e=datetime(2000, 1, 2, 12, 0)),
80+
Row(a=4, b=5., c='string3', d=date(2000, 3, 1), e=datetime(2000, 1, 3, 12, 0))
81+
])
82+
83+
location = "s3a://my-bucket/spark-delta-test"
84+
85+
df.write.format("delta").mode("overwrite").save(location)
86+
87+
if __name__ == "__main__":
88+
main()
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
---
2+
volumePermissions:
3+
enabled: false
4+
5+
podSecurityContext:
6+
enabled: false
7+
8+
containerSecurityContext:
9+
enabled: false
10+
11+
mode: standalone
12+
13+
disableWebUI: true
14+
15+
persistence:
16+
enabled: false
17+
18+
resources:
19+
requests:
20+
memory: 1Gi
21+
22+
auth:
23+
existingSecret: minio-credentials

tests/test-definition.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ dimensions:
1717
values:
1818
- "false"
1919
- "true"
20+
- name: delta
21+
values:
22+
- 3.1.0
2023
tests:
2124
- name: smoke
2225
dimensions:
@@ -70,6 +73,10 @@ tests:
7073
- name: iceberg
7174
dimensions:
7275
- spark
76+
- name: delta
77+
dimensions:
78+
- spark
79+
- delta
7380
suites:
7481
- name: nightly
7582
patch:

0 commit comments

Comments
 (0)