
Commit 436ff43

adwk67 and sbernauer authored
fix: allow fine-granular resource CPU settings (#408)
* fix: allow fine-granular resource CPU settings
* changelog
* Update CHANGELOG.md
  Co-authored-by: Sebastian Bernauer <[email protected]>
* adding note re. change-in-behaviour in changelog
* Update CHANGELOG.md
  Co-authored-by: Sebastian Bernauer <[email protected]>

Co-authored-by: Sebastian Bernauer <[email protected]>
1 parent 48ac356 commit 436ff43

File tree: 5 files changed, +35 -21 lines changed


CHANGELOG.md

Lines changed: 7 additions & 0 deletions

@@ -8,7 +8,14 @@ All notable changes to this project will be documented in this file.
 
 - Update Rust dependency versions, most notably operator-rs 0.67.1 ([#401])
 
+### Fixed
+
+- BREAKING (behaviour): Specified CPU resources are now applied correctly (instead of being rounded up to the next whole number).
+  This might affect your jobs: they may now e.g. only have 200m CPU resources requested instead of the 1000m they had so far,
+  meaning they might slow down significantly ([#408]).
+
 [#401]: https://github.com/stackabletech/spark-k8s-operator/pull/401
+[#408]: https://github.com/stackabletech/spark-k8s-operator/pull/408
 
 ## [24.3.0] - 2024-03-20
 

docs/modules/spark-k8s/pages/usage-guide/resources.adoc

Lines changed: 9 additions & 3 deletions

@@ -58,26 +58,32 @@ To illustrate resource configuration consider the use-case where resources are d
 
 === CPU
 
-CPU request and limit will be rounded up to the next integer value, resulting in the following:
+CPU request and limit will be used as defined in the custom resource, resulting in the following:
 
 
 |===
-|CRD |Spark conf
+|CRD |spark.kubernetes.{driver/executor} cores |spark.{driver/executor} cores (rounded up)
 
+|1800m
 |1800m
 |2
 
+|100m
 |100m
 |1
 
+|1.5
 |1.5
 |2
 
+|2
 |2
 |2
 |===
 
-Spark allows CPU limits to be set for the driver and executor using Spark settings (`spark.{driver|executor}.cores`) as well as Kubernetes-specific ones (`spark.kubernetes.{driver,executor}.{request|limit}.cores`). `spark.kubernetes.executor.request.cores` takes precedence over `spark.executor.cores` in determining the pod CPU request, but does not affect task parallelism (the number of tasks an executor can run concurrently), so for this reason `spark.executor.cores` is set to the value of `spark.kubernetes.executor.limit.cores`.
+`spark.kubernetes.{driver,executor}.{request|limit}.cores` determine the actual pod CPU request and limit and are taken directly from the manifest as defined by the user.
+`spark.{driver|executor}.cores` are set to the rounded(-up) value of the manifest settings.
+Task parallelism (the number of tasks an executor can run concurrently) is determined by `spark.executor.cores`.
 
 === Memory
 
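For reference, the rounding described in the table above corresponds to the `cores_from_quantity` helper used in `rust/crd/src/lib.rs` further down. The snippet below is only an illustrative sketch of that rule, not the operator's actual implementation; it assumes the CPU value arrives as a Kubernetes quantity string such as `1800m` or `1.5`.

// Illustrative sketch only (assumed behaviour, not the operator's code):
// round a Kubernetes CPU quantity up to whole cores, as in the table above.
fn rounded_cores(quantity: &str) -> Result<u32, std::num::ParseFloatError> {
    let cores: f64 = match quantity.strip_suffix('m') {
        // Millicore notation, e.g. "1800m" -> 1.8 cores
        Some(milli) => milli.parse::<f64>()? / 1000.0,
        // Plain notation, e.g. "1.5" or "2"
        None => quantity.parse::<f64>()?,
    };
    Ok(cores.ceil() as u32)
}

fn main() {
    // The rows from the table: 1800m -> 2, 100m -> 1, 1.5 -> 2, 2 -> 2
    for q in ["1800m", "100m", "1.5", "2"] {
        println!("{q} -> spark cores {}", rounded_cores(q).unwrap());
    }
}

Only `spark.{driver|executor}.cores` receives this rounded value; the requests and limits themselves are passed through to `spark.kubernetes.{driver,executor}.{request|limit}.cores` unchanged.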

rust/crd/src/lib.rs

Lines changed: 15 additions & 14 deletions

@@ -915,15 +915,17 @@ fn resources_to_driver_props(
         ..
     } = &driver_config.resources
     {
-        let min_cores = cores_from_quantity(min.0.clone())?;
-        let max_cores = cores_from_quantity(max.0.clone())?;
-        // will have default value from resources to apply if nothing set specifically
-        props.insert("spark.driver.cores".to_string(), max_cores.clone());
+        let driver_cores = cores_from_quantity(max.0.clone())?;
+        // take rounded value for driver.cores but actual values for the pod
+        props.insert("spark.driver.cores".to_string(), driver_cores.clone());
         props.insert(
             "spark.kubernetes.driver.request.cores".to_string(),
-            min_cores,
+            min.0.clone(),
+        );
+        props.insert(
+            "spark.kubernetes.driver.limit.cores".to_string(),
+            max.0.clone(),
         );
-        props.insert("spark.kubernetes.driver.limit.cores".to_string(), max_cores);
     }
 
     if let Resources {
@@ -955,17 +957,16 @@ fn resources_to_executor_props(
         ..
     } = &executor_config.resources
     {
-        let min_cores = cores_from_quantity(min.0.clone())?;
-        let max_cores = cores_from_quantity(max.0.clone())?;
-        // will have default value from resources to apply if nothing set specifically
-        props.insert("spark.executor.cores".to_string(), max_cores.clone());
+        let executor_cores = cores_from_quantity(max.0.clone())?;
+        // take rounded value for executor.cores (to determine the parallelism) but actual values for the pod
+        props.insert("spark.executor.cores".to_string(), executor_cores.clone());
         props.insert(
             "spark.kubernetes.executor.request.cores".to_string(),
-            min_cores,
+            min.0.clone(),
         );
         props.insert(
             "spark.kubernetes.executor.limit.cores".to_string(),
-            max_cores,
+            max.0.clone(),
         );
     }
 
@@ -1154,7 +1155,7 @@ mod tests {
             ),
             (
                 "spark.kubernetes.driver.request.cores".to_string(),
-                "1".to_string(),
+                "250m".to_string(),
             ),
         ]
         .into_iter()
@@ -1194,7 +1195,7 @@
             ("spark.executor.memory".to_string(), "128m".to_string()), // 128 and not 512 because memory overhead is subtracted
             (
                 "spark.kubernetes.executor.request.cores".to_string(),
-                "1".to_string(),
+                "250m".to_string(),
             ),
             (
                 "spark.kubernetes.executor.limit.cores".to_string(),

tests/templates/kuttl/resources/10-assert.yaml.j2

Lines changed: 3 additions & 3 deletions

@@ -33,10 +33,10 @@ spec:
       resources:
         # these resources are set via Spark submit properties like "spark.driver.cores"
         limits:
-          cpu: "2"
+          cpu: 1200m
           memory: 1Gi
         requests:
-          cpu: "1"
+          cpu: 300m
           memory: 1Gi
 ---
 apiVersion: v1
@@ -55,5 +55,5 @@ spec:
       cpu: "2"
       memory: 1Gi
     requests:
-      cpu: "2"
+      cpu: 1250m
       memory: 1Gi

tests/templates/kuttl/resources/10-deploy-spark-app.yaml.j2

Lines changed: 1 addition & 1 deletion

@@ -36,7 +36,7 @@ spec:
      enableVectorAgent: {{ lookup('env', 'VECTOR_AGGREGATOR') | length > 0 }}
    resources:
      cpu:
-        min: 200m
+        min: 300m
        max: 1200m
      memory:
        limit: 1024Mi
