
Commit 2c807ca

JVM config overrides (#272)

* wip
* Update changelog.
* First working version
* Update smoke test
* Fix path to security.properties for histo server.
* Update docs.
* Fix logging test.
* Remove redundant java options.
* Update smoke test.
* Remove debug=ssl property.
* Revert "Remove redundant java options." This reverts commit e8d79f3.
* Remove SPARK_DAEMON_JAVA_OPTS
* Remove javax.net.debug

1 parent 22545ef · commit 2c807ca

File tree: 12 files changed, +271 -51 lines


CHANGELOG.md (+3)

@@ -7,15 +7,18 @@ All notable changes to this project will be documented in this file.
 ### Added
 
 - Default stackableVersion to operator version. It is recommended to remove `spec.image.stackableVersion` from your custom resources ([#267], [#268]).
+- Configuration overrides for the JVM security properties, such as DNS caching ([#272]).
 
 ### Changed
 
 - `vector` `0.26.0` -> `0.31.0` ([#269]).
 - `operator-rs` `0.44.0` -> `0.45.1` ([#267]).
+- Removed usages of SPARK_DAEMON_JAVA_OPTS since it's not a reliable way to pass extra JVM options ([#272]).
 
 [#267]: https://github.com/stackabletech/spark-k8s-operator/pull/267
 [#268]: https://github.com/stackabletech/spark-k8s-operator/pull/268
 [#269]: https://github.com/stackabletech/spark-k8s-operator/pull/269
+[#272]: https://github.com/stackabletech/spark-k8s-operator/pull/272
 
 ## [23.7.0] - 2023-07-14

deploy/config-spec/properties.yaml (+38 -1)

@@ -3,4 +3,41 @@
 version: 0.1.0
 spec:
   units: []
-  properties: []
+  properties:
+    - property: &jvmDnsCacheTtl
+        propertyNames:
+          - name: "networkaddress.cache.ttl"
+            kind:
+              type: "file"
+              file: "security.properties"
+        datatype:
+          type: "integer"
+          min: "0"
+        recommendedValues:
+          - fromVersion: "0.0.0"
+            value: "30"
+        roles:
+          - name: "node"
+            required: true
+        asOfVersion: "0.0.0"
+        comment: "History server - TTL for successfully resolved domain names."
+        description: "History server - TTL for successfully resolved domain names."
+
+    - property: &jvmDnsCacheNegativeTtl
+        propertyNames:
+          - name: "networkaddress.cache.negative.ttl"
+            kind:
+              type: "file"
+              file: "security.properties"
+        datatype:
+          type: "integer"
+          min: "0"
+        recommendedValues:
+          - fromVersion: "0.0.0"
+            value: "0"
+        roles:
+          - name: "node"
+            required: true
+        asOfVersion: "0.0.0"
+        comment: "History server - TTL for domain names that cannot be resolved."
+        description: "History server - TTL for domain names that cannot be resolved."

deploy/helm/spark-k8s-operator/configs/properties.yaml (+38 -1)

@@ -3,4 +3,41 @@
 version: 0.1.0
 spec:
   units: []
-  properties: []
+  properties:
+    - property: &jvmDnsCacheTtl
+        propertyNames:
+          - name: "networkaddress.cache.ttl"
+            kind:
+              type: "file"
+              file: "security.properties"
+        datatype:
+          type: "integer"
+          min: "0"
+        recommendedValues:
+          - fromVersion: "0.0.0"
+            value: "30"
+        roles:
+          - name: "node"
+            required: true
+        asOfVersion: "0.0.0"
+        comment: "History server - TTL for successfully resolved domain names."
+        description: "History server - TTL for successfully resolved domain names."
+
+    - property: &jvmDnsCacheNegativeTtl
+        propertyNames:
+          - name: "networkaddress.cache.negative.ttl"
+            kind:
+              type: "file"
+              file: "security.properties"
+        datatype:
+          type: "integer"
+          min: "0"
+        recommendedValues:
+          - fromVersion: "0.0.0"
+            value: "0"
+        roles:
+          - name: "node"
+            required: true
+        asOfVersion: "0.0.0"
+        comment: "History server - TTL for domain names that cannot be resolved."
+        description: "History server - TTL for domain names that cannot be resolved."

deploy/helm/spark-k8s-operator/crds/crds.yaml (+12)

@@ -527,6 +527,12 @@ spec:
                 type: array
               type: object
           type: object
+          jvmSecurity:
+            additionalProperties:
+              nullable: true
+              type: string
+            default: {}
+            type: object
           logging:
             default:
               enableVectorAgent: null
@@ -4015,6 +4021,12 @@ spec:
              minimum: 0.0
              nullable: true
              type: integer
+          jvmSecurity:
+            additionalProperties:
+              nullable: true
+              type: string
+            default: {}
+            type: object
           logging:
             default:
               enableVectorAgent: null
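
For illustration, the schema above adds `jvmSecurity` as a nullable string map defaulting to `{}`. A minimal sketch of setting it on a history server role group follows; the `roleGroups`/`config` nesting is assumed from the usual Stackable CRD layout, and only the `jvmSecurity` field itself comes from this change:

nodes:
  roleGroups:
    default:
      config:
        jvmSecurity:
          networkaddress.cache.ttl: "10"
          networkaddress.cache.negative.ttl: "0"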

docs/modules/spark-k8s/pages/crd-reference.adoc (+6)

@@ -116,4 +116,10 @@ Below are listed the CRD fields that can be defined by the user:
 |`spec.logFileDirectory.prefix`
 |Prefix to use when storing events for the Spark History server.
 
+|`spec.driver.jvmSecurity`
+|A list of JVM security properties to pass on to the driver VM. The TTL of DNS caches is especially important.
+
+|`spec.executor.jvmSecurity`
+|A list of JVM security properties to pass on to the executor VM. The TTL of DNS caches is especially important.
+
 |===
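
For illustration, a SparkApplication could set these fields per role as sketched below; the field paths follow the reference entries above, while the TTL values are arbitrary examples:

spec:
  driver:
    jvmSecurity:
      networkaddress.cache.ttl: "30"
  executor:
    jvmSecurity:
      networkaddress.cache.negative.ttl: "5"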

docs/modules/spark-k8s/pages/usage-guide/history-server.adoc (+26 -1)

@@ -31,7 +31,7 @@ The secret with S3 credentials must contain at least the following two keys:
 
 Any other entries of the Secret are ignored by the operator.
 
-== Application configuration
+== Spark application configuration
 
 
 The example below demonstrates how to configure Spark applications to write log events to an S3 bucket.
@@ -65,3 +65,28 @@ spark-history-node-cleaner NodePort 10.96.203.43 <none> 18080:325
 By setting up port forwarding on 18080 the UI can be opened by pointing your browser to `http://localhost:18080`:
 
 image::history-server-ui.png[History Server Console]
+
+== Configuration Properties
+
+For a role group of the Spark history server, you can specify `configOverrides` for the following files:
+
+- `security.properties`
+
+=== The security.properties file
+
+The `security.properties` file is used to configure JVM security properties. Users very seldom need to tweak any of these, but one use case stands out that users should be aware of: the JVM DNS cache.
+
+The JVM manages its own cache of successfully resolved host names, as well as a cache of host names that cannot be resolved. Some products of the Stackable platform are very sensitive to the contents of these caches, and their performance is heavily affected by them. As of version 3.4.0, Apache Spark may perform poorly if the positive cache is disabled. To cache resolved host names, and thus speed up queries, you can configure the TTL of entries in the positive cache like this:
+
+[source,yaml]
+----
+nodes:
+  configOverrides:
+    security.properties:
+      networkaddress.cache.ttl: "30"
+      networkaddress.cache.negative.ttl: "0"
+----
+
+NOTE: The operator configures DNS caching by default as shown in the example above.
+
+For details on JVM security, see https://docs.oracle.com/en/java/javase/11/security/java-security-overview1.html
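
For illustration, the same mechanism can raise the positive-cache TTL, for example in a cluster where resolved addresses rarely change. This is a sketch: the value "300" is an arbitrary example, while the override structure is the one documented above:

nodes:
  configOverrides:
    security.properties:
      networkaddress.cache.ttl: "300"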

rust/crd/src/constants.rs (+4 -2)

@@ -12,6 +12,7 @@ pub const VOLUME_MOUNT_PATH_EXECUTOR_POD_TEMPLATES: &str =
 pub const POD_TEMPLATE_FILE: &str = "template.yaml";
 
 pub const VOLUME_MOUNT_NAME_CONFIG: &str = "config";
+pub const VOLUME_MOUNT_PATH_CONFIG: &str = "/stackable/spark/conf";
 
 pub const VOLUME_MOUNT_NAME_JOB: &str = "job-files";
 pub const VOLUME_MOUNT_PATH_JOB: &str = "/stackable/spark/jobs";
@@ -27,6 +28,8 @@ pub const VOLUME_MOUNT_PATH_LOG: &str = "/stackable/log";
 
 pub const LOG4J2_CONFIG_FILE: &str = "log4j2.properties";
 
+pub const JVM_SECURITY_PROPERTIES_FILE: &str = "security.properties";
+
 pub const ACCESS_KEY_ID: &str = "accessKey";
 pub const SECRET_ACCESS_KEY: &str = "secretKey";
 pub const S3_SECRET_DIR_NAME: &str = "/stackable/secrets";
@@ -67,8 +70,7 @@ pub const HISTORY_ROLE_NAME: &str = "node";
 
 pub const HISTORY_IMAGE_BASE_NAME: &str = "spark-k8s";
 
-pub const HISTORY_CONFIG_FILE_NAME: &str = "spark-defaults.conf";
-pub const HISTORY_CONFIG_FILE_NAME_FULL: &str = "/stackable/spark/conf/spark-defaults.conf";
+pub const SPARK_DEFAULTS_FILE_NAME: &str = "spark-defaults.conf";
 
 pub const SPARK_CLUSTER_ROLE: &str = "spark-k8s-clusterrole";
 pub const SPARK_UID: i64 = 1000;
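
Read together with the commit message item "Fix path to security.properties for histo server", these constants suggest the history server picks up its JVM security settings from `/stackable/spark/conf/security.properties` (VOLUME_MOUNT_PATH_CONFIG joined with JVM_SECURITY_PROPERTIES_FILE). This is an inference from the constants, not a path spelled out explicitly in this diff.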

rust/crd/src/history.rs (+4 -1)

@@ -200,7 +200,10 @@ impl SparkHistoryServer {
         > = vec![(
             HISTORY_ROLE_NAME.to_string(),
             (
-                vec![PropertyNameKind::File(HISTORY_CONFIG_FILE_NAME.to_string())],
+                vec![
+                    PropertyNameKind::File(SPARK_DEFAULTS_FILE_NAME.to_string()),
+                    PropertyNameKind::File(JVM_SECURITY_PROPERTIES_FILE.to_string()),
+                ],
                 self.spec.nodes.clone(),
             ),
         )]

rust/crd/src/lib.rs (+50 -16)

@@ -538,11 +538,27 @@ impl SparkApplication {
             }
         }
 
-        // s3 with TLS
+        // Extra JVM opts:
+        // - java security properties
+        // - s3 with TLS
+        let mut extra_java_opts = vec![format!(
+            "-Djava.security.properties={VOLUME_MOUNT_PATH_LOG_CONFIG}/{JVM_SECURITY_PROPERTIES_FILE}"
+        )];
         if tlscerts::tls_secret_names(s3conn, s3_log_dir).is_some() {
-            submit_cmd.push(format!("--conf spark.driver.extraJavaOptions=\"-Djavax.net.ssl.trustStore={STACKABLE_TRUST_STORE}/truststore.p12 -Djavax.net.ssl.trustStorePassword={STACKABLE_TLS_STORE_PASSWORD} -Djavax.net.ssl.trustStoreType=pkcs12 -Djavax.net.debug=ssl,handshake\""));
-            submit_cmd.push(format!("--conf spark.executor.extraJavaOptions=\"-Djavax.net.ssl.trustStore={STACKABLE_TRUST_STORE}/truststore.p12 -Djavax.net.ssl.trustStorePassword={STACKABLE_TLS_STORE_PASSWORD} -Djavax.net.ssl.trustStoreType=pkcs12 -Djavax.net.debug=ssl,handshake\""));
+            extra_java_opts.extend(
+                vec![
+                    format!("-Djavax.net.ssl.trustStore={STACKABLE_TRUST_STORE}/truststore.p12"),
+                    format!("-Djavax.net.ssl.trustStorePassword={STACKABLE_TLS_STORE_PASSWORD}"),
+                    format!("-Djavax.net.ssl.trustStoreType=pkcs12"),
+                ]
+                .into_iter(),
+            );
         }
+        let str_extra_java_opts = extra_java_opts.join(" ");
+        submit_cmd.extend(vec![
+            format!("--conf spark.driver.extraJavaOptions=\"{str_extra_java_opts}\""),
+            format!("--conf spark.executor.extraJavaOptions=\"{str_extra_java_opts}\""),
+        ]);
 
         // repositories and packages arguments
         if let Some(deps) = self.spec.deps.clone() {
@@ -642,18 +658,6 @@ impl SparkApplication {
                 value_from: None,
             });
         }
-        if let Some(s3logdir) = s3logdir {
-            if tlscerts::tls_secret_name(&s3logdir.bucket.connection).is_some() {
-                e.push(EnvVar {
-                    name: "SPARK_DAEMON_JAVA_OPTS".to_string(),
-                    value: Some(format!(
-                        "-Djavax.net.ssl.trustStore={STACKABLE_TRUST_STORE}/truststore.p12 -Djavax.net.ssl.trustStorePassword={STACKABLE_TLS_STORE_PASSWORD} -Djavax.net.ssl.trustStoreType=pkcs12"
-                    )),
-                    value_from: None,
-                });
-            }
-        }
-
         e
     }
@@ -957,6 +961,8 @@ pub struct DriverConfig {
     #[fragment_attrs(serde(default))]
     #[fragment_attrs(schemars(schema_with = "pod_overrides_schema"))]
     pub pod_overrides: PodTemplateSpec,
+    #[fragment_attrs(serde(default))]
+    pub jvm_security: HashMap<String, Option<String>>,
 }
 
 impl DriverConfig {
@@ -977,6 +983,18 @@ impl DriverConfig {
             volume_mounts: Some(VolumeMounts::default()),
             affinity: StackableAffinityFragment::default(),
             pod_overrides: PodTemplateSpec::default(),
+            jvm_security: vec![
+                (
+                    "networkaddress.cache.ttl".to_string(),
+                    Some("30".to_string()),
+                ),
+                (
+                    "networkaddress.cache.negative.ttl".to_string(),
+                    Some("0".to_string()),
+                ),
+            ]
+            .into_iter()
+            .collect(),
         }
     }
 }
@@ -1011,6 +1029,8 @@ pub struct ExecutorConfig {
     #[fragment_attrs(serde(default))]
    #[fragment_attrs(schemars(schema_with = "pod_overrides_schema"))]
     pub pod_overrides: PodTemplateSpec,
+    #[fragment_attrs(serde(default))]
+    pub jvm_security: HashMap<String, Option<String>>,
 }
 
 impl ExecutorConfig {
@@ -1033,6 +1053,18 @@ impl ExecutorConfig {
             node_selector: Default::default(),
             affinity: Default::default(),
             pod_overrides: PodTemplateSpec::default(),
+            jvm_security: vec![
+                (
+                    "networkaddress.cache.ttl".to_string(),
+                    Some("30".to_string()),
+                ),
+                (
+                    "networkaddress.cache.negative.ttl".to_string(),
+                    Some("0".to_string()),
+                ),
+            ]
+            .into_iter()
+            .collect(),
         }
     }
 }
@@ -1053,7 +1085,7 @@ mod tests {
     };
     use stackable_operator::k8s_openapi::api::core::v1::PodTemplateSpec;
     use stackable_operator::product_logging::spec::Logging;
-    use std::collections::BTreeMap;
+    use std::collections::{BTreeMap, HashMap};
     use std::str::FromStr;
 
     #[test]
@@ -1419,6 +1451,7 @@ spec:
             volume_mounts: None,
             affinity: StackableAffinity::default(),
             pod_overrides: PodTemplateSpec::default(),
+            jvm_security: HashMap::new(),
         };
 
         let mut props = BTreeMap::new();
@@ -1474,6 +1507,7 @@ spec:
             node_selector: None,
             affinity: StackableAffinity::default(),
             pod_overrides: PodTemplateSpec::default(),
+            jvm_security: HashMap::new(),
         };
 
         let mut props = BTreeMap::new();
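
For illustration, after this change the driver and executor JVMs always receive the same option string. With angle-bracket placeholders standing in for the mount-path constants (whose values are not shown in this diff), a submit command with TLS enabled would gain flags along these lines:

--conf spark.driver.extraJavaOptions="-Djava.security.properties=<log-config-mount>/security.properties -Djavax.net.ssl.trustStore=<trust-store>/truststore.p12 -Djavax.net.ssl.trustStorePassword=<password> -Djavax.net.ssl.trustStoreType=pkcs12"
--conf spark.executor.extraJavaOptions="-Djava.security.properties=<log-config-mount>/security.properties -Djavax.net.ssl.trustStore=<trust-store>/truststore.p12 -Djavax.net.ssl.trustStorePassword=<password> -Djavax.net.ssl.trustStoreType=pkcs12"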

rust/crd/src/s3logdir.rs (+1 -1)

@@ -95,7 +95,7 @@ impl S3LogDir {
     }
 
     /// Constructs the properties needed for loading event logs from S3.
-    /// These properties are later written in the `HISTORY_CONFIG_FILE_NAME_FULL` file.
+    /// These properties are later written in the `SPARK_DEFAULTS_FILE_NAME` file.
     ///
     /// The following properties related to credentials are not included:
     /// * spark.hadoop.fs.s3a.aws.credentials.provider
