Skip to content

Commit 8b278ba

Browse files
razvansbernauer
andauthored
JVM config overrides (#384)
* First working version. * Update docs and use default JVM props. * Typo * Update docs/modules/hdfs/pages/usage-guide/configuration-environment-overrides.adoc Co-authored-by: Sebastian Bernauer <[email protected]> --------- Co-authored-by: Sebastian Bernauer <[email protected]>
1 parent e85d5f6 commit 8b278ba

File tree

8 files changed

+320
-10
lines changed

8 files changed

+320
-10
lines changed

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ All notable changes to this project will be documented in this file.
77
### Added
88

99
- Default stackableVersion to operator version ([#381]).
10+
- Configuration overrides for the JVM security properties, such as DNS caching ([#384]).
1011

1112
### Changed
1213

@@ -15,6 +16,7 @@ All notable changes to this project will be documented in this file.
1516

1617
[#378]: https://github.com/stackabletech/hdfs-operator/pull/378
1718
[#381]: https://github.com/stackabletech/hdfs-operator/pull/381
19+
[#384]: https://github.com/stackabletech/hdfs-operator/pull/384
1820

1921
## [23.7.0] - 2023-07-14
2022

@@ -85,7 +87,6 @@ All notable changes to this project will be documented in this file.
8587
[#341]: https://github.com/stackabletech/hdfs-operator/pull/341
8688
[#342]: https://github.com/stackabletech/hdfs-operator/pull/342
8789

88-
8990
## [23.1.0] - 2023-01-23
9091

9192
### Added

deploy/config-spec/properties.yaml

Lines changed: 114 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,120 @@ spec:
99
comment: "Specified in https://tools.ietf.org/html/rfc3986#appendix-B"
1010

1111
properties:
12+
- property:
13+
propertyNames:
14+
- name: "networkaddress.cache.ttl"
15+
kind:
16+
type: "file"
17+
file: "security.properties"
18+
datatype:
19+
type: "integer"
20+
min: "0"
21+
recommendedValues:
22+
- fromVersion: "0.0.0"
23+
value: "30"
24+
roles:
25+
- name: "namenode"
26+
required: true
27+
asOfVersion: "0.0.0"
28+
comment: "TTL for successfully resolved domain names."
29+
description: "TTL for successfully resolved domain names."
30+
31+
- property:
32+
propertyNames:
33+
- name: "networkaddress.cache.ttl"
34+
kind:
35+
type: "file"
36+
file: "security.properties"
37+
datatype:
38+
type: "integer"
39+
min: "0"
40+
recommendedValues:
41+
- fromVersion: "0.0.0"
42+
value: "30"
43+
roles:
44+
- name: "datanode"
45+
required: true
46+
asOfVersion: "0.0.0"
47+
comment: "TTL for successfully resolved domain names."
48+
description: "TTL for successfully resolved domain names."
49+
50+
- property:
51+
propertyNames:
52+
- name: "networkaddress.cache.ttl"
53+
kind:
54+
type: "file"
55+
file: "security.properties"
56+
datatype:
57+
type: "integer"
58+
min: "0"
59+
recommendedValues:
60+
- fromVersion: "0.0.0"
61+
value: "30"
62+
roles:
63+
- name: "journalnode"
64+
required: true
65+
asOfVersion: "0.0.0"
66+
comment: "TTL for successfully resolved domain names."
67+
description: "TTL for successfully resolved domain names."
68+
69+
- property:
70+
propertyNames:
71+
- name: "networkaddress.cache.negative.ttl"
72+
kind:
73+
type: "file"
74+
file: "security.properties"
75+
datatype:
76+
type: "integer"
77+
min: "0"
78+
recommendedValues:
79+
- fromVersion: "0.0.0"
80+
value: "0"
81+
roles:
82+
- name: "namenode"
83+
required: true
84+
asOfVersion: "0.0.0"
85+
comment: "TTL for domain names that cannot be resolved."
86+
description: "TTL for domain names that cannot be resolved."
87+
88+
- property:
89+
propertyNames:
90+
- name: "networkaddress.cache.negative.ttl"
91+
kind:
92+
type: "file"
93+
file: "security.properties"
94+
datatype:
95+
type: "integer"
96+
min: "0"
97+
recommendedValues:
98+
- fromVersion: "0.0.0"
99+
value: "0"
100+
roles:
101+
- name: "datanode"
102+
required: true
103+
asOfVersion: "0.0.0"
104+
comment: "TTL for domain names that cannot be resolved."
105+
description: "TTL for domain names that cannot be resolved."
106+
107+
- property:
108+
propertyNames:
109+
- name: "networkaddress.cache.negative.ttl"
110+
kind:
111+
type: "file"
112+
file: "security.properties"
113+
datatype:
114+
type: "integer"
115+
min: "0"
116+
recommendedValues:
117+
- fromVersion: "0.0.0"
118+
value: "0"
119+
roles:
120+
- name: "journalnode"
121+
required: true
122+
asOfVersion: "0.0.0"
123+
comment: "TTL for domain names that cannot be resolved."
124+
description: "TTL for domain names that cannot be resolved."
125+
12126
- property:
13127
propertyNames:
14128
- name: "dfs.namenode.name.dir"
@@ -164,4 +278,3 @@ properties:
164278
required: false
165279
asOfVersion: "0.0.0"
166280
description: "The address and port the JournalNode HTTPS server listens on. If the port is 0 then the server will start on a free port."
167-

deploy/helm/hdfs-operator/configs/properties.yaml

Lines changed: 114 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,120 @@ spec:
99
comment: "Specified in https://tools.ietf.org/html/rfc3986#appendix-B"
1010

1111
properties:
12+
- property:
13+
propertyNames:
14+
- name: "networkaddress.cache.ttl"
15+
kind:
16+
type: "file"
17+
file: "security.properties"
18+
datatype:
19+
type: "integer"
20+
min: "0"
21+
recommendedValues:
22+
- fromVersion: "0.0.0"
23+
value: "30"
24+
roles:
25+
- name: "namenode"
26+
required: true
27+
asOfVersion: "0.0.0"
28+
comment: "TTL for successfully resolved domain names."
29+
description: "TTL for successfully resolved domain names."
30+
31+
- property:
32+
propertyNames:
33+
- name: "networkaddress.cache.ttl"
34+
kind:
35+
type: "file"
36+
file: "security.properties"
37+
datatype:
38+
type: "integer"
39+
min: "0"
40+
recommendedValues:
41+
- fromVersion: "0.0.0"
42+
value: "30"
43+
roles:
44+
- name: "datanode"
45+
required: true
46+
asOfVersion: "0.0.0"
47+
comment: "TTL for successfully resolved domain names."
48+
description: "TTL for successfully resolved domain names."
49+
50+
- property:
51+
propertyNames:
52+
- name: "networkaddress.cache.ttl"
53+
kind:
54+
type: "file"
55+
file: "security.properties"
56+
datatype:
57+
type: "integer"
58+
min: "0"
59+
recommendedValues:
60+
- fromVersion: "0.0.0"
61+
value: "30"
62+
roles:
63+
- name: "journalnode"
64+
required: true
65+
asOfVersion: "0.0.0"
66+
comment: "TTL for successfully resolved domain names."
67+
description: "TTL for successfully resolved domain names."
68+
69+
- property:
70+
propertyNames:
71+
- name: "networkaddress.cache.negative.ttl"
72+
kind:
73+
type: "file"
74+
file: "security.properties"
75+
datatype:
76+
type: "integer"
77+
min: "0"
78+
recommendedValues:
79+
- fromVersion: "0.0.0"
80+
value: "0"
81+
roles:
82+
- name: "namenode"
83+
required: true
84+
asOfVersion: "0.0.0"
85+
comment: "TTL for domain names that cannot be resolved."
86+
description: "TTL for domain names that cannot be resolved."
87+
88+
- property:
89+
propertyNames:
90+
- name: "networkaddress.cache.negative.ttl"
91+
kind:
92+
type: "file"
93+
file: "security.properties"
94+
datatype:
95+
type: "integer"
96+
min: "0"
97+
recommendedValues:
98+
- fromVersion: "0.0.0"
99+
value: "0"
100+
roles:
101+
- name: "datanode"
102+
required: true
103+
asOfVersion: "0.0.0"
104+
comment: "TTL for domain names that cannot be resolved."
105+
description: "TTL for domain names that cannot be resolved."
106+
107+
- property:
108+
propertyNames:
109+
- name: "networkaddress.cache.negative.ttl"
110+
kind:
111+
type: "file"
112+
file: "security.properties"
113+
datatype:
114+
type: "integer"
115+
min: "0"
116+
recommendedValues:
117+
- fromVersion: "0.0.0"
118+
value: "0"
119+
roles:
120+
- name: "journalnode"
121+
required: true
122+
asOfVersion: "0.0.0"
123+
comment: "TTL for domain names that cannot be resolved."
124+
description: "TTL for domain names that cannot be resolved."
125+
12126
- property:
13127
propertyNames:
14128
- name: "dfs.namenode.name.dir"
@@ -164,4 +278,3 @@ properties:
164278
required: false
165279
asOfVersion: "0.0.0"
166280
description: "The address and port the JournalNode HTTPS server listens on. If the port is 0 then the server will start on a free port."
167-

docs/modules/hdfs/pages/usage-guide/configuration-environment-overrides.adoc

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,17 @@ IMPORTANT: Overriding certain properties can lead to faulty clusters. In general
77

88
== Configuration Properties
99

10-
For a role or role group, at the same level of `config`, you can specify `configOverrides` for the `hdfs-site.xml` and `core-site.xml`. For example, if you want to set additional properties on the namenode servers, adapt the `nameNodes` section of the cluster resource like so:
10+
For a role or role group, at the same level of `config`, you can specify `configOverrides` for the following files:
11+
12+
- `hdfs-site.xml`
13+
- `core-site.xml`
14+
- `hadoop-policy.xml`
15+
- `ssl-server.xml`
16+
- `ssl-client.xml`
17+
- `security.properties`
18+
19+
20+
For example, if you want to set additional properties on the namenode servers, adapt the `nameNodes` section of the cluster resource like so:
1121

1222
[source,yaml]
1323
----
@@ -43,6 +53,35 @@ All override property values must be strings. The properties will be formatted a
4353

4454
For a full list of configuration options, refer to the Apache HDFS documentation for https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/hdfs-default.xml[hdfs-site.xml] and https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/core-default.xml[core-site.xml].
4555

56+
=== The security.properties file
57+
58+
The `security.properties` file is used to configure JVM security properties. Users seldom need to change any of these, but there is one use case that stands out and that users need to be aware of: the JVM DNS cache.
59+
60+
The JVM manages its own cache of successfully resolved host names as well as a cache of host names that cannot be resolved. Some products of the Stackable platform are very sensitive to the contents of these caches and their performance is heavily affected by them. As of version 3.3.4, HDFS performs poorly if the positive cache is disabled. To cache resolved host names, and thus speed up HDFS operations, you can configure the TTL of entries in the positive cache like this:
61+
62+
[source,yaml]
63+
----
64+
nameNodes:
65+
configOverrides:
66+
security.properties:
67+
networkaddress.cache.ttl: "30"
68+
networkaddress.cache.negative.ttl: "0"
69+
dataNodes:
70+
configOverrides:
71+
security.properties:
72+
networkaddress.cache.ttl: "30"
73+
networkaddress.cache.negative.ttl: "0"
74+
journalNodes:
75+
configOverrides:
76+
security.properties:
77+
networkaddress.cache.ttl: "30"
78+
networkaddress.cache.negative.ttl: "0"
79+
----
80+
81+
NOTE: The operator configures DNS caching by default as shown in the example above.
82+
83+
For details on JVM security, see the https://docs.oracle.com/en/java/javase/11/security/java-security-overview1.html[Java Security Overview].
84+
4685

4786
== Environment Variables
4887

rust/crd/src/constants.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ pub const HADOOP_POLICY_XML: &str = "hadoop-policy.xml";
1616
pub const SSL_SERVER_XML: &str = "ssl-server.xml";
1717
pub const SSL_CLIENT_XML: &str = "ssl-client.xml";
1818
pub const LOG4J_PROPERTIES: &str = "log4j.properties";
19+
pub const JVM_SECURITY_PROPERTIES_FILE: &str = "security.properties";
1920

2021
pub const SERVICE_PORT_NAME_RPC: &str = "rpc";
2122
pub const SERVICE_PORT_NAME_IPC: &str = "ipc";

rust/crd/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -616,6 +616,7 @@ impl HdfsCluster {
616616
PropertyNameKind::File(HADOOP_POLICY_XML.to_string()),
617617
PropertyNameKind::File(SSL_SERVER_XML.to_string()),
618618
PropertyNameKind::File(SSL_CLIENT_XML.to_string()),
619+
PropertyNameKind::File(JVM_SECURITY_PROPERTIES_FILE.to_string()),
619620
PropertyNameKind::Env,
620621
];
621622

rust/operator/src/container.rs

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,9 @@ use snafu::{OptionExt, ResultExt, Snafu};
2525
use stackable_hdfs_crd::{
2626
constants::{
2727
DATANODE_ROOT_DATA_DIR_PREFIX, DEFAULT_DATA_NODE_METRICS_PORT,
28-
DEFAULT_JOURNAL_NODE_METRICS_PORT, DEFAULT_NAME_NODE_METRICS_PORT, LOG4J_PROPERTIES,
29-
NAMENODE_ROOT_DATA_DIR, SERVICE_PORT_NAME_IPC, SERVICE_PORT_NAME_RPC,
30-
STACKABLE_ROOT_DATA_DIR,
28+
DEFAULT_JOURNAL_NODE_METRICS_PORT, DEFAULT_NAME_NODE_METRICS_PORT,
29+
JVM_SECURITY_PROPERTIES_FILE, LOG4J_PROPERTIES, NAMENODE_ROOT_DATA_DIR,
30+
SERVICE_PORT_NAME_IPC, SERVICE_PORT_NAME_RPC, STACKABLE_ROOT_DATA_DIR,
3131
},
3232
storage::DataNodeStorageConfig,
3333
DataNodeContainer, HdfsCluster, HdfsPodRef, HdfsRole, MergedConfig, NameNodeContainer,
@@ -975,6 +975,8 @@ impl ContainerConfig {
975975
ContainerConfig::Hdfs {
976976
role, metrics_port, ..
977977
} => {
978+
let cvd = ContainerVolumeDirs::from(role);
979+
let config_dir = cvd.final_config();
978980
// This currently points at 0.16.1 for historic reasons.
979981
// We used to (and actually still do) hardcode the version number of JMX Exporter
980982
// Newer Docker Images (anything after 23.7) do upgrade the version however and they do
@@ -985,7 +987,7 @@ impl ContainerConfig {
985987
// We can fix this properly by pointing at the versionless JAR in one of our upcoming releases (e.g. 24.x)
986988
let mut jvm_args = vec![
987989
format!(
988-
"-javaagent:/stackable/jmx/jmx_prometheus_javaagent-0.16.1.jar={metrics_port}:/stackable/jmx/{role}.yaml",
990+
"-Djava.security.properties={config_dir}/{JVM_SECURITY_PROPERTIES_FILE} -javaagent:/stackable/jmx/jmx_prometheus_javaagent-0.16.1.jar={metrics_port}:/stackable/jmx/{role}.yaml",
989991
)];
990992

991993
if hdfs.has_kerberos_enabled() {
@@ -1259,6 +1261,18 @@ impl From<HdfsRole> for ContainerVolumeDirs {
12591261
}
12601262
}
12611263

1264+
/// Builds the [`ContainerVolumeDirs`] for an HDFS role from a borrowed [`HdfsRole`],
/// so callers that only hold a reference do not have to give up or clone the role.
impl From<&HdfsRole> for ContainerVolumeDirs {
1265+
/// Derives every directory by appending the role's `Display` output to the
/// matching `NODE_BASE_*` constant; the volume mount names are taken from
/// the `ContainerConfig` constants.
fn from(role: &HdfsRole) -> Self {
1266+
ContainerVolumeDirs {
1267+
// `<NODE_BASE_CONFIG_DIR>/<role>`
final_config_dir: format!("{base}/{role}", base = Self::NODE_BASE_CONFIG_DIR),
1268+
// `<NODE_BASE_CONFIG_DIR_MOUNT>/<role>`
config_mount: format!("{base}/{role}", base = Self::NODE_BASE_CONFIG_DIR_MOUNT),
1269+
config_mount_name: ContainerConfig::HDFS_CONFIG_VOLUME_MOUNT_NAME.to_string(),
1270+
// `<NODE_BASE_LOG_DIR_MOUNT>/<role>`
log_mount: format!("{base}/{role}", base = Self::NODE_BASE_LOG_DIR_MOUNT),
1271+
log_mount_name: ContainerConfig::HDFS_LOG_VOLUME_MOUNT_NAME.to_string(),
1272+
}
1273+
}
1274+
}
1275+
12621276
impl TryFrom<&str> for ContainerVolumeDirs {
12631277
type Error = Error;
12641278

0 commit comments

Comments
 (0)