Skip to content

Commit e3fe2ca

Browse files
razvan and sbernauer authored
feat: Support setting TLS certificate lifetimes (#501)
* feat: Support setting TLS certificate lifetimes * chore: bump op-rs * Update rust/crd/src/history.rs Co-authored-by: Sebastian Bernauer <[email protected]> * Update rust/crd/src/roles.rs Co-authored-by: Sebastian Bernauer <[email protected]> * Update rust/crd/src/roles.rs Co-authored-by: Sebastian Bernauer <[email protected]> * cargo update -p rustls --------- Co-authored-by: Sebastian Bernauer <[email protected]>
1 parent 13368c2 commit e3fe2ca

File tree

10 files changed

+111
-34
lines changed

10 files changed

+111
-34
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file.
44

55
## [Unreleased]
66

7+
### Added
8+
9+
- The lifetime of auto-generated TLS certificates is now configurable with the role and roleGroup
10+
config property `requestedSecretLifetime`. This helps reduce frequent Pod restarts ([#501]).
11+
12+
[#501]: https://github.com/stackabletech/spark-k8s-operator/pull/501
13+
714
## [24.11.0] - 2024-11-18
815

916
### Added

Cargo.lock

Lines changed: 7 additions & 18 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ serde = { version = "1.0", features = ["derive"] }
2121
serde_json = "1.0"
2222
serde_yaml = "0.9"
2323
snafu = "0.8"
24-
stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "stackable-operator-0.82.0" }
24+
stackable-operator = { git = "https://github.com/stackabletech/operator-rs.git", tag = "stackable-operator-0.83.0" }
2525
strum = { version = "0.26", features = ["derive"] }
2626
tokio = { version = "1.39", features = ["full"] }
2727
tracing = "0.1"

deploy/helm/spark-k8s-operator/crds/crds.yaml

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,10 @@ spec:
193193
nullable: true
194194
type: boolean
195195
type: object
196+
requestedSecretLifetime:
197+
description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate.
198+
nullable: true
199+
type: string
196200
resources:
197201
default:
198202
cpu:
@@ -463,6 +467,10 @@ spec:
463467
nullable: true
464468
type: boolean
465469
type: object
470+
requestedSecretLifetime:
471+
description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate.
472+
nullable: true
473+
type: string
466474
resources:
467475
default:
468476
cpu:
@@ -549,6 +557,10 @@ spec:
549557
config:
550558
default: {}
551559
properties:
560+
requestedSecretLifetime:
561+
description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate.
562+
nullable: true
563+
type: string
552564
resources:
553565
default:
554566
cpu:
@@ -1347,6 +1359,10 @@ spec:
13471359
nullable: true
13481360
type: boolean
13491361
type: object
1362+
requestedSecretLifetime:
1363+
description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate.
1364+
nullable: true
1365+
type: string
13501366
resources:
13511367
default:
13521368
cpu:
@@ -1563,6 +1579,10 @@ spec:
15631579
nullable: true
15641580
type: boolean
15651581
type: object
1582+
requestedSecretLifetime:
1583+
description: Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`. This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate.
1584+
nullable: true
1585+
type: string
15661586
resources:
15671587
default:
15681588
cpu:

rust/crd/src/history.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ use stackable_operator::{
2727
product_logging::{self, spec::Logging},
2828
role_utils::{Role, RoleGroup, RoleGroupRef},
2929
schemars::{self, JsonSchema},
30+
time::Duration,
3031
};
3132
use std::collections::{BTreeMap, HashMap};
3233
use strum::{Display, EnumIter};
@@ -400,9 +401,17 @@ pub struct HistoryConfig {
400401
pub logging: Logging<SparkHistoryServerContainer>,
401402
#[fragment_attrs(serde(default))]
402403
pub affinity: StackableAffinity,
404+
405+
/// Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`.
406+
/// This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate.
407+
#[fragment_attrs(serde(default))]
408+
pub requested_secret_lifetime: Option<Duration>,
403409
}
404410

405411
impl HistoryConfig {
412+
// Auto TLS certificate lifetime
413+
const DEFAULT_HISTORY_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(7);
414+
406415
fn default_config(cluster_name: &str) -> HistoryConfigFragment {
407416
HistoryConfigFragment {
408417
cleaner: None,
@@ -419,6 +428,7 @@ impl HistoryConfig {
419428
},
420429
logging: product_logging::spec::default_logging(),
421430
affinity: history_affinity(cluster_name),
431+
requested_secret_lifetime: Some(Self::DEFAULT_HISTORY_SECRET_LIFETIME),
422432
}
423433
}
424434
}

rust/crd/src/lib.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ use logdir::ResolvedLogDir;
1414
use product_config::{types::PropertyNameKind, ProductConfigManager};
1515
use serde::{Deserialize, Serialize};
1616
use snafu::{OptionExt, ResultExt, Snafu};
17+
use stackable_operator::time::Duration;
1718
use stackable_operator::{
1819
builder::pod::volume::{
1920
SecretFormat, SecretOperatorVolumeSourceBuilder, SecretOperatorVolumeSourceBuilderError,
@@ -281,6 +282,7 @@ impl SparkApplication {
281282
s3conn: &Option<S3ConnectionSpec>,
282283
logdir: &Option<ResolvedLogDir>,
283284
log_config_map: Option<&str>,
285+
requested_secret_lifetime: &Duration,
284286
) -> Result<Vec<Volume>, Error> {
285287
let mut result: Vec<Volume> = self.spec.volumes.clone();
286288

@@ -356,6 +358,7 @@ impl SparkApplication {
356358
.ephemeral(
357359
SecretOperatorVolumeSourceBuilder::new(cert_secret)
358360
.with_format(SecretFormat::TlsPkcs12)
361+
.with_auto_tls_cert_lifetime(*requested_secret_lifetime)
359362
.build()
360363
.context(TlsCertSecretClassVolumeBuildSnafu)?,
361364
)
@@ -1068,6 +1071,7 @@ mod tests {
10681071

10691072
use indoc::indoc;
10701073
use rstest::rstest;
1074+
use stackable_operator::time::Duration;
10711075
use std::collections::{BTreeMap, HashMap};
10721076

10731077
#[test]
@@ -1206,6 +1210,7 @@ mod tests {
12061210
},
12071211
volume_mounts: Default::default(),
12081212
affinity: StackableAffinity::default(),
1213+
requested_secret_lifetime: Some(Duration::from_days_unchecked(1)),
12091214
};
12101215

12111216
let mut props = BTreeMap::new();
@@ -1250,6 +1255,7 @@ mod tests {
12501255
},
12511256
volume_mounts: Default::default(),
12521257
affinity: StackableAffinity::default(),
1258+
requested_secret_lifetime: Some(Duration::from_days_unchecked(1)),
12531259
};
12541260

12551261
let mut props = BTreeMap::new();

rust/crd/src/logdir.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ use stackable_operator::{
1616
secret_class::SecretClassVolume,
1717
},
1818
k8s_openapi::api::core::v1::{Volume, VolumeMount},
19+
time::Duration,
1920
};
2021
use std::collections::BTreeMap;
2122

@@ -103,9 +104,9 @@ impl ResolvedLogDir {
103104
}
104105
}
105106

106-
pub fn volumes(&self) -> Result<Vec<Volume>, Error> {
107+
pub fn volumes(&self, requested_secret_lifetime: &Duration) -> Result<Vec<Volume>, Error> {
107108
match self {
108-
ResolvedLogDir::S3(s3_log_dir) => s3_log_dir.volumes(),
109+
ResolvedLogDir::S3(s3_log_dir) => s3_log_dir.volumes(requested_secret_lifetime),
109110
ResolvedLogDir::Custom(_) => Ok(vec![]),
110111
}
111112
}
@@ -248,7 +249,7 @@ impl S3LogDir {
248249
)
249250
}
250251

251-
pub fn volumes(&self) -> Result<Vec<Volume>, Error> {
252+
pub fn volumes(&self, requested_secret_lifetime: &Duration) -> Result<Vec<Volume>, Error> {
252253
let mut volumes: Vec<Volume> = self.credentials_volume()?.into_iter().collect();
253254

254255
if let Some(secret_name) = tlscerts::tls_secret_name(&self.bucket.connection) {
@@ -257,6 +258,7 @@ impl S3LogDir {
257258
.ephemeral(
258259
SecretOperatorVolumeSourceBuilder::new(secret_name)
259260
.with_format(SecretFormat::TlsPkcs12)
261+
.with_auto_tls_cert_lifetime(*requested_secret_lifetime)
260262
.build()
261263
.context(TlsCertSecretClassVolumeBuildSnafu)?,
262264
)

rust/crd/src/roles.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ use stackable_operator::{
3636
product_config_utils::Configuration,
3737
product_logging::{self, spec::Logging},
3838
schemars::{self, JsonSchema},
39+
time::Duration,
3940
utils::crds::raw_object_list_schema,
4041
};
4142
use strum::{Display, EnumIter};
@@ -123,9 +124,17 @@ pub struct RoleConfig {
123124

124125
#[fragment_attrs(serde(default))]
125126
pub affinity: StackableAffinity,
127+
128+
/// Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`.
129+
/// This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate.
130+
#[fragment_attrs(serde(default))]
131+
pub requested_secret_lifetime: Option<Duration>,
126132
}
127133

128134
impl RoleConfig {
135+
// Auto TLS certificate lifetime
136+
const DEFAULT_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(7);
137+
129138
pub fn default_config() -> RoleConfigFragment {
130139
RoleConfigFragment {
131140
resources: ResourcesFragment {
@@ -142,6 +151,7 @@ impl RoleConfig {
142151
logging: product_logging::spec::default_logging(),
143152
volume_mounts: Some(VolumeMounts::default()),
144153
affinity: Default::default(),
154+
requested_secret_lifetime: Some(Self::DEFAULT_SECRET_LIFETIME),
145155
}
146156
}
147157
pub fn volume_mounts(
@@ -206,9 +216,17 @@ pub struct SubmitConfig {
206216
pub resources: Resources<SparkStorageConfig, NoRuntimeLimits>,
207217
#[fragment_attrs(serde(default, flatten))]
208218
pub volume_mounts: Option<VolumeMounts>,
219+
220+
/// Request secret (currently only autoTls certificates) lifetime from the secret operator, e.g. `7d`, or `30d`.
221+
/// This can be shortened by the `maxCertificateLifetime` setting on the SecretClass issuing the TLS certificate.
222+
#[fragment_attrs(serde(default))]
223+
pub requested_secret_lifetime: Option<Duration>,
209224
}
210225

211226
impl SubmitConfig {
227+
// Auto TLS certificate lifetime
228+
const DEFAULT_SECRET_LIFETIME: Duration = Duration::from_days_unchecked(7);
229+
212230
pub fn default_config() -> SubmitConfigFragment {
213231
SubmitConfigFragment {
214232
resources: ResourcesFragment {
@@ -223,6 +241,7 @@ impl SubmitConfig {
223241
storage: SparkStorageConfigFragment {},
224242
},
225243
volume_mounts: Some(VolumeMounts::default()),
244+
requested_secret_lifetime: Some(Self::DEFAULT_SECRET_LIFETIME),
226245
}
227246
}
228247
}

rust/operator-binary/src/history/history_controller.rs

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ use strum::{EnumDiscriminants, IntoStaticStr};
6565
#[strum_discriminants(derive(IntoStaticStr))]
6666
#[allow(clippy::enum_variant_names)]
6767
pub enum Error {
68+
#[snafu(display("missing secret lifetime"))]
69+
MissingSecretLifetime,
70+
6871
#[snafu(display("object has no namespace"))]
6972
ObjectHasNoNamespace,
7073

@@ -444,15 +447,15 @@ fn build_stateful_set(
444447
resolved_product_image: &ResolvedProductImage,
445448
rolegroupref: &RoleGroupRef<SparkHistoryServer>,
446449
log_dir: &ResolvedLogDir,
447-
config: &HistoryConfig,
450+
merged_config: &HistoryConfig,
448451
serviceaccount: &ServiceAccount,
449452
) -> Result<StatefulSet, Error> {
450453
let log_config_map = if let Some(ContainerLogConfig {
451454
choice:
452455
Some(ContainerLogConfigChoice::Custom(CustomContainerLogConfig {
453456
custom: ConfigMapLogConfig { config_map },
454457
})),
455-
}) = config
458+
}) = merged_config
456459
.logging
457460
.containers
458461
.get(&SparkHistoryServerContainer::SparkHistory)
@@ -473,6 +476,9 @@ fn build_stateful_set(
473476

474477
let mut pb = PodBuilder::new();
475478

479+
let requested_secret_lifetime = merged_config
480+
.requested_secret_lifetime
481+
.context(MissingSecretLifetimeSnafu)?;
476482
pb.service_account_name(serviceaccount.name_unchecked())
477483
.metadata(metadata)
478484
.image_pull_secrets_from_product_image(resolved_product_image)
@@ -497,7 +503,11 @@ fn build_stateful_set(
497503
.build(),
498504
)
499505
.context(AddVolumeSnafu)?
500-
.add_volumes(log_dir.volumes().context(CreateLogDirVolumesSpecSnafu)?)
506+
.add_volumes(
507+
log_dir
508+
.volumes(&requested_secret_lifetime)
509+
.context(CreateLogDirVolumesSpecSnafu)?,
510+
)
501511
.context(AddVolumeSnafu)?
502512
.security_context(PodSecurityContext {
503513
run_as_user: Some(SPARK_UID),
@@ -516,7 +526,7 @@ fn build_stateful_set(
516526
let container = ContainerBuilder::new(container_name)
517527
.context(InvalidContainerNameSnafu)?
518528
.image_from_product_image(resolved_product_image)
519-
.resources(config.resources.clone().into())
529+
.resources(merged_config.resources.clone().into())
520530
.command(vec!["/bin/bash".to_string()])
521531
.args(command_args(log_dir))
522532
.add_container_port("http", 18080)
@@ -533,13 +543,13 @@ fn build_stateful_set(
533543
.build();
534544
pb.add_container(container);
535545

536-
if config.logging.enable_vector_agent {
546+
if merged_config.logging.enable_vector_agent {
537547
pb.add_container(
538548
vector_container(
539549
resolved_product_image,
540550
VOLUME_MOUNT_NAME_CONFIG,
541551
VOLUME_MOUNT_NAME_LOG,
542-
config
552+
merged_config
543553
.logging
544554
.containers
545555
.get(&SparkHistoryServerContainer::Vector),

0 commit comments

Comments
 (0)