Skip to content

Commit 1f486aa

Browse files
hc-github-team-consul-coresrahul3
and
srahul3
authored
Backport of [Feature] Invalidate session check if associated session is deleted into release/1.21.x (#22233)
* backport of commit 29ea0e5 * backport of commit 8566684 * backport of commit d753c06 --------- Co-authored-by: srahul3 <[email protected]>
1 parent a9b583b commit 1f486aa

File tree

10 files changed

+350
-116
lines changed

10 files changed

+350
-116
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
## 1.21.0 (March 17, 2025)
2+
* Enhancement: Added support for Consul Session to update the state of a Health Check, allowing for more dynamic and responsive health monitoring within the Consul ecosystem. This feature enables sessions to directly influence health check statuses, improving the overall reliability and accuracy of service health assessments.
3+
14
## 1.20.3 (February 13, 2025)
25

36
SECURITY:

agent/consul/state/session.go

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ package state
55

66
import (
77
"fmt"
8+
"github.com/hashicorp/consul/api"
89
"reflect"
910
"strings"
1011
"time"
@@ -219,7 +220,7 @@ func (s *Store) SessionCreate(idx uint64, sess *structs.Session) error {
219220
// future.
220221

221222
// Call the session creation
222-
if err := sessionCreateTxn(tx, idx, sess); err != nil {
223+
if err := s.sessionCreateTxn(tx, idx, sess); err != nil {
223224
return err
224225
}
225226

@@ -229,7 +230,7 @@ func (s *Store) SessionCreate(idx uint64, sess *structs.Session) error {
229230
// sessionCreateTxn is the inner method used for creating session entries in
230231
// an open transaction. Any health checks registered with the session will be
231232
// checked for failing status. Returns any error encountered.
232-
func sessionCreateTxn(tx WriteTxn, idx uint64, sess *structs.Session) error {
233+
func (s *Store) sessionCreateTxn(tx WriteTxn, idx uint64, sess *structs.Session) error {
233234
// Check that we have a session ID
234235
if sess.ID == "" {
235236
return ErrMissingSessionID
@@ -270,7 +271,7 @@ func sessionCreateTxn(tx WriteTxn, idx uint64, sess *structs.Session) error {
270271
return fmt.Errorf("failed inserting session: %s", err)
271272
}
272273

273-
return nil
274+
return s.updateSessionCheck(tx, idx, sess, api.HealthPassing)
274275
}
275276

276277
// SessionGet is used to retrieve an active session from the state store.
@@ -448,5 +449,33 @@ func (s *Store) deleteSessionTxn(tx WriteTxn, idx uint64, sessionID string, entM
448449
}
449450
}
450451

452+
// session invalidating the health-checks
453+
return s.updateSessionCheck(tx, idx, session, api.HealthCritical)
454+
}
455+
456+
// updateSessionCheck The method updates the health-checks associated with the session
457+
func (s *Store) updateSessionCheck(tx WriteTxn, idx uint64, session *structs.Session, checkState string) error {
458+
// Find all checks for the given Node
459+
iter, err := tx.Get(tableChecks, indexNode, Query{Value: session.Node, EnterpriseMeta: session.EnterpriseMeta})
460+
if err != nil {
461+
return fmt.Errorf("failed check lookup: %s", err)
462+
}
463+
464+
for check := iter.Next(); check != nil; check = iter.Next() {
465+
if hc := check.(*structs.HealthCheck); hc.Type == "session" && hc.Definition.SessionName == session.Name {
466+
updatedCheck := hc.Clone()
467+
updatedCheck.Status = checkState
468+
switch {
469+
case checkState == api.HealthPassing:
470+
updatedCheck.Output = fmt.Sprintf("Session '%s' in force", session.ID)
471+
default:
472+
updatedCheck.Output = fmt.Sprintf("Session '%s' is invalid", session.ID)
473+
}
474+
475+
if err := s.ensureCheckTxn(tx, idx, true, updatedCheck); err != nil {
476+
return err
477+
}
478+
}
479+
}
451480
return nil
452481
}

agent/consul/state/session_ce.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -151,9 +151,10 @@ func validateSessionChecksTxn(tx ReadTxn, session *structs.Session) error {
151151
}
152152

153153
// Verify that the check is not in critical state
154-
status := check.(*structs.HealthCheck).Status
155-
if status == api.HealthCritical {
156-
return fmt.Errorf("Check '%s' is in %s state", checkID, status)
154+
healthCheck := check.(*structs.HealthCheck)
155+
// we are discounting the health check for session checks since they are expected to be in critical state without session and this flow is expected to be used for session checks
156+
if healthCheck.Status == api.HealthCritical && healthCheck.Type != "session" {
157+
return fmt.Errorf("Check '%s' is in %s state", checkID, healthCheck.Status)
157158
}
158159
}
159160
return nil

agent/consul/state/session_test.go

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -961,3 +961,147 @@ func TestStateStore_Session_Invalidate_PreparedQuery_Delete(t *testing.T) {
961961
t.Fatalf("bad: %v", q2)
962962
}
963963
}
964+
965+
// the goal of this test is to verify if the system is blocking the session registration when a check is in critical state.
966+
func TestHealthCheck_SessionRegistrationFail(t *testing.T) {
967+
s := testStateStore(t)
968+
969+
var check *structs.HealthCheck
970+
// setup node
971+
testRegisterNode(t, s, 1, "foo-node")
972+
testRegisterCheckCustom(t, s, 1, "foo", func(chk *structs.HealthCheck) {
973+
chk.Node = "foo-node"
974+
chk.Type = "tll"
975+
chk.Status = api.HealthCritical
976+
chk.Definition = structs.HealthCheckDefinition{
977+
SessionName: "test-session",
978+
}
979+
check = chk
980+
})
981+
982+
// Ensure the index was not updated if nothing was destroyed.
983+
if idx := s.maxIndex("sessions"); idx != 0 {
984+
t.Fatalf("bad index: %d", idx)
985+
}
986+
987+
// Register a new session
988+
sess := &structs.Session{
989+
ID: testUUID(),
990+
Node: "foo-node",
991+
Name: "test-session",
992+
Checks: make([]types.CheckID, 0),
993+
}
994+
995+
sess.Checks = append(sess.Checks, check.CheckID)
996+
// assert the check is critical initially
997+
assertHealthCheckStatus(t, s, sess, check.CheckID, api.HealthCritical)
998+
999+
if err := s.SessionCreate(2, sess); err == nil {
1000+
// expecting error: Check 'foo' is in critical state
1001+
t.Fatalf("expected error, got nil")
1002+
}
1003+
}
1004+
1005+
// Allow the session to be created even if the check is critical.
1006+
// This is mainly to discount the health check of type `session`
1007+
func TestHealthCheck_SessionRegistrationAllow(t *testing.T) {
1008+
s := testStateStore(t)
1009+
1010+
var check *structs.HealthCheck
1011+
// setup node
1012+
testRegisterNode(t, s, 1, "foo-node")
1013+
testRegisterCheckCustom(t, s, 1, "foo", func(chk *structs.HealthCheck) {
1014+
chk.Node = "foo-node"
1015+
chk.Type = "session"
1016+
chk.Status = api.HealthCritical
1017+
chk.Definition = structs.HealthCheckDefinition{
1018+
SessionName: "test-session",
1019+
}
1020+
check = chk
1021+
})
1022+
1023+
// Ensure the index was not updated if nothing was destroyed.
1024+
if idx := s.maxIndex("sessions"); idx != 0 {
1025+
t.Fatalf("bad index: %d", idx)
1026+
}
1027+
1028+
// Register a new session
1029+
sess := &structs.Session{
1030+
ID: testUUID(),
1031+
Node: "foo-node",
1032+
Name: "test-session",
1033+
Checks: make([]types.CheckID, 0),
1034+
}
1035+
1036+
sess.Checks = append(sess.Checks, check.CheckID)
1037+
// assert the check is critical initially
1038+
assertHealthCheckStatus(t, s, sess, check.CheckID, api.HealthCritical)
1039+
1040+
if err := s.SessionCreate(2, sess); err != nil {
1041+
t.Fatalf("The system shall allow session to be created ignoring the session check is critical. err: %s", err)
1042+
}
1043+
}
1044+
1045+
// test the session health check when session status is changed
1046+
func TestHealthCheck_Session(t *testing.T) {
1047+
s := testStateStore(t)
1048+
1049+
var check *structs.HealthCheck
1050+
// setup node
1051+
testRegisterNode(t, s, 1, "foo-node")
1052+
testRegisterCheckCustom(t, s, 1, "foo", func(chk *structs.HealthCheck) {
1053+
chk.Node = "foo-node"
1054+
chk.Type = "session"
1055+
chk.Status = api.HealthCritical
1056+
chk.Definition = structs.HealthCheckDefinition{
1057+
SessionName: "test-session",
1058+
}
1059+
check = chk
1060+
})
1061+
1062+
// Ensure the index was not updated if nothing was destroyed.
1063+
if idx := s.maxIndex("sessions"); idx != 0 {
1064+
t.Fatalf("bad index: %d", idx)
1065+
}
1066+
1067+
// Register a new session
1068+
sess := &structs.Session{
1069+
ID: testUUID(),
1070+
Node: "foo-node",
1071+
Name: "test-session",
1072+
}
1073+
// assert the check is critical initially
1074+
assertHealthCheckStatus(t, s, sess, check.CheckID, api.HealthCritical)
1075+
1076+
if err := s.SessionCreate(2, sess); err != nil {
1077+
t.Fatalf("The system shall allow session to be created ignoring the session check is critical. err: %s", err)
1078+
}
1079+
// assert the check is critical after session creation
1080+
assertHealthCheckStatus(t, s, sess, check.CheckID, api.HealthPassing)
1081+
1082+
// Destroy the session.
1083+
if err := s.SessionDestroy(3, sess.ID, nil); err != nil {
1084+
t.Fatalf("err: %s", err)
1085+
}
1086+
// assert the check is critical after session destroy
1087+
assertHealthCheckStatus(t, s, sess, check.CheckID, api.HealthCritical)
1088+
}
1089+
1090+
func assertHealthCheckStatus(t *testing.T, s *Store, session *structs.Session, checkID types.CheckID, expectedStatus string) {
1091+
_, hc, err := s.NodeChecks(nil, session.Node, structs.DefaultEnterpriseMetaInPartition(""), structs.DefaultPeerKeyword)
1092+
if err != nil {
1093+
t.Fatalf("err: %s", err)
1094+
}
1095+
// assert the check is healthy
1096+
for _, c := range hc {
1097+
if c.CheckID == checkID {
1098+
if c.Status != expectedStatus {
1099+
t.Fatalf("check is expected to be %s but actually it is %s", expectedStatus, c.Status)
1100+
} else {
1101+
return
1102+
}
1103+
}
1104+
}
1105+
1106+
t.Fatalf("check %s, is not found", string(checkID))
1107+
}

agent/consul/state/state_store_test.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,29 @@ func testRegisterCheckWithPartition(t *testing.T, s *Store, idx uint64,
308308
}
309309
}
310310

311+
func testRegisterCheckCustom(t *testing.T, s *Store, idx uint64, checkID types.CheckID, update func(chk *structs.HealthCheck)) {
312+
chk := &structs.HealthCheck{
313+
CheckID: checkID,
314+
EnterpriseMeta: *structs.DefaultEnterpriseMetaInPartition(""),
315+
}
316+
317+
update(chk)
318+
319+
if err := s.EnsureCheck(idx, chk); err != nil {
320+
t.Fatalf("err: %s", err)
321+
}
322+
323+
tx := s.db.Txn(false)
324+
defer tx.Abort()
325+
c, err := tx.First(tableChecks, indexID, NodeCheckQuery{Node: chk.Node, CheckID: string(checkID), EnterpriseMeta: chk.EnterpriseMeta})
326+
if err != nil {
327+
t.Fatalf("err: %s", err)
328+
}
329+
if result, ok := c.(*structs.HealthCheck); !ok || result.CheckID != checkID {
330+
t.Fatalf("bad check: %#v", result)
331+
}
332+
}
333+
311334
func testRegisterSidecarProxy(t *testing.T, s *Store, idx uint64, nodeID string, targetServiceID string) {
312335
testRegisterSidecarProxyOpts(t, s, idx, nodeID, targetServiceID)
313336
}

agent/structs/structs.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1912,6 +1912,7 @@ type HealthCheckDefinition struct {
19121912
GRPCUseTLS bool `json:",omitempty"`
19131913
AliasNode string `json:",omitempty"`
19141914
AliasService string `json:",omitempty"`
1915+
SessionName string `json:",omitempty"`
19151916
TTL time.Duration `json:",omitempty"`
19161917
}
19171918

api/health.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ type HealthCheckDefinition struct {
7575
IntervalDuration time.Duration `json:"-"`
7676
TimeoutDuration time.Duration `json:"-"`
7777
DeregisterCriticalServiceAfterDuration time.Duration `json:"-"`
78+
// when parent Type is `session`, and if this session is destroyed, the check will be marked as critical
79+
SessionName string `json:",omitempty"`
7880

7981
// DEPRECATED in Consul 1.4.1. Use the above time.Duration fields instead.
8082
Interval ReadableDuration

proto/private/pbservice/healthcheck.gen.go

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)