Skip to content

Commit 0da9429

Browse files
committed
feat: Add spot termination handler
1 parent 2667fbf commit 0da9429

File tree

14 files changed

+583
-0
lines changed

14 files changed

+583
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
import { EC2Client, DescribeInstancesCommand, DescribeInstancesResult } from '@aws-sdk/client-ec2';
2+
import { mockClient } from 'aws-sdk-client-mock';
3+
import { getInstances, tagFilter } from './ec2';
4+
5+
// Mock of EC2Client shared by the getInstances tests below; it is reset before each of them.
const ec2Mock = mockClient(EC2Client);
6+
7+
describe('getInstances', () => {
  beforeEach(() => {
    // Drop behaviours registered by the previous test so every case starts from a clean mock.
    ec2Mock.reset();
  });

  it('should return the instance when found', async () => {
    const instanceId = 'i-1234567890abcdef0';
    const instance = { InstanceId: instanceId };
    ec2Mock.on(DescribeInstancesCommand).resolves({
      Reservations: [{ Instances: [instance] }],
    });

    const result = await getInstances(new EC2Client({}), [instanceId]);
    expect(result).toEqual([instance]);
  });

  // getInstances resolves to an empty array (never null) when the response carries no instances.
  describe('should return an empty array when the instance is not found', () => {
    it.each([{ Reservations: [] }, {}, { Reservations: undefined }, { Reservations: [{}] }])(
      'with %p',
      async (item: DescribeInstancesResult) => {
        const instanceId = 'i-1234567890abcdef0';
        ec2Mock.on(DescribeInstancesCommand).resolves(item);

        const result = await getInstances(new EC2Client({}), [instanceId]);
        expect(result).toEqual([]);
      },
    );
  });
});
36+
37+
describe('tagFilter', () => {
  // Instance carrying both a Name and an Environment tag, used by the matching cases.
  const taggedInstance = {
    Tags: [
      { Key: 'Name', Value: 'test-instance' },
      { Key: 'Environment', Value: 'production' },
    ],
  };
  // Filter set that requires a Name and an Environment tag.
  const nameAndEnvironment = { Name: 'test', Environment: 'prod' };

  describe('should return true when the instance matches the tag filters', () => {
    it.each([{ Environment: 'production' }, { Environment: 'prod' }])(
      'with %p',
      (tagFilters: Record<string, string>) => {
        expect(tagFilter(taggedInstance, tagFilters)).toBe(true);
      },
    );
  });

  it('should return false when the instance does not have all the tags', () => {
    const nameOnlyInstance = { Tags: [{ Key: 'Name', Value: 'test-instance' }] };

    expect(tagFilter(nameOnlyInstance, nameAndEnvironment)).toBe(false);
  });

  it('should return false when the instance does not have any tags', () => {
    expect(tagFilter({}, nameAndEnvironment)).toBe(false);
  });

  it('should return true if the tag filters are empty', () => {
    expect(tagFilter(taggedInstance, {})).toBe(true);
  });

  it('should return false if instance is null', () => {
    expect(tagFilter(null, nameAndEnvironment)).toBe(false);
  });
});
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import { DescribeInstancesCommand, EC2Client, Instance } from '@aws-sdk/client-ec2';
2+
3+
/**
 * Describes the given EC2 instances.
 *
 * @param ec2 - client used to send the DescribeInstances request.
 * @param instanceId - ids of the instances to look up.
 * @returns the instances found across all reservations in the response; an empty array
 *   when no ids are given or the response contains no instances.
 */
export async function getInstances(ec2: EC2Client, instanceId: string[]): Promise<Instance[]> {
  // DescribeInstances without instance ids describes every instance visible to the caller,
  // so never send the request for a missing or empty id list.
  if (!instanceId || instanceId.length === 0) {
    return [];
  }

  const result = await ec2.send(new DescribeInstancesCommand({ InstanceIds: instanceId }));

  // The requested instances can be spread over several reservations; collect them all
  // instead of reading only the first reservation.
  return result.Reservations?.flatMap((reservation) => reservation.Instances ?? []) ?? [];
}
8+
9+
/**
 * Tells whether an instance satisfies every entry of `tagFilters`.
 *
 * An entry is satisfied when the instance has a tag whose key equals the filter key and
 * whose value starts with the filter value. An empty filter set is always satisfied; a
 * null instance or an instance without tags satisfies nothing else.
 *
 * @param instance - instance to inspect, or null when none is available.
 * @param tagFilters - map of tag key to required value prefix.
 * @returns true when all filters are satisfied, false otherwise.
 */
export function tagFilter(instance: Instance | null, tagFilters: Record<string, string>): boolean {
  const tags = instance?.Tags;

  for (const key of Object.keys(tagFilters)) {
    const matchingTag = tags?.find((tag) => tag.Key === key && tag.Value?.startsWith(tagFilters[key]));
    if (!matchingTag) {
      return false;
    }
  }

  return true;
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
import { Instance } from '@aws-sdk/client-ec2';
2+
import 'aws-sdk-client-mock-jest';
3+
import { SpotInterruptionWarning, SpotTerminationDetail } from './types';
4+
import { createSingleMetric } from '@aws-github-runner/aws-powertools-util';
5+
import { MetricUnit } from '@aws-lambda-powertools/metrics';
6+
import { metricEvent } from './metric-event';
7+
8+
// Keep the real powertools util module, but replace createSingleMetric with a spy that
// returns a stub metric exposing only addMetadata.
jest.mock('@aws-github-runner/aws-powertools-util', () => {
  const actual = jest.requireActual('@aws-github-runner/aws-powertools-util');
  return {
    ...actual,
    createSingleMetric: jest.fn(() => ({ addMetadata: jest.fn() })),
  };
});

// Id of the instance the sample interruption warning refers to.
const interruptedInstanceId = 'i-abcd1111';

// Sample "EC2 Spot Instance Interruption Warning" event passed to metricEvent.
const event: SpotInterruptionWarning<SpotTerminationDetail> = {
  id: '1',
  version: '0',
  source: 'aws.ec2',
  'detail-type': 'EC2 Spot Instance Interruption Warning',
  account: '123456789012',
  region: 'us-east-1',
  time: '2015-11-11T21:29:54Z',
  resources: ['arn:aws:ec2:us-east-1b:instance/i-abcd1111'],
  detail: {
    'instance-id': interruptedInstanceId,
    'instance-action': 'terminate',
  },
};

// Fully populated instance matching the event above.
const instance: Instance = {
  InstanceId: interruptedInstanceId,
  InstanceType: 't2.micro',
  State: { Name: 'running' },
  LaunchTime: new Date('2021-01-01'),
  Tags: [
    { Key: 'Name', Value: 'test-instance' },
    { Key: 'ghr:environment', Value: 'test' },
    { Key: 'ghr:created_by', Value: 'niek' },
  ],
};
44+
45+
describe('create metric and metric logs', () => {
  beforeEach(() => {
    // Reset call history of the createSingleMetric spy between tests.
    jest.clearAllMocks();
  });

  it('should log and create a metric', async () => {
    const metricName = 'SpotInterruptionWarning';
    await metricEvent(instance, event, metricName, console);
    expect(createSingleMetric).toHaveBeenCalledTimes(1);
    // Expected dimensions are spelled out from the fixture instead of being recomputed with
    // the same expressions the implementation uses, so a regression there is detected.
    expect(createSingleMetric).toHaveBeenCalledWith(metricName, MetricUnit.Count, 1, {
      InstanceType: 't2.micro',
      Environment: 'test',
    });
  });

  it('should log and create a metric for instance with limited data', async () => {
    const metricName = 'SpotInterruptionWarning';
    const instanceMinimalData: Instance = {
      ...instance,
      InstanceId: undefined,
      InstanceType: undefined,
      LaunchTime: undefined,
      Tags: undefined,
    };

    await metricEvent(instanceMinimalData, event, metricName, console);
    expect(createSingleMetric).toHaveBeenCalledTimes(1);
    // Missing instance type and environment tag both fall back to 'unknown'.
    expect(createSingleMetric).toHaveBeenCalledWith(metricName, MetricUnit.Count, 1, {
      InstanceType: 'unknown',
      Environment: 'unknown',
    });
  });

  it('should log and NOT create a metric', async () => {
    await expect(metricEvent(instance, event, undefined, console)).resolves.not.toThrow();
    expect(createSingleMetric).not.toHaveBeenCalled();
  });
});
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import { createSingleMetric } from '@aws-github-runner/aws-powertools-util';
2+
import { Instance } from '@aws-sdk/client-ec2';
3+
import { MetricUnit } from '@aws-lambda-powertools/metrics';
4+
import { Logger } from '@aws-sdk/types';
5+
import { EventBridgeEvent } from 'aws-lambda';
6+
7+
/**
 * Logs the details of the instance a spot notification refers to and, when a metric name
 * is supplied, creates a single count metric for it.
 *
 * @param instance - instance the notification is about.
 * @param event - the received event; its `time` is used to derive the instance running time.
 * @param metricName - name of the metric to create, or undefined to only log.
 * @param logger - logger that receives the informational message.
 */
export async function metricEvent(
  instance: Instance,
  event: EventBridgeEvent<string, unknown>,
  metricName: string | undefined,
  logger: Logger,
): Promise<void> {
  const { InstanceId, InstanceType, LaunchTime, State, Tags } = instance;
  const tagValue = (key: string): string | undefined => Tags?.find((tag) => tag.Key === key)?.Value;

  // Seconds between launch and the event time; left undefined when the launch time is unknown.
  let instanceRunningTimeInSeconds: number | undefined;
  if (LaunchTime) {
    const eventMillis = new Date(event.time).getTime();
    const launchMillis = new Date(LaunchTime).getTime();
    instanceRunningTimeInSeconds = (eventMillis - launchMillis) / 1000;
  }

  logger.info(`Received spot notification for ${metricName}`, {
    instanceId: InstanceId,
    instanceType: InstanceType ?? 'unknown',
    instanceName: tagValue('Name'),
    instanceState: State?.Name,
    instanceLaunchTime: LaunchTime,
    instanceRunningTimeInSeconds,
    tags: Tags,
  });

  if (!metricName) {
    return;
  }

  // Dimension values fall back to 'unknown' when the instance does not provide them.
  const instanceTypeDimension = InstanceType || 'unknown';
  const environmentDimension = tagValue('ghr:environment') ?? 'unknown';

  const metric = createSingleMetric(metricName, MetricUnit.Count, 1, {
    InstanceType: instanceTypeDimension,
    Environment: environmentDimension,
  });
  metric.addMetadata('InstanceId', InstanceId ?? 'unknown');
  metric.addMetadata('InstanceType', instanceTypeDimension);
  metric.addMetadata('Environment', environmentDimension);
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
import { EC2Client, Instance } from '@aws-sdk/client-ec2';
2+
import { mockClient } from 'aws-sdk-client-mock';
3+
import 'aws-sdk-client-mock-jest';
4+
import { handle } from './termination';
5+
import { BidEvictedDetail, BidEvictedEvent } from './types';
6+
import { metricEvent } from './metric-event';
7+
import { mocked } from 'jest-mock';
8+
import { getInstances } from './ec2';
9+
10+
// Replace metricEvent with a spy so the tests can assert how the handler reports instances.
jest.mock('./metric-event', () => {
  return { metricEvent: jest.fn() };
});

// Keep the real ./ec2 exports but stub getInstances; each test sets the instances it resolves to.
jest.mock('./ec2', () => {
  const actual = jest.requireActual('./ec2');
  return { ...actual, getInstances: jest.fn() };
});

mockClient(EC2Client);

// Handler configuration: termination metric enabled, filtering on the ghr:environment tag.
const config = {
  prefix: 'runners',
  tagFilters: { 'ghr:environment': 'test' },
  createSpotWarningMetric: false,
  createSpotTerminationMetric: true,
};

// Values that occur more than once in the sample event below.
const sampleAccountId = '123456789012';
const sampleEventTime = '2024-10-09T11:48:46Z';
const sampleRegion = 'eu-west-1';
const evictedInstanceId = 'i-12345678901234567';

// Sample BidEvictedEvent delivered via CloudTrail for a single evicted instance.
const event: BidEvictedEvent<BidEvictedDetail> = {
  id: '186d7999-3121-e749-23f3-c7caec1084e1',
  version: '0',
  source: 'aws.ec2',
  'detail-type': 'AWS Service Event via CloudTrail',
  account: sampleAccountId,
  region: sampleRegion,
  time: sampleEventTime,
  resources: [],
  detail: {
    eventVersion: '1.10',
    userIdentity: {
      accountId: sampleAccountId,
      invokedBy: 'sec2.amazonaws.com',
    },
    eventTime: sampleEventTime,
    eventSource: 'ec2.amazonaws.com',
    eventName: 'BidEvictedEvent',
    awsRegion: sampleRegion,
    sourceIPAddress: 'ec2.amazonaws.com',
    userAgent: 'ec2.amazonaws.com',
    requestParameters: null,
    responseElements: null,
    requestID: 'ebf032e3-5009-3484-aae8-b4946ab2e2eb',
    eventID: '3a15843b-96c2-41b1-aac1-7d62dc754547',
    readOnly: false,
    eventType: 'AwsServiceEvent',
    managementEvent: true,
    recipientAccountId: sampleAccountId,
    serviceEventDetails: {
      instanceIdSet: [evictedInstanceId],
    },
    eventCategory: 'Management',
  },
};

// Instance matching the evicted id and the configured tag filter.
const instance: Instance = {
  InstanceId: evictedInstanceId,
  InstanceType: 't2.micro',
  State: { Name: 'running' },
  LaunchTime: new Date('2021-01-01'),
  Tags: [
    { Key: 'Name', Value: 'test-instance' },
    { Key: 'ghr:environment', Value: 'test' },
    { Key: 'ghr:created_by', Value: 'niek' },
  ],
};
75+
76+
describe('handle termination', () => {
  beforeEach(() => {
    // Reset call history of the metricEvent and getInstances mocks between tests.
    jest.clearAllMocks();
  });

  it('should log and create a metric', async () => {
    mocked(getInstances).mockResolvedValue([instance]);

    await handle(event, config);

    expect(metricEvent).toHaveBeenCalledTimes(1);
    expect(metricEvent).toHaveBeenCalledWith(instance, event, 'SpotTermination', expect.anything());
  });

  it('should log details and not create a metric', async () => {
    mocked(getInstances).mockResolvedValue([instance]);

    await handle(event, { ...config, createSpotTerminationMetric: false });

    // metricEvent is still invoked for logging, but without a metric name.
    expect(metricEvent).toHaveBeenCalledWith(instance, event, undefined, expect.anything());
  });

  it('should not create a metric if filter not matched.', async () => {
    mocked(getInstances).mockResolvedValue([instance]);

    // Only the tag filter differs from the shared config.
    await handle(event, { ...config, tagFilters: { 'ghr:environment': '_NO_MATCH_' } });

    expect(metricEvent).not.toHaveBeenCalled();
  });
});
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import { createChildLogger, getTracedAWSV3Client } from '@aws-github-runner/aws-powertools-util';
2+
import { BidEvictedDetail, BidEvictedEvent } from './types';
3+
import { EC2Client } from '@aws-sdk/client-ec2';
4+
import { Config } from './ConfigResolver';
5+
import { metricEvent } from './metric-event';
6+
import { getInstances } from './ec2';
7+
8+
// Module-level child logger named 'termination-handler', used by the functions below.
const logger = createChildLogger('termination-handler');
9+
10+
/**
 * Handles a spot termination notification (BidEvictedEvent).
 *
 * Reads the evicted instance ids from the event and reports on each of those instances.
 * Events that carry no instance ids are logged and ignored.
 *
 * @param event - the received BidEvictedEvent.
 * @param config - handler configuration (tag filters and metric switches).
 */
export async function handle(event: BidEvictedEvent<BidEvictedDetail>, config: Config): Promise<void> {
  logger.debug('Received spot termination (BidEvictedEvent):', { event });

  const instanceIds = event.detail.serviceEventDetails?.instanceIdSet;
  if (!instanceIds || instanceIds.length === 0) {
    // Looking up instances without ids would describe every instance instead of the
    // evicted ones, so stop here.
    logger.warn('Received spot termination (BidEvictedEvent) without instance ids, nothing to report.');
    return;
  }

  await createMetricForInstances(instanceIds, event, config);
}
16+
17+
/**
 * Looks up the given instances and reports each one that matches the configured tag filters.
 *
 * @param instanceIds - ids of the evicted instances.
 * @param event - the BidEvictedEvent that triggered the lookup; passed on to metricEvent.
 * @param config - supplies `tagFilters` and the `createSpotTerminationMetric` switch.
 */
async function createMetricForInstances(
  instanceIds: string[],
  event: BidEvictedEvent<BidEvictedDetail>,
  config: Config,
): Promise<void> {
  const ec2 = getTracedAWSV3Client(new EC2Client({ region: process.env.AWS_REGION }));

  const instances = await getInstances(ec2, instanceIds);
  logger.debug('Received spot notification termination for:', { instances });

  // Without a metric name metricEvent only logs the instance details.
  const metricName = config.createSpotTerminationMetric ? 'SpotTermination' : undefined;
  const filters = Object.entries(config.tagFilters);

  for (const instance of instances) {
    // An instance matches when, for every configured filter, it has a tag with that key
    // whose value starts with the filter value.
    const matchFilter = filters.every(
      ([key, prefix]) => instance?.Tags?.some((tag) => tag.Key === key && tag.Value?.startsWith(prefix)) ?? false,
    );

    if (matchFilter && instance) {
      // Await the call so it completes, and any failure surfaces, before this function returns.
      await metricEvent(instance, event, metricName, logger);
    } else {
      // Serialize the filters explicitly; interpolating the object prints "[object Object]".
      logger.debug(
        `Received spot termination but ` +
          `details are not available or instance not matching the tag filter (${JSON.stringify(config.tagFilters)}).`,
      );
    }
  }
}

0 commit comments

Comments
 (0)