diff --git a/lambdas/.vscode/settings.json b/lambdas/.vscode/settings.json new file mode 100644 index 0000000000..4f34ea6d98 --- /dev/null +++ b/lambdas/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "jest.jestCommandLine": "yarn run test --", +} diff --git a/lambdas/functions/control-plane/src/lambda.test.ts b/lambdas/functions/control-plane/src/lambda.test.ts index 2c54a4d541..fc376cf369 100644 --- a/lambdas/functions/control-plane/src/lambda.test.ts +++ b/lambdas/functions/control-plane/src/lambda.test.ts @@ -1,7 +1,15 @@ import { captureLambdaHandler, logger } from '@aws-github-runner/aws-powertools-util'; import { Context, SQSEvent, SQSRecord } from 'aws-lambda'; -import { addMiddleware, adjustPool, scaleDownHandler, scaleUpHandler, ssmHousekeeper, jobRetryCheck } from './lambda'; +import { + addMiddleware, + adjustPool, + scaleDownHandler, + scaleUpHandler, + ssmHousekeeper, + jobRetryCheck, + cleanupOrgRunnersHandler, +} from './lambda'; import { adjust } from './pool/pool'; import ScaleError from './scale-runners/ScaleError'; import { scaleDown } from './scale-runners/scale-down'; @@ -9,6 +17,7 @@ import { ActionRequestMessage, scaleUp } from './scale-runners/scale-up'; import { cleanSSMTokens } from './scale-runners/ssm-housekeeper'; import { checkAndRetryJob } from './scale-runners/job-retry'; import { describe, it, expect, vi, MockedFunction } from 'vitest'; +import { cleanupOrgRunners } from './scale-runners/cleanup-org-runners'; const body: ActionRequestMessage = { eventType: 'workflow_job', @@ -66,6 +75,7 @@ vi.mock('./scale-runners/scale-down'); vi.mock('./scale-runners/scale-up'); vi.mock('./scale-runners/ssm-housekeeper'); vi.mock('./scale-runners/job-retry'); +vi.mock('./scale-runners/cleanup-org-runners'); vi.mock('@aws-github-runner/aws-powertools-util'); vi.mock('@aws-github-runner/aws-ssm-util'); @@ -226,3 +236,26 @@ describe('Test job retry check wrapper', () => { expect(logSpyWarn).toHaveBeenCalledWith(`Error processing job retry: 
${error.message}`, { error }); }); }); + +describe('Test cleanupOrgRunnersHandler lambda wrapper', () => { + it('Cleanup without error should resolve.', async () => { + const mock = mocked(cleanupOrgRunners); + mock.mockImplementation(() => { + return new Promise((resolve) => { + resolve(); + }); + }); + await expect(cleanupOrgRunnersHandler({}, context)).resolves.not.toThrow(); + }); + + it('Cleanup with error should resolve and log error.', async () => { + const logSpyError = jest.spyOn(logger, 'error'); + + const mock = mocked(cleanupOrgRunners); + const error = new Error('Error cleaning up org runners.'); + mock.mockRejectedValue(error); + + await expect(cleanupOrgRunnersHandler({}, context)).resolves.not.toThrow(); + expect(logSpyError).toHaveBeenCalledWith(expect.stringContaining(error.message), expect.anything()); + }); +}); diff --git a/lambdas/functions/control-plane/src/lambda.ts b/lambdas/functions/control-plane/src/lambda.ts index 3e3ab90557..6ec2b59049 100644 --- a/lambdas/functions/control-plane/src/lambda.ts +++ b/lambdas/functions/control-plane/src/lambda.ts @@ -9,6 +9,7 @@ import { scaleDown } from './scale-runners/scale-down'; import { scaleUp } from './scale-runners/scale-up'; import { SSMCleanupOptions, cleanSSMTokens } from './scale-runners/ssm-housekeeper'; import { checkAndRetryJob } from './scale-runners/job-retry'; +import { cleanupOrgRunners } from './scale-runners/cleanup-org-runners'; export async function scaleUpHandler(event: SQSEvent, context: Context): Promise { setContext(context, 'lambda.ts'); @@ -64,6 +65,8 @@ export const addMiddleware = () => { middy(scaleDownHandler).use(handler); middy(adjustPool).use(handler); middy(ssmHousekeeper).use(handler); + middy(jobRetryCheck).use(handler); + middy(cleanupOrgRunnersHandler).use(handler); }; addMiddleware(); @@ -91,3 +94,14 @@ export async function jobRetryCheck(event: SQSEvent, context: Context): Promise< } return Promise.resolve(); } + +export async function 
cleanupOrgRunnersHandler(event: unknown, context: Context): Promise { + setContext(context, 'lambda.ts'); + logger.logEventIfEnabled(event); + + try { + await cleanupOrgRunners(); + } catch (e) { + logger.error(`${(e as Error).message}`, { error: e as Error }); + } +} diff --git a/lambdas/functions/control-plane/src/scale-runners/cleanup-org-runners.test.ts b/lambdas/functions/control-plane/src/scale-runners/cleanup-org-runners.test.ts new file mode 100644 index 0000000000..c14e59f9d8 --- /dev/null +++ b/lambdas/functions/control-plane/src/scale-runners/cleanup-org-runners.test.ts @@ -0,0 +1,304 @@ +import { Octokit } from '@octokit/rest'; +import { cleanupOrgRunners } from './cleanup-org-runners'; +import * as auth from '../github/auth'; +import * as scaleUp from './scale-up'; + +// Mock the modules +jest.mock('../github/auth'); +jest.mock('./scale-up'); + +describe('cleanup-org-runners', () => { + // Setup environment variables + const OLD_ENV = process.env; + + // Mock functions + const mockCreateGithubAppAuth = auth.createGithubAppAuth as jest.Mock; + const mockCreateGithubInstallationAuth = auth.createGithubInstallationAuth as jest.Mock; + const mockCreateOctokitClient = auth.createOctokitClient as jest.Mock; + const mockGetGitHubEnterpriseApiUrl = scaleUp.getGitHubEnterpriseApiUrl as jest.Mock; + + // Mock Octokit client + const mockOctokit = { + actions: { + listSelfHostedRunnersForOrg: jest.fn(), + deleteSelfHostedRunnerFromOrg: jest.fn().mockImplementation(() => Promise.resolve({ status: 204 })), + }, + apps: { + getOrgInstallation: jest.fn().mockImplementation(() => Promise.resolve({ data: { id: 12345 } })), + }, + paginate: jest.fn().mockImplementation(async () => []), + } as unknown as Octokit & { + paginate: jest.Mock; + actions: { + deleteSelfHostedRunnerFromOrg: jest.Mock; + }; + apps: { + getOrgInstallation: jest.Mock; + }; + }; + + beforeEach(() => { + // Reset mocks + jest.resetAllMocks(); + + // Setup environment + process.env = { ...OLD_ENV }; + 
process.env.RUNNER_OWNER = 'test-org'; + process.env.RUNNER_LABELS = 'label1,label2'; + + // Setup mock returns + mockGetGitHubEnterpriseApiUrl.mockReturnValue({ ghesApiUrl: undefined }); + mockCreateGithubAppAuth.mockResolvedValue({ token: 'mock-app-token' }); + mockCreateGithubInstallationAuth.mockResolvedValue({ token: 'mock-installation-token' }); + + // Fix the mock to properly return the same mockOctokit for both calls + mockCreateOctokitClient.mockImplementation(() => Promise.resolve(mockOctokit)); + + // Default mock for paginate to return empty array + mockOctokit.paginate.mockResolvedValue([]); + mockOctokit.actions.deleteSelfHostedRunnerFromOrg.mockImplementation(() => Promise.resolve({ status: 204 })); + + // Ensure the getOrgInstallation mock returns proper data structure + mockOctokit.apps.getOrgInstallation.mockImplementation(() => Promise.resolve({ data: { id: 12345 } })); + }); + + afterEach(() => { + // Restore environment + process.env = OLD_ENV; + }); + + describe('Core functionality', () => { + test('should not delete any runners when no runners exist', async () => { + // Setup + mockOctokit.paginate.mockResolvedValueOnce([]); + + // Execute + await cleanupOrgRunners(); + + // Verify + expect(mockOctokit.paginate).toHaveBeenCalledWith(mockOctokit.actions.listSelfHostedRunnersForOrg, { + org: 'test-org', + per_page: 100, + }); + expect(mockOctokit.actions.deleteSelfHostedRunnerFromOrg).not.toHaveBeenCalled(); + }); + + test('should delete offline runners with matching labels', async () => { + // Setup + const mockRunners = [ + { + id: 1, + name: 'runner-1', + status: 'offline', + labels: [{ name: 'label1' }, { name: 'label2' }], + }, + { + id: 2, + name: 'runner-2', + status: 'online', + labels: [{ name: 'label1' }, { name: 'label2' }], + }, + { + id: 3, + name: 'runner-3', + status: 'offline', + labels: [{ name: 'label3' }], + }, + ]; + + mockOctokit.paginate.mockResolvedValueOnce(mockRunners); + + // Execute + await cleanupOrgRunners(); + + // 
Verify + expect(mockOctokit.paginate).toHaveBeenCalledWith(mockOctokit.actions.listSelfHostedRunnersForOrg, { + org: 'test-org', + per_page: 100, + }); + expect(mockOctokit.actions.deleteSelfHostedRunnerFromOrg).toHaveBeenCalledTimes(1); + expect(mockOctokit.actions.deleteSelfHostedRunnerFromOrg).toHaveBeenCalledWith({ + runner_id: 1, + org: 'test-org', + }); + }); + + test('should use GitHub Enterprise API URL when provided', async () => { + // Setup + const ghesApiUrl = 'https://github.enterprise.com/api/v3'; + mockGetGitHubEnterpriseApiUrl.mockReturnValue({ ghesApiUrl }); + + // Mock runners to prevent the map error + mockOctokit.paginate.mockResolvedValue([]); + + // Execute + await cleanupOrgRunners(); + + // Verify + expect(mockCreateGithubAppAuth).toHaveBeenCalledWith(undefined, ghesApiUrl); + expect(mockOctokit.apps.getOrgInstallation).toHaveBeenCalledWith({ org: 'test-org' }); + expect(mockCreateGithubInstallationAuth).toHaveBeenCalledWith(12345, ghesApiUrl); + expect(mockCreateOctokitClient).toHaveBeenCalledWith('mock-app-token', ghesApiUrl); + expect(mockCreateOctokitClient).toHaveBeenCalledWith('mock-installation-token', ghesApiUrl); + }); + + test('should handle pagination for large number of runners', async () => { + // Setup - create a large number of runners to test pagination + const mockRunners = Array(10) + .fill(null) + .map((_, index) => ({ + id: index + 1, + name: `runner-${index + 1}`, + status: index % 2 === 0 ? 
'offline' : 'online', // Alternate offline/online + labels: [{ name: 'label1' }, { name: 'label2' }], + })); + + mockOctokit.paginate.mockResolvedValueOnce(mockRunners); + + // Execute + await cleanupOrgRunners(); + + // Verify - should delete all offline runners with matching labels (5 runners) + expect(mockOctokit.actions.deleteSelfHostedRunnerFromOrg).toHaveBeenCalledTimes(5); + + // Check that only offline runners were deleted + for (let i = 0; i < 10; i++) { + if (i % 2 === 0) { + // Offline runners + expect(mockOctokit.actions.deleteSelfHostedRunnerFromOrg).toHaveBeenCalledWith({ + runner_id: i + 1, + org: 'test-org', + }); + } + } + }); + }); + + describe('Label handling', () => { + test('should handle different label scenarios correctly', async () => { + // Test cases for different label scenarios + const testCases = [ + { + name: 'empty labels env var', + runnerLabels: '', + runners: [ + { id: 1, name: 'runner-1', status: 'offline', labels: [{ name: 'label1' }] }, + { id: 2, name: 'runner-2', status: 'offline', labels: [] }, + ], + expectedDeletedIds: [1, 2], // Should delete all offline runners when no labels specified + }, + { + name: 'partial label match', + runnerLabels: 'label1,label2', + runners: [ + { id: 1, name: 'runner-1', status: 'offline', labels: [{ name: 'label1' }] }, // Partial match + { id: 2, name: 'runner-2', status: 'offline', labels: [{ name: 'label3' }] }, // No match + ], + expectedDeletedIds: [1], // Should delete runner with partial match + }, + { + name: 'empty runner labels', + runnerLabels: 'label1,label2', + runners: [ + { id: 1, name: 'runner-1', status: 'offline', labels: [] }, // Empty labels + ], + expectedDeletedIds: [1], // Based on actual behavior, it deletes runners with empty labels + }, + ]; + + for (const testCase of testCases) { + // Setup + jest.clearAllMocks(); + process.env.RUNNER_LABELS = testCase.runnerLabels; + mockOctokit.paginate.mockResolvedValueOnce(testCase.runners); + + // Execute + await 
cleanupOrgRunners(); + + // Verify + expect(mockOctokit.actions.deleteSelfHostedRunnerFromOrg).toHaveBeenCalledTimes( + testCase.expectedDeletedIds.length, + ); + + testCase.expectedDeletedIds.forEach((id) => { + expect(mockOctokit.actions.deleteSelfHostedRunnerFromOrg).toHaveBeenCalledWith({ + runner_id: id, + org: 'test-org', + }); + }); + } + }); + }); + + describe('Error handling', () => { + test('should handle various API errors correctly', async () => { + // Test cases for different error scenarios + const testCases = [ + { + name: 'runner listing error', + mockSetup: () => { + mockOctokit.paginate.mockRejectedValueOnce(new Error('API error during listing')); + }, + expectedError: 'API error during listing', + }, + { + name: 'runner deletion error', + mockSetup: () => { + mockOctokit.paginate.mockResolvedValueOnce([ + { id: 1, name: 'runner-1', status: 'offline', labels: [{ name: 'label1' }, { name: 'label2' }] }, + ]); + mockOctokit.actions.deleteSelfHostedRunnerFromOrg.mockRejectedValueOnce(new Error('Deletion failed')); + }, + expectedError: 'Deletion failed', + }, + ]; + + for (const testCase of testCases) { + // Setup + jest.clearAllMocks(); + testCase.mockSetup(); + + // Execute and verify + await expect(cleanupOrgRunners()).rejects.toThrow(testCase.expectedError); + } + }); + + test('should handle authentication and installation errors', async () => { + // Test cases for auth errors + const testCases = [ + { + name: 'app auth error', + mockSetup: () => { + mockCreateGithubAppAuth.mockRejectedValueOnce(new Error('Authentication failed')); + }, + expectedError: 'Authentication failed', + }, + { + name: 'installation lookup error', + mockSetup: () => { + mockOctokit.apps.getOrgInstallation.mockRejectedValueOnce(new Error('Installation not found')); + }, + expectedError: 'Installation not found', + }, + { + name: 'missing environment variables', + mockSetup: () => { + process.env.RUNNER_OWNER = undefined as unknown as string; + 
mockOctokit.apps.getOrgInstallation.mockRejectedValueOnce(new Error('Missing org parameter')); + }, + expectedError: 'Missing org parameter', + }, + ]; + + for (const testCase of testCases) { + // Setup + jest.clearAllMocks(); + testCase.mockSetup(); + + // Execute and verify + await expect(cleanupOrgRunners()).rejects.toThrow(testCase.expectedError); + expect(mockOctokit.paginate).not.toHaveBeenCalled(); + } + }); + }); +}); diff --git a/lambdas/functions/control-plane/src/scale-runners/cleanup-org-runners.ts b/lambdas/functions/control-plane/src/scale-runners/cleanup-org-runners.ts new file mode 100644 index 0000000000..6020a393b5 --- /dev/null +++ b/lambdas/functions/control-plane/src/scale-runners/cleanup-org-runners.ts @@ -0,0 +1,49 @@ +import { Octokit } from '@octokit/rest'; +import { createChildLogger } from '@aws-github-runner/aws-powertools-util'; +import { createGithubAppAuth, createGithubInstallationAuth, createOctokitClient } from '../github/auth'; +import { getGitHubEnterpriseApiUrl } from './scale-up'; + +export const logger = createChildLogger('cleanup-runners'); + +type UnboxPromise = T extends Promise ? 
U : T; +type GhRunners = UnboxPromise>['data']['runners']; + +async function listGitHubRunners(ghClient: Octokit, runnerOwner: string): Promise { + const runners = await ghClient.paginate(ghClient.actions.listSelfHostedRunnersForOrg, { + org: runnerOwner, + per_page: 100, + }); + return runners; +} + +async function deleteOfflineRunners(ghClient: Octokit, runnerOwner: string, runnerLabels: string[]): Promise { + const ghRunners = await listGitHubRunners(ghClient, runnerOwner); + await Promise.all( + ghRunners.map(async (ghRunner) => { + if (ghRunner.status !== 'offline') return null; + if (runnerLabels.length > 0 && !ghRunner.labels.every((label) => runnerLabels.includes(label.name))) return null; + logger.info(`Deleting runner ${ghRunner.name} with id ${ghRunner.id}`); + return ( + await ghClient.actions.deleteSelfHostedRunnerFromOrg({ + runner_id: ghRunner.id, + org: runnerOwner, + }) + ).status; + }), + ); +} + +export async function cleanupOrgRunners(): Promise { + const runnerOwner = process.env.RUNNER_OWNER; + const runnerLabels = process.env.RUNNER_LABELS ? 
process.env.RUNNER_LABELS.split(',') : []; + + const { ghesApiUrl } = getGitHubEnterpriseApiUrl(); + const ghAuthPre = await createGithubAppAuth(undefined, ghesApiUrl); + const githubClientPre = await createOctokitClient(ghAuthPre.token, ghesApiUrl); + + const installationId = (await githubClientPre.apps.getOrgInstallation({ org: runnerOwner })).data.id; + const ghAuth = await createGithubInstallationAuth(installationId, ghesApiUrl); + const octokit = await createOctokitClient(ghAuth.token, ghesApiUrl); + + await deleteOfflineRunners(octokit, runnerOwner, runnerLabels); +} diff --git a/modules/multi-runner/README.md b/modules/multi-runner/README.md index dca32e2662..d535e92a7a 100644 --- a/modules/multi-runner/README.md +++ b/modules/multi-runner/README.md @@ -125,6 +125,7 @@ module "multi-runner" { | [associate\_public\_ipv4\_address](#input\_associate\_public\_ipv4\_address) | Associate public IPv4 with the runner. Only tested with IPv4 | `bool` | `false` | no | | [aws\_partition](#input\_aws\_partition) | (optiona) partition in the arn namespace to use if not 'aws' | `string` | `"aws"` | no | | [aws\_region](#input\_aws\_region) | AWS region. | `string` | n/a | yes | +| [cleanup\_org\_runners](#input\_cleanup\_org\_runners) | Configuration for the cleanup lambda function that will clean up runners for the GitHub org.

`schedule_expression`: is used to configure the schedule for the lambda.
`state`: state of the cloudwatch event rule. Valid values are `DISABLED`, `ENABLED`, and `ENABLED_WITH_ALL_CLOUDTRAIL_MANAGEMENT_EVENTS`.
`lambda_memory_size`: lambda memory size limit.
`lambda_timeout`: timeout for the lambda in seconds.
`config`: configuration for the lambda function.
- `githubOrgOwner` (required if enabled): The GitHub org name to clean up runners for. |
object({
schedule_expression = optional(string, "rate(1 day)")
state = optional(string, "DISABLED")
lambda_memory_size = optional(number, 512)
lambda_timeout = optional(number, 30)
config = object({
githubOrgOwner = string
})
})
|
{
"config": {
"githubOrgOwner": ""
}
}
| no | | [cloudwatch\_config](#input\_cloudwatch\_config) | (optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details. | `string` | `null` | no | | [enable\_ami\_housekeeper](#input\_enable\_ami\_housekeeper) | Option to disable the lambda to clean up old AMIs. | `bool` | `false` | no | | [enable\_managed\_runner\_security\_group](#input\_enable\_managed\_runner\_security\_group) | Enabling the default managed security group creation. Unmanaged security groups can be specified via `runner_additional_security_group_ids`. | `bool` | `true` | no | diff --git a/modules/multi-runner/runners.tf b/modules/multi-runner/runners.tf index 8fe23d506d..eebfe58547 100644 --- a/modules/multi-runner/runners.tf +++ b/modules/multi-runner/runners.tf @@ -119,4 +119,6 @@ module "runners" { job_retry = each.value.runner_config.job_retry metrics = var.metrics + + cleanup_org_runners = var.cleanup_org_runners } diff --git a/modules/multi-runner/variables.tf b/modules/multi-runner/variables.tf index ff4419d4d9..84183e2aad 100644 --- a/modules/multi-runner/variables.tf +++ b/modules/multi-runner/variables.tf @@ -706,3 +706,26 @@ variable "user_agent" { type = string default = "github-aws-runners" } + +variable "cleanup_org_runners" { + description = < [aws\_partition](#input\_aws\_partition) | (optional) partition for the base arn if not 'aws' | `string` | `"aws"` | no | | [aws\_region](#input\_aws\_region) | AWS region. | `string` | n/a | yes | | [block\_device\_mappings](#input\_block\_device\_mappings) | The EC2 instance block device configuration. Takes the following keys: `device_name`, `delete_on_termination`, `volume_type`, `volume_size`, `encrypted`, `iops`, `throughput`, `kms_key_id`, `snapshot_id`. |
list(object({
delete_on_termination = optional(bool, true)
device_name = optional(string, "/dev/xvda")
encrypted = optional(bool, true)
iops = optional(number)
kms_key_id = optional(string)
snapshot_id = optional(string)
throughput = optional(number)
volume_size = number
volume_type = optional(string, "gp3")
}))
|
[
{
"volume_size": 30
}
]
| no | +| [cleanup\_org\_runners](#input\_cleanup\_org\_runners) | Configuration for the cleanup lambda function that will clean up runners for the GitHub org.

`schedule_expression`: is used to configure the schedule for the lambda.
`state`: state of the cloudwatch event rule. Valid values are `DISABLED`, `ENABLED`, and `ENABLED_WITH_ALL_CLOUDTRAIL_MANAGEMENT_EVENTS`.
`lambda_memory_size`: lambda memory size limit.
`lambda_timeout`: timeout for the lambda in seconds.
`config`: configuration for the lambda function.
- `githubOrgOwner` (required if enabled): The GitHub org name to clean up runners for. |
object({
schedule_expression = optional(string, "rate(1 day)")
state = optional(string, "DISABLED")
lambda_memory_size = optional(number, 512)
lambda_timeout = optional(number, 30)
config = object({
githubOrgOwner = string
})
})
|
{
"config": {
"githubOrgOwner": ""
}
}
| no | | [cloudwatch\_config](#input\_cloudwatch\_config) | (optional) Replaces the module default cloudwatch log config. See https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/CloudWatch-Agent-Configuration-File-Details.html for details. | `string` | `null` | no | | [create\_service\_linked\_role\_spot](#input\_create\_service\_linked\_role\_spot) | (optional) create the service linked role for spot instances that is required by the scale-up lambda. | `bool` | `false` | no | | [credit\_specification](#input\_credit\_specification) | The credit option for CPU usage of a T instance. Can be unset, "standard" or "unlimited". | `string` | `null` | no | diff --git a/modules/runners/cleanup-org-runners.tf b/modules/runners/cleanup-org-runners.tf new file mode 100644 index 0000000000..91793998ce --- /dev/null +++ b/modules/runners/cleanup-org-runners.tf @@ -0,0 +1,128 @@ +locals { + cleanup_org_runners = { + schedule_expression = var.cleanup_org_runners.schedule_expression + state = var.cleanup_org_runners.state + lambda_timeout = var.cleanup_org_runners.lambda_timeout + lambda_memory_size = var.cleanup_org_runners.lambda_memory_size + config = { + githubOrgOwner = var.cleanup_org_runners.config.githubOrgOwner + } + } +} + +resource "aws_lambda_function" "cleanup_org_runners" { + s3_bucket = var.lambda_s3_bucket != null ? var.lambda_s3_bucket : null + s3_key = var.runners_lambda_s3_key != null ? var.runners_lambda_s3_key : null + s3_object_version = var.runners_lambda_s3_object_version != null ? var.runners_lambda_s3_object_version : null + filename = var.lambda_s3_bucket == null ? local.lambda_zip : null + source_code_hash = var.lambda_s3_bucket == null ? 
filebase64sha256(local.lambda_zip) : null + function_name = "${var.prefix}-cleanup-org-runners" + role = aws_iam_role.cleanup_org_runners.arn + handler = "index.cleanupOrgRunnersHandler" + runtime = var.lambda_runtime + timeout = local.cleanup_org_runners.lambda_timeout + memory_size = local.cleanup_org_runners.lambda_memory_size + tags = merge(local.tags, var.lambda_tags) + architectures = [var.lambda_architecture] + environment { + variables = { + ENVIRONMENT = var.prefix + GHES_URL = var.ghes_url + RUNNER_LABELS = lower(join(",", var.runner_labels)) + RUNNER_OWNER = local.cleanup_org_runners.config.githubOrgOwner + LOG_LEVEL = var.log_level + NODE_TLS_REJECT_UNAUTHORIZED = var.ghes_url != null && !var.ghes_ssl_verify ? 0 : 1 + PARAMETER_GITHUB_APP_ID_NAME = var.github_app_parameters.id.name + PARAMETER_GITHUB_APP_KEY_BASE64_NAME = var.github_app_parameters.key_base64.name + POWERTOOLS_LOGGER_LOG_EVENT = var.log_level == "debug" ? "true" : "false" + POWERTOOLS_METRICS_NAMESPACE = var.metrics.namespace + POWERTOOLS_TRACE_ENABLED = var.tracing_config.mode != null ? true : false + POWERTOOLS_TRACER_CAPTURE_HTTPS_REQUESTS = var.tracing_config.capture_http_requests + POWERTOOLS_TRACER_CAPTURE_ERROR = var.tracing_config.capture_error + POWERTOOLS_SERVICE_NAME = "runners-cleanup-org-runners" + } + } + + dynamic "vpc_config" { + for_each = var.lambda_subnet_ids != null && var.lambda_security_group_ids != null ? [true] : [] + content { + security_group_ids = var.lambda_security_group_ids + subnet_ids = var.lambda_subnet_ids + } + } + + dynamic "tracing_config" { + for_each = var.tracing_config.mode != null ? 
[true] : [] + content { + mode = var.tracing_config.mode + } + } +} + +resource "aws_cloudwatch_log_group" "cleanup_org_runners" { + name = "/aws/lambda/${aws_lambda_function.cleanup_org_runners.function_name}" + retention_in_days = var.logging_retention_in_days + kms_key_id = var.logging_kms_key_id + tags = var.tags +} + +resource "aws_cloudwatch_event_rule" "cleanup_org_runners" { + name = "${var.prefix}-cleanup-org-runners" + schedule_expression = local.cleanup_org_runners.schedule_expression + tags = var.tags + state = local.cleanup_org_runners.state +} + +resource "aws_cloudwatch_event_target" "cleanup_org_runners" { + rule = aws_cloudwatch_event_rule.cleanup_org_runners.name + arn = aws_lambda_function.cleanup_org_runners.arn +} + +resource "aws_lambda_permission" "cleanup_org_runners" { + statement_id = "AllowExecutionFromCloudWatch" + action = "lambda:InvokeFunction" + function_name = aws_lambda_function.cleanup_org_runners.function_name + principal = "events.amazonaws.com" + source_arn = aws_cloudwatch_event_rule.cleanup_org_runners.arn +} + +resource "aws_iam_role" "cleanup_org_runners" { + name = "${var.prefix}-cleanup-org-runners-lambda" + description = "Lambda role for Cleanup Org Runners (${var.prefix})" + assume_role_policy = data.aws_iam_policy_document.lambda_assume_role_policy.json + path = local.role_path + permissions_boundary = var.role_permissions_boundary + tags = local.tags +} + +resource "aws_iam_role_policy" "cleanup_org_runners" { + name = "cleanup-org-runners-policy" + role = aws_iam_role.cleanup_org_runners.name + policy = templatefile("${path.module}/policies/lambda-cleanup-org-runners.json", { + github_app_id_arn = var.github_app_parameters.id.arn + github_app_key_base64_arn = var.github_app_parameters.key_base64.arn + kms_key_arn = local.kms_key_arn + ami_kms_key_arn = local.ami_kms_key_arn + }) +} + +resource "aws_iam_role_policy" "cleanup_org_runners_logging" { + name = "logging-policy" + role = 
aws_iam_role.cleanup_org_runners.name + policy = templatefile("${path.module}/policies/lambda-cloudwatch.json", { + log_group_arn = aws_cloudwatch_log_group.cleanup_org_runners.arn + }) +} + +resource "aws_iam_role_policy_attachment" "cleanup_org_runners_vpc_execution_role" { + count = length(var.lambda_subnet_ids) > 0 ? 1 : 0 + role = aws_iam_role.cleanup_org_runners.name + policy_arn = "arn:${var.aws_partition}:iam::aws:policy/service-role/AWSLambdaVPCAccessExecutionRole" +} + +resource "aws_iam_role_policy" "cleanup_org_runners_xray" { + count = var.tracing_config.mode != null ? 1 : 0 + name = "xray-policy" + policy = data.aws_iam_policy_document.lambda_xray[0].json + role = aws_iam_role.cleanup_org_runners.name +} diff --git a/modules/runners/policies/lambda-cleanup-org-runners.json b/modules/runners/policies/lambda-cleanup-org-runners.json new file mode 100644 index 0000000000..c778d0cf2b --- /dev/null +++ b/modules/runners/policies/lambda-cleanup-org-runners.json @@ -0,0 +1,47 @@ +{ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "ssm:GetParameter" + ], + "Resource": [ + "${github_app_key_base64_arn}", + "${github_app_id_arn}" + ] +%{ if kms_key_arn != "" ~} + }, + { + "Effect": "Allow", + "Action": [ + "kms:Decrypt" + ], + "Resource": "${kms_key_arn}" +%{ endif ~} +%{ if ami_kms_key_arn != "" ~} + }, + { + "Effect": "Allow", + "Action": [ + "kms:DescribeKey", + "kms:ReEncrypt*", + "kms:Decrypt" + ], + "Resource": "${ami_kms_key_arn}" + }, + { + "Effect": "Allow", + "Action": [ + "kms:CreateGrant" + ], + "Resource": "${ami_kms_key_arn}", + "Condition": { + "Bool": { + "aws:ViaAWSService": "true" + } + } +%{ endif ~} + } + ] +} diff --git a/modules/runners/variables.tf b/modules/runners/variables.tf index 08109008fd..f6696bb9f8 100644 --- a/modules/runners/variables.tf +++ b/modules/runners/variables.tf @@ -739,3 +739,26 @@ variable "user_agent" { type = string default = null } + +variable "cleanup_org_runners" { + 
description = <