philips-labs
diff --git a/‎README.md
Lines changed: 14 additions & 1 deletion b/‎README.md
Lines changed: 14 additions & 1 deletion
diff --git a/‎examples/ephemeral/main.tf
Lines changed: 12 additions & 3 deletions b/‎examples/ephemeral/main.tf
Lines changed: 12 additions & 3 deletions
diff --git a/‎examples/multi-runner/main.tf
Lines changed: 6 additions & 0 deletions b/‎examples/multi-runner/main.tf
Lines changed: 6 additions & 0 deletions
diff --git a/‎lambdas/functions/ami-housekeeper/src/ami.ts
Lines changed: 5 additions & 4 deletions b/‎lambdas/functions/ami-housekeeper/src/ami.ts
Lines changed: 5 additions & 4 deletions
diff --git a/‎lambdas/functions/control-plane/package.json
Lines changed: 2 additions & 0 deletions b/‎lambdas/functions/control-plane/package.json
Lines changed: 2 additions & 0 deletions
diff --git a/‎lambdas/functions/control-plane/src/aws/runners.d.ts
Lines changed: 1 addition & 0 deletions b/‎lambdas/functions/control-plane/src/aws/runners.d.ts
Lines changed: 1 addition & 0 deletions
diff --git a/‎lambdas/functions/control-plane/src/aws/runners.test.ts
Lines changed: 17 additions & 0 deletions b/‎lambdas/functions/control-plane/src/aws/runners.test.ts
Lines changed: 17 additions & 0 deletions
diff --git a/‎lambdas/functions/control-plane/src/aws/runners.ts
Lines changed: 10 additions & 3 deletions b/‎lambdas/functions/control-plane/src/aws/runners.ts
Lines changed: 10 additions & 3 deletions
diff --git a/‎lambdas/functions/control-plane/src/axios/fetch-override.test.ts
Lines changed: 31 additions & 0 deletions b/‎lambdas/functions/control-plane/src/axios/fetch-override.test.ts
Lines changed: 31 additions & 0 deletions
diff --git a/‎lambdas/functions/control-plane/src/axios/fetch-override.ts
Lines changed: 19 additions & 0 deletions b/‎lambdas/functions/control-plane/src/axios/fetch-override.ts
Lines changed: 19 additions & 0 deletions
diff --git a/‎lambdas/functions/control-plane/src/gh-auth/gh-auth.test.ts
Lines changed: 2 additions & 2 deletions b/‎lambdas/functions/control-plane/src/gh-auth/gh-auth.test.ts
Lines changed: 2 additions & 2 deletions
diff --git a/‎lambdas/functions/control-plane/src/gh-auth/gh-auth.ts
Lines changed: 8 additions & 1 deletion b/‎lambdas/functions/control-plane/src/gh-auth/gh-auth.ts
Lines changed: 8 additions & 1 deletion
diff --git a/‎lambdas/functions/control-plane/src/lambda.test.ts
Lines changed: 10 additions & 2 deletions b/‎lambdas/functions/control-plane/src/lambda.test.ts
Lines changed: 10 additions & 2 deletions
@@ -32,6 +32,7 @@ This [Terraform](https://www.terraform.io/) module creates the required infrastr
 - [Examples](#examples)
 - [Sub modules](#sub-modules)
 - [Logging](#logging)
+- [Tracing](#tracing)
 - [Debugging](#debugging)
 - [Security Considerations](#security-considerations)
 - [Requirements](#requirements)
@@ -427,6 +428,17 @@ An example log message of the scale-up function:
     }
 }
 ```
+## Tracing
+Because of the distributed architecture of this application, it can be difficult to troubleshoot.
+Tracing can be enabled for all the lambda functions created by this application. To enable it, provide the `tracing_config` option in the root module or in the inner modules.
+
+This tracing config generates timelines for the following events:
+- Basic lifecycle of lambda function
+- Traces for GitHub API calls (can be configured with `capture_http_requests`).
+- Traces for all AWS SDK calls
+
+This feature has been disabled by default.
+
 
 ## Debugging
 
@@ -543,7 +555,7 @@ We welcome any improvement to the standard module to make the default as secure
 | <a name="input_lambda_s3_bucket"></a> [lambda\_s3\_bucket](#input\_lambda\_s3\_bucket) | S3 bucket from which to specify lambda functions. This is an alternative to providing local files directly. | `string` | `null` | no |
 | <a name="input_lambda_security_group_ids"></a> [lambda\_security\_group\_ids](#input\_lambda\_security\_group\_ids) | List of security group IDs associated with the Lambda function. | `list(string)` | `[]` | no |
 | <a name="input_lambda_subnet_ids"></a> [lambda\_subnet\_ids](#input\_lambda\_subnet\_ids) | List of subnets in which the action runners will be launched, the subnets needs to be subnets in the `vpc_id`. | `list(string)` | `[]` | no |
-| <a name="input_lambda_tracing_mode"></a> [lambda\_tracing\_mode](#input\_lambda\_tracing\_mode) | Enable X-Ray tracing for the lambda functions. | `string` | `null` | no |
+| <a name="input_lambda_tracing_mode"></a> [lambda\_tracing\_mode](#input\_lambda\_tracing\_mode) | DEPRECATED: Replaced by `tracing_config`. | `string` | `null` | no |
 | <a name="input_log_level"></a> [log\_level](#input\_log\_level) | Logging level for lambda logging. Valid values are  'silly', 'trace', 'debug', 'info', 'warn', 'error', 'fatal'. | `string` | `"info"` | no |
 | <a name="input_logging_kms_key_id"></a> [logging\_kms\_key\_id](#input\_logging\_kms\_key\_id) | Specifies the kms key id to encrypt the logs with. | `string` | `null` | no |
 | <a name="input_logging_retention_in_days"></a> [logging\_retention\_in\_days](#input\_logging\_retention\_in\_days) | Specifies the number of days you want to retain log events for the lambda log group. Possible values are: 0, 1, 3, 5, 7, 14, 30, 60, 90, 120, 150, 180, 365, 400, 545, 731, 1827, and 3653. | `number` | `180` | no |
@@ -593,6 +605,7 @@ We welcome any improvement to the standard module to make the default as secure
 | <a name="input_syncer_lambda_s3_key"></a> [syncer\_lambda\_s3\_key](#input\_syncer\_lambda\_s3\_key) | S3 key for syncer lambda function. Required if using an S3 bucket to specify lambdas. | `string` | `null` | no |
 | <a name="input_syncer_lambda_s3_object_version"></a> [syncer\_lambda\_s3\_object\_version](#input\_syncer\_lambda\_s3\_object\_version) | S3 object version for syncer lambda function. Useful if S3 versioning is enabled on source bucket. | `string` | `null` | no |
 | <a name="input_tags"></a> [tags](#input\_tags) | Map of tags that will be added to created resources. By default resources will be tagged with name and environment. | `map(string)` | `{}` | no |
+| <a name="input_tracing_config"></a> [tracing\_config](#input\_tracing\_config) | Configuration for lambda tracing. | <pre>object({<br>    mode                  = optional(string, null)<br>    capture_http_requests = optional(bool, false)<br>    capture_error         = optional(bool, false)<br>  })</pre> | `{}` | no |
 | <a name="input_userdata_post_install"></a> [userdata\_post\_install](#input\_userdata\_post\_install) | Script to be ran after the GitHub Actions runner is installed on the EC2 instances | `string` | `""` | no |
 | <a name="input_userdata_pre_install"></a> [userdata\_pre\_install](#input\_userdata\_pre\_install) | Script to be ran before the GitHub Actions runner is installed on the EC2 instances | `string` | `""` | no |
 | <a name="input_userdata_template"></a> [userdata\_template](#input\_userdata\_template) | Alternative user-data template, replacing the default template. By providing your own user\_data you have to take care of installing all required software, including the action runner. Variables userdata\_pre/post\_install are ignored. | `string` | `null` | no |
 
@@ -69,11 +69,20 @@ module "runners" {
   #
   enable_job_queued_check = true
 
+  # tracing_config = {
+  #   mode                  = "Active"
+  #   capture_error         = true
+  #   capture_http_requests = true
+  # }
+
+
   # configure your pre-built AMI
   # enable_userdata = false
-  # ami_filter       = { name = ["github-runner-al2023-x86_64-*"], state = ["available"] }
-  # data "aws_caller_identity" "current" {}
-  # ami_owners       = [data.aws_caller_identity.current.account_id]
+  # ami_filter      = { name = ["github-runner-al2023-x86_64-*"], state = ["available"] }
+  # ami_owners      = [data.aws_caller_identity.current.account_id]
+
+  # or use the default AMI
+  # enable_userdata = true
 
   # Enable debug logging for the lambda functions
   # log_level = "debug"
 
@@ -57,6 +57,12 @@ module "runners" {
     id             = var.github_app.id
     webhook_secret = random_id.random.hex
   }
+  # enable this section for tracing
+  # tracing_config = {
+  #   mode                  = "Active"
+  #   capture_error         = true
+  #   capture_http_requests = true
+  # }
   # Assuming local build lambda's to use pre build ones, uncomment the lines below and download the
   # lambda zip files lambda_download
   # webhook_lambda_zip                = "../lambdas-download/webhook.zip"
 
@@ -10,6 +10,7 @@ import {
 } from '@aws-sdk/client-ec2';
 import { DescribeParametersCommand, GetParameterCommand, SSMClient } from '@aws-sdk/client-ssm';
 import { createChildLogger } from '@terraform-aws-github-runner/aws-powertools-util';
+import { getTracedAWSV3Client } from '@terraform-aws-github-runner/aws-powertools-util';
 
 const logger = createChildLogger('ami');
 
@@ -82,7 +83,7 @@ async function getAmisNotInUse(options: AmiCleanupOptions) {
   const amiIdsInSSM = await getAmisReferedInSSM(options);
   const amiIdsInTemplates = await getAmiInLatestTemplates(options);
 
-  const ec2Client = new EC2Client({});
+  const ec2Client = getTracedAWSV3Client(new EC2Client({}));
   logger.debug('Getting all AMIs from ec2 with filters', { filters: options.amiFilters });
   const amiEc2 = await ec2Client.send(
     new DescribeImagesCommand({
@@ -133,7 +134,7 @@ async function deleteAmi(amiDetails: Image, options: AmiCleanupOptionsInternal):
 
   try {
     logger.info(`deleting ami ${amiDetails.Name || amiDetails.ImageId} created at ${amiDetails.CreationDate}`);
-    const ec2Client = new EC2Client({});
+    const ec2Client = getTracedAWSV3Client(new EC2Client({}));
     await ec2Client.send(new DeregisterImageCommand({ ImageId: amiDetails.ImageId, DryRun: options.dryRun }));
     await deleteSnapshot(options, amiDetails, ec2Client);
   } catch (error) {
@@ -158,7 +159,7 @@ async function deleteSnapshot(options: AmiCleanupOptions, amiDetails: Image, ec2
 }
 
 async function getAmiInLatestTemplates(options: AmiCleanupOptions): Promise<(string | undefined)[]> {
-  const ec2Client = new EC2Client({});
+  const ec2Client = getTracedAWSV3Client(new EC2Client({}));
   const launnchTemplates = await ec2Client.send(
     new DescribeLaunchTemplatesCommand({
       LaunchTemplateNames: options.launchTemplateNames,
@@ -188,7 +189,7 @@ async function getAmisReferedInSSM(options: AmiCleanupOptions): Promise<(string
     return [];
   }
 
-  const ssmClient = new SSMClient({});
+  const ssmClient = getTracedAWSV3Client(new SSMClient({}));
   const ssmParams = await ssmClient.send(
     new DescribeParametersCommand({
       ParameterFilters: [
 
@@ -40,11 +40,13 @@
   "dependencies": {
     "@aws-sdk/client-ec2": "^3.436.0",
     "@aws-sdk/types": "^3.433.0",
+    "@middy/core": "^3.6.2",
     "@octokit/auth-app": "6.0.1",
     "@octokit/rest": "20.0.2",
     "@octokit/types": "^12.1.1",
     "@terraform-aws-github-runner/aws-powertools-util": "*",
     "@terraform-aws-github-runner/aws-ssm-util": "*",
+    "axios": "^1.5.1",
     "cron-parser": "^4.8.1",
     "typescript": "^5.0.4"
   }
 
@@ -39,4 +39,5 @@ export interface RunnerInputParameters {
   };
   numberOfRunners?: number;
   amiIdSsmParameterName?: string;
+  tracingEnabled?: boolean;
 }
@@ -10,6 +10,7 @@ import {
   TerminateInstancesCommand,
 } from '@aws-sdk/client-ec2';
 import { GetParameterCommand, GetParameterResult, PutParameterCommand, SSMClient } from '@aws-sdk/client-ssm';
+import { tracer } from '@terraform-aws-github-runner/aws-powertools-util';
 import { mockClient } from 'aws-sdk-client-mock';
 import 'aws-sdk-client-mock-jest';
 
@@ -236,6 +237,15 @@ describe('create runner', () => {
       Name: 'my-ami-id-param',
     });
   });
+  it('calls create fleet of 1 instance with runner tracing enabled', async () => {
+    tracer.getRootXrayTraceId = jest.fn().mockReturnValue('123');
+
+    await createRunner(createRunnerConfig({ ...defaultRunnerConfig, tracingEnabled: true }));
+
+    expect(mockEC2Client).toHaveReceivedCommandWith(CreateFleetCommand, {
+      ...expectedCreateFleetRequest({ ...defaultExpectedFleetRequestValues, tracingEnabled: true }),
+    });
+  });
 });
 
 describe('create runner with errors', () => {
@@ -350,6 +360,7 @@ interface RunnerConfig {
   allocationStrategy: SpotAllocationStrategy;
   maxSpotPrice?: string;
   amiIdSsmParameterName?: string;
+  tracingEnabled?: boolean;
 }
 
 function createRunnerConfig(runnerConfig: RunnerConfig): RunnerInputParameters {
@@ -366,6 +377,7 @@ function createRunnerConfig(runnerConfig: RunnerConfig): RunnerInputParameters {
     },
     subnets: ['subnet-123', 'subnet-456'],
     amiIdSsmParameterName: runnerConfig.amiIdSsmParameterName,
+    tracingEnabled: runnerConfig.tracingEnabled,
   };
 }
 
@@ -376,6 +388,7 @@ interface ExpectedFleetRequestValues {
   maxSpotPrice?: string;
   totalTargetCapacity: number;
   imageId?: string;
+  tracingEnabled?: boolean;
 }
 
 function expectedCreateFleetRequest(expectedValues: ExpectedFleetRequestValues): CreateFleetCommandInput {
@@ -385,6 +398,10 @@ function expectedCreateFleetRequest(expectedValues: ExpectedFleetRequestValues):
     { Key: 'ghr:Type', Value: expectedValues.type },
     { Key: 'ghr:Owner', Value: REPO_NAME },
   ];
+  if (expectedValues.tracingEnabled) {
+    const traceId = tracer.getRootXrayTraceId();
+    tags.push({ Key: 'ghr:trace_id', Value: traceId! });
+  }
   const request: CreateFleetCommandInput = {
     LaunchTemplateConfigs: [
       {
 
@@ -9,6 +9,7 @@ import {
   _InstanceType,
 } from '@aws-sdk/client-ec2';
 import { createChildLogger } from '@terraform-aws-github-runner/aws-powertools-util';
+import { getTracedAWSV3Client, tracer } from '@terraform-aws-github-runner/aws-powertools-util';
 import { getParameter } from '@terraform-aws-github-runner/aws-ssm-util';
 import moment from 'moment';
 
@@ -56,7 +57,7 @@ function constructFilters(filters?: Runners.ListRunnerFilters): Ec2Filter[][] {
 }
 
 async function getRunners(ec2Filters: Ec2Filter[]): Promise<Runners.RunnerList[]> {
-  const ec2 = new EC2Client({ region: process.env.AWS_REGION });
+  const ec2 = getTracedAWSV3Client(new EC2Client({ region: process.env.AWS_REGION }));
   const runners: Runners.RunnerList[] = [];
   let nextToken;
   let hasNext = true;
@@ -93,7 +94,7 @@ function getRunnerInfo(runningInstances: DescribeInstancesResult) {
 }
 
 export async function terminateRunner(instanceId: string): Promise<void> {
-  const ec2 = new EC2Client({ region: process.env.AWS_REGION });
+  const ec2 = getTracedAWSV3Client(new EC2Client({ region: process.env.AWS_REGION }));
   await ec2.send(new TerminateInstancesCommand({ InstanceIds: [instanceId] }));
   logger.info(`Runner ${instanceId} has been terminated.`);
 }
@@ -126,7 +127,7 @@ export async function createRunner(runnerParameters: Runners.RunnerInputParamete
     },
   });
 
-  const ec2Client = new EC2Client({ region: process.env.AWS_REGION });
+  const ec2Client = getTracedAWSV3Client(new EC2Client({ region: process.env.AWS_REGION }));
 
   let amiIdOverride = undefined;
 
@@ -145,13 +146,19 @@ export async function createRunner(runnerParameters: Runners.RunnerInputParamete
   }
 
   const numberOfRunners = runnerParameters.numberOfRunners ? runnerParameters.numberOfRunners : 1;
+
   const tags = [
     { Key: 'ghr:Application', Value: 'github-action-runner' },
     { Key: 'ghr:created_by', Value: numberOfRunners === 1 ? 'scale-up-lambda' : 'pool-lambda' },
     { Key: 'ghr:Type', Value: runnerParameters.runnerType },
     { Key: 'ghr:Owner', Value: runnerParameters.runnerOwner },
   ];
 
+  if (runnerParameters.tracingEnabled) {
+    const traceId = tracer.getRootXrayTraceId();
+    tags.push({ Key: 'ghr:trace_id', Value: traceId! });
+  }
+
   let fleet: CreateFleetResult;
   try {
     // see for spec https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_CreateFleet.html
 
@@ -0,0 +1,31 @@
+import axios, { AxiosResponse } from 'axios';
+
+import { axiosFetch } from './fetch-override';
+
+jest.mock('axios');
+type FetchResponse = AxiosResponse & { json: () => string };
+
+describe('axiosFetch', () => {
+  beforeEach(() => {
+    jest.clearAllMocks();
+  });
+  it('should return a promise that resolves with the response data', async () => {
+    // Arrange
+    const url = 'https://example.com';
+    const options = { body: { foo: 'bar' } };
+    const responseData = { data: { baz: 'qux' } };
+    const mockedAxios = axios as unknown as jest.Mock;
+    mockedAxios.mockResolvedValue(responseData);
+
+    // Act
+    const result = (await axiosFetch(url, options)) as FetchResponse;
+
+    // Assert
+    expect(axios).toHaveBeenCalledWith(url, { ...options, data: options.body });
+    expect(result).toEqual({
+      ...responseData,
+      json: expect.any(Function),
+    });
+    expect(result.json()).toEqual(responseData.data);
+  });
+});
@@ -0,0 +1,19 @@
+import axios, { AxiosRequestConfig, AxiosResponse } from 'axios';
+
+type FetchResponse = AxiosResponse & { json: () => string };
+
+type FetchOptions = AxiosRequestConfig & { body?: object };
+
+// X-Ray does not trace the built-in fetch, so requests are routed through axios instead
+// https://github.com/aws/aws-xray-sdk-node/issues/531
+/**
+ * Fetch-style adapter that sends the request through axios.
+ * A fetch-style `options.body` is forwarded as the axios `data` payload.
+ * @returns the axios response plus a `json()` accessor returning `response.data`.
+ */
+export const axiosFetch = async (url: string, options: FetchOptions): Promise<FetchResponse> => {
+  // Fetch callers supply the payload as `body`; axios reads it from `data`.
+  const response = await axios(url, { ...options, data: options.body });
+  // An async function already resolves with its return value; no explicit Promise is needed.
+  return { ...response, json: () => response.data };
+};
@@ -95,7 +95,7 @@ ${decryptedValue}`,
 
     // Assert
     expect(mockedCreatAppAuth).toBeCalledTimes(1);
-    expect(mockedCreatAppAuth).toBeCalledWith(authOptions);
+    expect(mockedCreatAppAuth).toBeCalledWith({ ...authOptions, request: expect.anything() });
   });
 
   test('Creates auth object for public GitHub', async () => {
@@ -121,7 +121,7 @@ ${decryptedValue}`,
     expect(getParameter).toBeCalledWith(PARAMETER_GITHUB_APP_KEY_BASE64_NAME);
 
     expect(mockedCreatAppAuth).toBeCalledTimes(1);
-    expect(mockedCreatAppAuth).toBeCalledWith(authOptions);
+    expect(mockedCreatAppAuth).toBeCalledWith({ ...authOptions, request: expect.anything() });
     expect(mockedAuth).toBeCalledWith({ type: authType });
     expect(result.token).toBe(token);
   });
 
@@ -13,11 +13,13 @@ import { Octokit } from '@octokit/rest';
 import { createChildLogger } from '@terraform-aws-github-runner/aws-powertools-util';
 import { getParameter } from '@terraform-aws-github-runner/aws-ssm-util';
 
-const logger = createChildLogger('gh-auth');
+import { axiosFetch } from '../axios/fetch-override';
 
+const logger = createChildLogger('gh-auth');
 export async function createOctoClient(token: string, ghesApiUrl = ''): Promise<Octokit> {
   const ocktokitOptions: OctokitOptions = {
     auth: token,
+    request: { fetch: axiosFetch },
   };
   if (ghesApiUrl) {
     ocktokitOptions.baseUrl = ghesApiUrl;
@@ -64,7 +66,12 @@ async function createAuth(installationId: number | undefined, ghesApiUrl: string
   if (ghesApiUrl) {
     authOptions.request = request.defaults({
       baseUrl: ghesApiUrl,
+      request: {
+        fetch: axiosFetch,
+      },
     });
+  } else {
+    authOptions.request = request.defaults({ request: { fetch: axiosFetch } });
   }
   return createAppAuth(authOptions);
 }
@@ -1,8 +1,8 @@
-import { logger } from '@terraform-aws-github-runner/aws-powertools-util';
+import { captureLambdaHandler, logger } from '@terraform-aws-github-runner/aws-powertools-util';
 import { Context, SQSEvent, SQSRecord } from 'aws-lambda';
 import { mocked } from 'jest-mock';
 
-import { adjustPool, scaleDownHandler, scaleUpHandler, ssmHousekeeper } from './lambda';
+import { addMiddleware, adjustPool, scaleDownHandler, scaleUpHandler, ssmHousekeeper } from './lambda';
 import { adjust } from './pool/pool';
 import ScaleError from './scale-runners/ScaleError';
 import { scaleDown } from './scale-runners/scale-down';
@@ -161,6 +161,14 @@ describe('Adjust pool.', () => {
   });
 });
 
+describe('Test middleware', () => {
+  it('Should have a working middleware', async () => {
+    const mockedLambdaHandler = captureLambdaHandler as unknown as jest.Mock;
+    mockedLambdaHandler.mockReturnValue({ before: jest.fn(), after: jest.fn(), onError: jest.fn() });
+    expect(addMiddleware).not.toThrowError();
+  });
+});
+
 describe('Test ssm housekeeper lambda wrapper.', () => {
   it('Invoke without errors.', async () => {
     const mock = mocked(cleanSSMTokens);
Original file line number	Diff line number	Diff line change
`@@ -39,4 +39,5 @@ export interface RunnerInputParameters {`
`39`	`39`	`};`
`40`	`40`	`numberOfRunners?: number;`
`41`	`41`	`amiIdSsmParameterName?: string;`
	`42`	`+ tracingEnabled?: boolean;`
`42`	`43`	`}`