diff --git a/aws_lambda_powertools/logging/correlation_paths.py b/aws_lambda_powertools/logging/correlation_paths.py index cbccd85637f..004aa2a59a3 100644 --- a/aws_lambda_powertools/logging/correlation_paths.py +++ b/aws_lambda_powertools/logging/correlation_paths.py @@ -5,3 +5,4 @@ APPSYNC_RESOLVER = 'request.headers."x-amzn-trace-id"' APPLICATION_LOAD_BALANCER = 'headers."x-amzn-trace-id"' EVENT_BRIDGE = "id" +S3_OBJECT_LAMBDA = "xAmzRequestId" diff --git a/aws_lambda_powertools/utilities/data_classes/s3_object_event.py b/aws_lambda_powertools/utilities/data_classes/s3_object_event.py new file mode 100644 index 00000000000..f653f7aca6e --- /dev/null +++ b/aws_lambda_powertools/utilities/data_classes/s3_object_event.py @@ -0,0 +1,333 @@ +from typing import Dict, Optional + +from aws_lambda_powertools.utilities.data_classes.common import DictWrapper, get_header_value + + +class S3ObjectContext(DictWrapper): + """The input and output details for connections to Amazon S3 and S3 Object Lambda.""" + + @property + def input_s3_url(self) -> str: + """A pre-signed URL that can be used to fetch the original object from Amazon S3. + + The URL is signed using the original caller’s identity, and their permissions + will apply when the URL is used. If there are signed headers in the URL, the + Lambda function must include these in the call to Amazon S3, except for the Host.""" + return self["inputS3Url"] + + @property + def output_route(self) -> str: + """A routing token that is added to the S3 Object Lambda URL when the Lambda function + calls `WriteGetObjectResponse`.""" + return self["outputRoute"] + + @property + def output_token(self) -> str: + """An opaque token used by S3 Object Lambda to match the WriteGetObjectResponse call + with the original caller.""" + return self["outputToken"] + + +class S3ObjectConfiguration(DictWrapper): + """Configuration information about the S3 Object Lambda access point.""" + + @property + def access_point_arn(self) -> str: + """The Amazon Resource Name (ARN) of the S3 Object Lambda access point that received + this request.""" + return self["accessPointArn"] + + @property + def supporting_access_point_arn(self) -> str: + """The ARN of the supporting access point that is specified in the S3 Object Lambda + access point configuration.""" + return self["supportingAccessPointArn"] + + @property + def payload(self) -> str: + """Custom data that is applied to the S3 Object Lambda access point configuration. + + S3 Object Lambda treats this as an opaque string, so it might need to be decoded + before use.""" + return self["payload"] + + +class S3ObjectUserRequest(DictWrapper): + """ Information about the original call to S3 Object Lambda.""" + + @property + def url(self) -> str: + """The decoded URL of the request as received by S3 Object Lambda, excluding any + authorization-related query parameters.""" + return self["url"] + + @property + def headers(self) -> Dict[str, str]: + """A map of string to strings containing the HTTP headers and their values from the original call, + excluding any authorization-related headers. + + If the same header appears multiple times, their values are combined into a comma-delimited list. + The case of the original headers is retained in this map.""" + return self["headers"] + + def get_header_value( + self, name: str, default_value: Optional[str] = None, case_sensitive: Optional[bool] = False + ) -> Optional[str]: + """Get header value by name + + Parameters + ---------- + name: str + Header name + default_value: str, optional + Default value if no value was found by name + case_sensitive: bool + Whether to use a case sensitive look up + Returns + ------- + str, optional + Header value + """ + return get_header_value(self.headers, name, default_value, case_sensitive) + + +class S3ObjectSessionIssuer(DictWrapper): + @property + def get_type(self) -> str: + """The source of the temporary security credentials, such as Root, IAMUser, or Role.""" + return self["type"] + + @property + def user_name(self) -> str: + """The friendly name of the user or role that issued the session.""" + return self["userName"] + + @property + def principal_id(self) -> str: + """The internal ID of the entity that was used to get credentials.""" + return self["principalId"] + + @property + def arn(self) -> str: + """The ARN of the source (account, IAM user, or role) that was used to get temporary security credentials.""" + return self["arn"] + + @property + def account_id(self) -> str: + """The account that owns the entity that was used to get credentials.""" + return self["accountId"] + + +class S3ObjectSessionAttributes(DictWrapper): + @property + def creation_date(self) -> str: + """The date and time when the temporary security credentials were issued. + Represented in ISO 8601 basic notation.""" + return self["creationDate"] + + @property + def mfa_authenticated(self) -> str: + """The value is true if the root user or IAM user whose credentials were used for the request also was + authenticated with an MFA device; otherwise, false..""" + return self["mfaAuthenticated"] + + +class S3ObjectSessionContext(DictWrapper): + @property + def session_issuer(self) -> S3ObjectSessionIssuer: + """If the request was made with temporary security credentials, an element that provides information + about how the credentials were obtained.""" + return S3ObjectSessionIssuer(self["sessionIssuer"]) + + @property + def attributes(self) -> S3ObjectSessionAttributes: + """Session attributes.""" + return S3ObjectSessionAttributes(self["attributes"]) + + +class S3ObjectUserIdentity(DictWrapper): + """Details about the identity that made the call to S3 Object Lambda. + + Documentation: + ------------- + - https://docs.aws.amazon.com/awscloudtrail/latest/userguide/cloudtrail-event-reference-user-identity.html + """ + + @property + def get_type(self) -> str: + """The type of identity. + + The following values are possible: + + - Root – The request was made with your AWS account credentials. If the userIdentity + type is Root and you set an alias for your account, the userName field contains your account alias. + For more information, see Your AWS Account ID and Its Alias. + - IAMUser – The request was made with the credentials of an IAM user. + - AssumedRole – The request was made with temporary security credentials that were obtained + with a role via a call to the AWS Security Token Service (AWS STS) AssumeRole API. This can include + roles for Amazon EC2 and cross-account API access. + - FederatedUser – The request was made with temporary security credentials that were obtained via a + call to the AWS STS GetFederationToken API. The sessionIssuer element indicates if the API was + called with root or IAM user credentials. + - AWSAccount – The request was made by another AWS account. + - AWSService – The request was made by an AWS account that belongs to an AWS service. + For example, AWS Elastic Beanstalk assumes an IAM role in your account to call other AWS services + on your behalf. + """ + return self["type"] + + @property + def account_id(self) -> str: + """The account that owns the entity that granted permissions for the request. + + If the request was made with temporary security credentials, this is the account that owns the IAM + user or role that was used to obtain credentials.""" + return self["accountId"] + + @property + def access_key_id(self) -> str: + """The access key ID that was used to sign the request. + + If the request was made with temporary security credentials, this is the access key ID of + the temporary credentials. For security reasons, accessKeyId might not be present, or might + be displayed as an empty string.""" + return self["accessKeyId"] + + @property + def user_name(self) -> str: + """The friendly name of the identity that made the call.""" + return self["userName"] + + @property + def principal_id(self) -> str: + """The unique identifier for the identity that made the call. + + For requests made with temporary security credentials, this value includes + the session name that is passed to the AssumeRole, AssumeRoleWithWebIdentity, + or GetFederationToken API call.""" + return self["principalId"] + + @property + def arn(self) -> str: + """The ARN of the principal that made the call. + The last section of the ARN contains the user or role that made the call.""" + return self["arn"] + + @property + def session_context(self) -> Optional[S3ObjectSessionContext]: + """If the request was made with temporary security credentials, + this element provides information about the session that was created for those credentials.""" + session_context = self.get("sessionContext") + + if session_context is None: + return None + + return S3ObjectSessionContext(session_context) + + +class S3ObjectLambdaEvent(DictWrapper): + """S3 object lambda event + + Documentation: + ------------- + - https://docs.aws.amazon.com/AmazonS3/latest/userguide/olap-writing-lambda.html + + Example + ------- + **Fetch and transform original object from Amazon S3** + + import boto3 + import requests + from aws_lambda_powertools.utilities.data_classes.s3_object_event import S3ObjectLambdaEvent + + session = boto3.Session() + s3 = session.client("s3") + + def lambda_handler(event, context): + event = S3ObjectLambdaEvent(event) + + # Get object from S3 + response = requests.get(event.input_s3_url) + original_object = response.content.decode("utf-8") + + # Make changes to the object about to be returned + transformed_object = original_object.upper() + + # Write object back to S3 Object Lambda + s3.write_get_object_response( + Body=transformed_object, RequestRoute=event.request_route, RequestToken=event.request_token + ) + """ + + @property + def request_id(self) -> str: + """The Amazon S3 request ID for this request. We recommend that you log this value to help with debugging.""" + return self["xAmzRequestId"] + + @property + def object_context(self) -> S3ObjectContext: + """The input and output details for connections to Amazon S3 and S3 Object Lambda.""" + return S3ObjectContext(self["getObjectContext"]) + + @property + def configuration(self) -> S3ObjectConfiguration: + """Configuration information about the S3 Object Lambda access point.""" + return S3ObjectConfiguration(self["configuration"]) + + @property + def user_request(self) -> S3ObjectUserRequest: + """Information about the original call to S3 Object Lambda.""" + return S3ObjectUserRequest(self["userRequest"]) + + @property + def user_identity(self) -> S3ObjectUserIdentity: + """Details about the identity that made the call to S3 Object Lambda.""" + return S3ObjectUserIdentity(self["userIdentity"]) + + @property + def request_route(self) -> str: + """A routing token that is added to the S3 Object Lambda URL when the Lambda function + calls `WriteGetObjectResponse`.""" + return self.object_context.output_route + + @property + def request_token(self) -> str: + """An opaque token used by S3 Object Lambda to match the WriteGetObjectResponse call + with the original caller.""" + return self.object_context.output_token + + @property + def input_s3_url(self) -> str: + """A pre-signed URL that can be used to fetch the original object from Amazon S3. + + The URL is signed using the original caller’s identity, and their permissions + will apply when the URL is used. If there are signed headers in the URL, the + Lambda function must include these in the call to Amazon S3, except for the Host. + + Example + ------- + **Fetch original object from Amazon S3** + + import requests + from aws_lambda_powertools.utilities.data_classes.s3_object_event import S3ObjectLambdaEvent + + def lambda_handler(event, context): + event = S3ObjectLambdaEvent(event) + + response = requests.get(event.input_s3_url) + original_object = response.content.decode("utf-8") + ... + """ + return self.object_context.input_s3_url + + @property + def protocol_version(self) -> str: + """The version ID of the context provided. + + The format of this field is `{Major Version}`.`{Minor Version}`. + The minor version numbers are always two-digit numbers. Any removal or change to the semantics of a + field will necessitate a major version bump and will require active opt-in. Amazon S3 can add new + fields at any time, at which point you might experience a minor version bump. Due to the nature of + software rollouts, it is possible that you might see multiple minor versions in use at once. + """ + return self["protocolVersion"] diff --git a/docs/utilities/data_classes.md b/docs/utilities/data_classes.md index b834390cc15..c7c11b6b2f9 100644 --- a/docs/utilities/data_classes.md +++ b/docs/utilities/data_classes.md @@ -59,6 +59,7 @@ Event Source | Data_class [EventBridge](#eventbridge) | `EventBridgeEvent` [Kinesis Data Stream](#kinesis-streams) | `KinesisStreamEvent` [S3](#s3) | `S3Event` +[S3 Object Lambda](#s3-object-lambda) | `S3ObjectLambdaEvent` [SES](#ses) | `SESEvent` [SNS](#sns) | `SNSEvent` [SQS](#sqs) | `SQSEvent` @@ -547,6 +548,43 @@ or plain text, depending on the original payload. do_something_with(f'{bucket_name}/{object_key}') ``` +### S3 Object Lambda + +This example is based on the AWS Blog post [Introducing Amazon S3 Object Lambda – Use Your Code to Process Data as It Is Being Retrieved from S3](https://aws.amazon.com/blogs/aws/introducing-amazon-s3-object-lambda-use-your-code-to-process-data-as-it-is-being-retrieved-from-s3/){target="_blank"}. + +=== "app.py" + + ```python hl_lines="4-5 10 12" + import boto3 + import requests + + from aws_lambda_powertools import Logger + from aws_lambda_powertools.logging.correlation_paths import S3_OBJECT_LAMBDA + from aws_lambda_powertools.utilities.data_classes.s3_object_event import S3ObjectLambdaEvent + + logger = Logger() + session = boto3.Session() + s3 = session.client("s3") + + @logger.inject_lambda_context(correlation_id_path=S3_OBJECT_LAMBDA, log_event=True) + def lambda_handler(event, context): + event = S3ObjectLambdaEvent(event) + + # Get object from S3 + response = requests.get(event.input_s3_url) + original_object = response.content.decode("utf-8") + + # Make changes to the object about to be returned + transformed_object = original_object.upper() + + # Write object back to S3 Object Lambda + s3.write_get_object_response( + Body=transformed_object, RequestRoute=event.request_route, RequestToken=event.request_token + ) + + return {"status_code": 200} + ``` + ### SES === "app.py" diff --git a/tests/events/s3ObjectEventIAMUser.json b/tests/events/s3ObjectEventIAMUser.json new file mode 100644 index 00000000000..6be41c4352b --- /dev/null +++ b/tests/events/s3ObjectEventIAMUser.json @@ -0,0 +1,30 @@ +{ + "xAmzRequestId": "1a5ed718-5f53-471d-b6fe-5cf62d88d02a", + "getObjectContext": { + "inputS3Url": "https://myap-123412341234.s3-accesspoint.us-east-1.amazonaws.com/s3.txt?X-Amz-Security-Token=...", + "outputRoute": "io-iad-cell001", + "outputToken": "..." + }, + "configuration": { + "accessPointArn": "arn:aws:s3-object-lambda:us-east-1:123412341234:accesspoint/myolap", + "supportingAccessPointArn": "arn:aws:s3:us-east-1:123412341234:accesspoint/myap", + "payload": "test" + }, + "userRequest": { + "url": "/s3.txt", + "headers": { + "Host": "myolap-123412341234.s3-object-lambda.us-east-1.amazonaws.com", + "Accept-Encoding": "identity", + "X-Amz-Content-SHA256": "e3b0c44297fc1c149afbf4c8995fb92427ae41e4649b934ca495991b7852b855" + } + }, + "userIdentity": { + "type": "IAMUser", + "principalId": "...", + "arn": "arn:aws:iam::123412341234:user/myuser", + "accountId": "123412341234", + "accessKeyId": "...", + "userName": "Alice" + }, + "protocolVersion": "1.00" +} diff --git a/tests/events/s3ObjectEventTempCredentials.json b/tests/events/s3ObjectEventTempCredentials.json new file mode 100644 index 00000000000..30c70fe6df9 --- /dev/null +++ b/tests/events/s3ObjectEventTempCredentials.json @@ -0,0 +1,42 @@ +{ + "xAmzRequestId": "requestId", + "getObjectContext": { + "inputS3Url": "https://my-s3-ap-111122223333.s3-accesspoint.us-east-1.amazonaws.com/example?X-Amz-Security-Token=", + "outputRoute": "io-use1-001", + "outputToken": "OutputToken" + }, + "configuration": { + "accessPointArn": "arn:aws:s3-object-lambda:us-east-1:111122223333:accesspoint/example-object-lambda-ap", + "supportingAccessPointArn": "arn:aws:s3:us-east-1:111122223333:accesspoint/example-ap", + "payload": "{}" + }, + "userRequest": { + "url": "https://object-lambda-111122223333.s3-object-lambda.us-east-1.amazonaws.com/example", + "headers": { + "Host": "object-lambda-111122223333.s3-object-lambda.us-east-1.amazonaws.com", + "Accept-Encoding": "identity", + "X-Amz-Content-SHA256": "e3b0c44298fc1example" + } + }, + "userIdentity": { + "type": "AssumedRole", + "principalId": "principalId", + "arn": "arn:aws:sts::111122223333:assumed-role/Admin/example", + "accountId": "111122223333", + "accessKeyId": "accessKeyId", + "sessionContext": { + "attributes": { + "mfaAuthenticated": "false", + "creationDate": "Wed Mar 10 23:41:52 UTC 2021" + }, + "sessionIssuer": { + "type": "Role", + "principalId": "principalId", + "arn": "arn:aws:iam::111122223333:role/Admin", + "accountId": "111122223333", + "userName": "Admin" + } + } + }, + "protocolVersion": "1.00" +} diff --git a/tests/functional/test_lambda_trigger_events.py b/tests/functional/test_lambda_trigger_events.py index 7517c1a3c87..62bcb50762c 100644 --- a/tests/functional/test_lambda_trigger_events.py +++ b/tests/functional/test_lambda_trigger_events.py @@ -48,6 +48,7 @@ DynamoDBStreamEvent, StreamViewType, ) +from aws_lambda_powertools.utilities.data_classes.s3_object_event import S3ObjectLambdaEvent def load_event(file_name: str) -> dict: @@ -1005,3 +1006,56 @@ def test_appsync_resolver_event_empty(): assert event.info.field_name is None assert event.info.parent_type_name is None + + +def test_s3_object_event_iam(): + event = S3ObjectLambdaEvent(load_event("s3ObjectEventIAMUser.json")) + + assert event.request_id == "1a5ed718-5f53-471d-b6fe-5cf62d88d02a" + assert event.object_context is not None + object_context = event.object_context + assert object_context.input_s3_url == event["getObjectContext"]["inputS3Url"] + assert object_context.output_route == event["getObjectContext"]["outputRoute"] + assert object_context.output_token == event["getObjectContext"]["outputToken"] + assert event.configuration is not None + configuration = event.configuration + assert configuration.access_point_arn == event["configuration"]["accessPointArn"] + assert configuration.supporting_access_point_arn == event["configuration"]["supportingAccessPointArn"] + assert configuration.payload == event["configuration"]["payload"] + assert event.user_request is not None + user_request = event.user_request + assert user_request.url == event["userRequest"]["url"] + assert user_request.headers == event["userRequest"]["headers"] + assert user_request.get_header_value("Accept-Encoding") == "identity" + assert event.user_identity is not None + user_identity = event.user_identity + assert user_identity.get_type == event["userIdentity"]["type"] + assert user_identity.principal_id == event["userIdentity"]["principalId"] + assert user_identity.arn == event["userIdentity"]["arn"] + assert user_identity.account_id == event["userIdentity"]["accountId"] + assert user_identity.access_key_id == event["userIdentity"]["accessKeyId"] + assert user_identity.user_name == event["userIdentity"]["userName"] + assert user_identity.session_context is None + assert event.protocol_version == event["protocolVersion"] + assert event.request_route == object_context.output_route + assert event.request_token == object_context.output_token + assert event.input_s3_url == object_context.input_s3_url + + +def test_s3_object_event_temp_credentials(): + event = S3ObjectLambdaEvent(load_event("s3ObjectEventTempCredentials.json")) + + assert event.request_id == "requestId" + session_context = event.user_identity.session_context + assert session_context is not None + session_issuer = session_context.session_issuer + assert session_issuer is not None + assert session_issuer.get_type == session_context["sessionIssuer"]["type"] + assert session_issuer.user_name == session_context["sessionIssuer"]["userName"] + assert session_issuer.principal_id == session_context["sessionIssuer"]["principalId"] + assert session_issuer.arn == session_context["sessionIssuer"]["arn"] + assert session_issuer.account_id == session_context["sessionIssuer"]["accountId"] + session_attributes = session_context.attributes + assert session_attributes is not None + assert session_attributes.mfa_authenticated == session_context["attributes"]["mfaAuthenticated"] + assert session_attributes.creation_date == session_context["attributes"]["creationDate"]