from pypdf import PdfReader, PdfWriter
import uuid
import os
from urllib.parse import unquote_plus
import boto3

# Create the S3 client to download and upload objects from S3
s3_client = boto3.client('s3')


def lambda_handler(event, context):
    """Encrypt every PDF named in an S3 event and upload the result.

    For each record whose object key ends in ``.pdf`` (case-insensitive), the
    object is downloaded to the Lambda ``/tmp`` directory, encrypted with
    ``encrypt_pdf`` and uploaded to the bucket named ``<source-bucket>-encrypted``
    under the key produced by ``add_encrypted_suffix``. Other keys are ignored.

    Args:
        event: S3 event notification; ``event['Records']`` is iterated.
        context: Lambda context object (unused).
    """
    # Iterate over the S3 event object and get the key for all uploaded files
    for record in event['Records']:
        bucket = record['s3']['bucket']['name']
        # Decode the S3 object key to remove any URL-encoded characters
        key = unquote_plus(record['s3']['object']['key'])

        # Only PDFs are processed; skip everything else before touching /tmp
        if not key.lower().endswith('.pdf'):
            continue

        # Unique paths in the Lambda tmp directory for the original and the
        # encrypted copy, so records never collide with each other
        download_path = f'/tmp/{uuid.uuid4()}.pdf'
        upload_path = f'/tmp/converted-{uuid.uuid4()}.pdf'

        try:
            s3_client.download_file(bucket, key, download_path)
            encrypt_pdf(download_path, upload_path)
            encrypted_key = add_encrypted_suffix(key)
            s3_client.upload_file(upload_path, f'{bucket}-encrypted', encrypted_key)
        finally:
            # /tmp can be reused by later invocations of the same execution
            # environment, so remove the temporary files even when a step fails
            for temp_path in (download_path, upload_path):
                if os.path.exists(temp_path):
                    os.remove(temp_path)


# Define the function to encrypt the PDF file with a password
def encrypt_pdf(file_path, encrypted_file_path, password="my-secret-password"):
    """Write a password-protected copy of a PDF.

    Args:
        file_path: Path of the PDF to read.
        encrypted_file_path: Path the encrypted PDF is written to.
        password: Password applied to the new PDF. Defaults to the sample's
            hardcoded value.
    """
    reader = PdfReader(file_path)
    writer = PdfWriter()

    for page in reader.pages:
        writer.add_page(page)

    # Add a password to the new PDF
    # In this example, the default password is hardcoded.
    # In a production application, don't hardcode passwords or other sensitive information.
    # We recommend you use AWS Secrets Manager to securely store passwords.
    writer.encrypt(password)

    # Save the new PDF to a file
    with open(encrypted_file_path, "wb") as file:
        writer.write(file)


# Define a function to add a suffix to the original filename after encryption
def add_encrypted_suffix(original_key):
    """Return ``original_key`` with ``_encrypted`` inserted before its extension.

    ``reports/a.pdf`` becomes ``reports/a_encrypted.pdf``. When the final path
    component has no extension, the suffix is appended to the whole key instead.
    """
    filename, dot, extension = original_key.rpartition('.')
    # No dot at all, or the last dot belongs to a directory name rather than
    # the file name: there is no extension to preserve
    if not dot or '/' in extension:
        return f'{original_key}_encrypted'
    return f'{filename}_encrypted.{extension}'
import boto3
import json
import pytest
import time
import os

# Names of the deployed resources and test objects used by every test below
SOURCE_BUCKET = 'EXAMPLE-BUCKET'
DESTINATION_BUCKET = 'EXAMPLE-BUCKET-encrypted'
SOURCE_FILE_KEY = 'test.pdf'
ENCRYPTED_FILE_KEY = 'test_encrypted.pdf'
LOG_GROUP_NAME = '/aws/lambda/EncryptPDF'


@pytest.fixture
def lambda_client():
    """Return a boto3 Lambda client."""
    return boto3.client('lambda')


@pytest.fixture
def s3_client():
    """Return a boto3 S3 client."""
    return boto3.client('s3')


@pytest.fixture
def logs_client():
    """Return a boto3 CloudWatch Logs client."""
    return boto3.client('logs')


@pytest.fixture(scope='session')
def cleanup():
    """Delete the uploaded and the encrypted test PDF once the session ends."""
    # Create a new S3 client for cleanup
    s3_client = boto3.client('s3')

    yield
    # Cleanup code will be executed after all tests have finished

    # Delete test.pdf from the source bucket
    s3_client.delete_object(Bucket=SOURCE_BUCKET, Key=SOURCE_FILE_KEY)
    print(f"\nDeleted {SOURCE_FILE_KEY} from {SOURCE_BUCKET}")

    # Delete test_encrypted.pdf from the destination bucket
    s3_client.delete_object(Bucket=DESTINATION_BUCKET, Key=ENCRYPTED_FILE_KEY)
    print(f"Deleted {ENCRYPTED_FILE_KEY} from {DESTINATION_BUCKET}")


def _reports_success(log_events):
    """Return True if any log event is a JSON object with record.status == 'success'."""
    for event in log_events:
        try:
            message = json.loads(event['message'])
        except (TypeError, ValueError):
            # Not a JSON log line; it cannot carry the status we look for
            continue
        if not isinstance(message, dict):
            continue
        record = message.get('record')
        if isinstance(record, dict) and record.get('status') == 'success':
            return True
    return False


@pytest.mark.order(1)
def test_source_bucket_available(s3_client):
    """Upload test.pdf (located next to this file) to the source bucket."""
    file_path = os.path.join(os.path.dirname(__file__), SOURCE_FILE_KEY)

    try:
        s3_client.upload_file(file_path, SOURCE_BUCKET, SOURCE_FILE_KEY)
    except Exception as err:
        # Surface the underlying cause instead of hiding it behind a generic message
        pytest.fail(f"Could not upload file to S3 bucket: {err}")


@pytest.mark.order(2)
def test_lambda_invoked(logs_client):
    """Check the function's latest log stream for a 'success' status record."""
    # Wait for a few seconds to make sure the logs are available
    time.sleep(5)

    # Get the latest log stream for the specified log group
    log_streams = logs_client.describe_log_streams(
        logGroupName=LOG_GROUP_NAME,
        orderBy='LastEventTime',
        descending=True,
        limit=1
    )

    streams = log_streams['logStreams']
    assert streams, f"No log streams found in log group '{LOG_GROUP_NAME}'."
    latest_log_stream_name = streams[0]['logStreamName']

    # Retrieve the log events from the latest log stream
    log_events = logs_client.get_log_events(
        logGroupName=LOG_GROUP_NAME,
        logStreamName=latest_log_stream_name
    )

    success_found = _reports_success(log_events['events'])

    assert success_found, "Lambda function execution did not report 'success' status in logs."


@pytest.mark.order(3)
def test_encrypted_file_in_bucket(s3_client):
    """Verify the encrypted PDF exists in the destination bucket."""
    try:
        # Attempt to retrieve the metadata of the converted file from the destination S3 bucket
        s3_client.head_object(Bucket=DESTINATION_BUCKET, Key=ENCRYPTED_FILE_KEY)
    except s3_client.exceptions.ClientError as e:
        # If the file is not found, the test will fail
        pytest.fail(f"Converted file '{ENCRYPTED_FILE_KEY}' not found in the destination bucket: {str(e)}")


def test_cleanup(cleanup):
    """Request the cleanup fixture so its teardown removes the test objects."""
    # This test uses the cleanup fixture and will be executed last
    pass