Skip to content

Add code files for Python example file-processing app #464

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 18, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions sample-apps/file-processing-python/lambda_function.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from pypdf import PdfReader, PdfWriter
import uuid
import os
from urllib.parse import unquote_plus
import boto3

# Create the S3 client once, at import time, so lambda_handler uses the same
# client for both downloading source objects and uploading encrypted ones
s3_client = boto3.client('s3')

def lambda_handler(event, context):
    """Encrypt each PDF named in an S3 event and upload the encrypted copy.

    Args:
        event: S3 event notification. Every entry of ``event['Records']``
            supplies the source bucket name and the URL-encoded object key.
        context: Lambda context object (unused).

    Objects whose key does not end in ``.pdf`` (case-insensitive) are skipped.
    Encrypted copies are uploaded to the bucket named
    ``<source bucket>-encrypted`` under the key returned by
    ``add_encrypted_suffix``.
    """
    # Iterate over the S3 event object and get the key for all uploaded files
    for record in event['Records']:
        bucket = record['s3']['bucket']['name']
        # Decode the S3 object key to remove any URL-encoded characters
        key = unquote_plus(record['s3']['object']['key'])

        # Only PDFs are processed; skip everything else before touching /tmp
        if not key.lower().endswith('.pdf'):
            continue

        # Unique paths in the Lambda tmp directory for the original and the
        # encrypted copy, so records never overwrite each other's files
        download_path = f'/tmp/{uuid.uuid4()}.pdf'
        upload_path = f'/tmp/converted-{uuid.uuid4()}.pdf'

        try:
            s3_client.download_file(bucket, key, download_path)
            encrypt_pdf(download_path, upload_path)
            encrypted_key = add_encrypted_suffix(key)
            s3_client.upload_file(upload_path, f'{bucket}-encrypted', encrypted_key)
        finally:
            # Remove the temp files even when a step fails: /tmp can be
            # reused by later invocations, so leftovers would accumulate
            for path in (download_path, upload_path):
                try:
                    os.remove(path)
                except FileNotFoundError:
                    # The step that creates this file never ran
                    pass

# Define the function to encrypt the PDF file with a password
def encrypt_pdf(file_path, encrypted_file_path, password="my-secret-password"):
    """Write a password-protected copy of a PDF file.

    Args:
        file_path: Path of the PDF to read.
        encrypted_file_path: Path the encrypted PDF is written to.
        password: Password applied to the new PDF. The default value is
            purely illustrative; see the note in the body.
    """
    reader = PdfReader(file_path)
    writer = PdfWriter()

    # Copy every page of the source document into the new PDF
    for page in reader.pages:
        writer.add_page(page)

    # Add a password to the new PDF.
    # NOTE: the default password is purely illustrative. As a best practice,
    # do not hard-code secrets; retrieve the password from a service such as
    # AWS Secrets Manager and pass it in through the `password` argument.
    # NOTE(review): no algorithm is passed, so pypdf's default applies; check
    # the pypdf documentation for whether a stronger one should be selected.
    writer.encrypt(password)

    # Save the new PDF to a file
    with open(encrypted_file_path, "wb") as file:
        writer.write(file)

# Define a function to add a suffix to the original filename after encryption
def add_encrypted_suffix(original_key):
    """Return the object key with ``_encrypted`` inserted before the extension.

    Args:
        original_key: S3 object key, e.g. ``reports/2024.q1.pdf``.

    Returns:
        The key with ``_encrypted`` placed before the final extension
        (``reports/2024.q1_encrypted.pdf``). When the last path segment has
        no extension, the suffix is appended to the whole key instead.
    """
    filename, dot, extension = original_key.rpartition('.')

    # No dot at all, or the last dot belongs to a directory name
    # ("dir.v1/file"): there is no file extension to preserve
    if not dot or '/' in extension:
        return f'{original_key}_encrypted'

    return f'{filename}_encrypted.{extension}'
3 changes: 3 additions & 0 deletions sample-apps/file-processing-python/pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[pytest]
# Register the custom "order" marker that the tests apply with
# @pytest.mark.order(n)
markers =
    order: specify test execution order
2 changes: 2 additions & 0 deletions sample-apps/file-processing-python/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
boto3
pypdf
34 changes: 34 additions & 0 deletions sample-apps/file-processing-python/template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# SAM template for the PDF-encryption sample: one Lambda function triggered by
# object-created events on a source bucket, plus the destination bucket that
# receives the encrypted copies.
AWSTemplateFormatVersion: '2010-09-09'
Transform: AWS::Serverless-2016-10-31

Resources:
  EncryptPDFFunction:
    Type: AWS::Serverless::Function
    Properties:
      # The tests read the log group /aws/lambda/EncryptPDF, so this name and
      # the log group name in test_pdf_encrypt.py must stay in sync
      FunctionName: EncryptPDF
      Architectures: [x86_64]
      CodeUri: ./
      Handler: lambda_function.lambda_handler
      Runtime: python3.12
      Timeout: 15
      MemorySize: 256
      LoggingConfig:
        # test_lambda_invoked parses each log message with json.loads
        LogFormat: JSON
      Policies:
        # NOTE(review): broad managed policy; consider scoping access to the
        # source and destination buckets only
        - AmazonS3FullAccess
      Events:
        S3Event:
          Type: S3
          Properties:
            Bucket: !Ref PDFSourceBucket
            Events: s3:ObjectCreated:*

  PDFSourceBucket:
    Type: AWS::S3::Bucket
    Properties:
      # Placeholder name; the tests use the same literal value
      BucketName: EXAMPLE-BUCKET

  EncryptedPDFBucket:
    Type: AWS::S3::Bucket
    Properties:
      # Must be the source bucket name plus "-encrypted": lambda_handler
      # uploads to f'{bucket}-encrypted'
      BucketName: EXAMPLE-BUCKET-encrypted
104 changes: 104 additions & 0 deletions sample-apps/file-processing-python/test_pdf_encrypt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import boto3
import json
import pytest
import time
import os

@pytest.fixture
def lambda_client():
    """Provide a boto3 client for the Lambda API."""
    client = boto3.client('lambda')
    return client

@pytest.fixture
def s3_client():
    """Provide a boto3 client for the S3 API."""
    client = boto3.client('s3')
    return client

@pytest.fixture
def logs_client():
    """Provide a boto3 client for the CloudWatch Logs API."""
    client = boto3.client('logs')
    return client

@pytest.fixture(scope='session')
def cleanup():
    """Delete the test objects from both buckets once the session ends.

    Nothing happens before the ``yield``; the teardown part removes
    ``test.pdf`` from the source bucket and ``test_encrypted.pdf`` from the
    destination bucket, printing one line per deleted object.
    """
    # Dedicated S3 client for the teardown work
    s3 = boto3.client('s3')

    yield

    # Everything below runs after all tests have finished.
    # Each entry: (bucket, object key, text printed before the message)
    leftovers = (
        ('EXAMPLE-BUCKET', 'test.pdf', '\n'),
        ('EXAMPLE-BUCKET-encrypted', 'test_encrypted.pdf', ''),
    )
    for bucket_name, object_key, lead in leftovers:
        s3.delete_object(Bucket=bucket_name, Key=object_key)
        print(f"{lead}Deleted {object_key} from {bucket_name}")


@pytest.mark.order(1)
def test_source_bucket_available(s3_client):
    """Upload the local test.pdf to the source bucket and check it succeeded.

    The file is read from the directory containing this test module and
    stored in the bucket under the key ``test.pdf``.
    """
    s3_bucket_name = 'EXAMPLE-BUCKET'
    file_name = 'test.pdf'
    file_path = os.path.join(os.path.dirname(__file__), file_name)

    try:
        s3_client.upload_file(file_path, s3_bucket_name, file_name)
    except Exception as error:
        # Catch Exception rather than using a bare except, so Ctrl-C and
        # SystemExit still propagate; report the cause for diagnosis
        print(f"Error: couldn't upload file: {error}")
        file_uploaded = False
    else:
        file_uploaded = True

    assert file_uploaded, "Could not upload file to S3 bucket"



def _log_message_reports_success(raw_message):
    """Return True if a log line is JSON with ``record.status == 'success'``."""
    try:
        message = json.loads(raw_message)
    except json.JSONDecodeError:
        # Not a JSON log line; it cannot carry the status field
        return False

    if not isinstance(message, dict):
        return False

    record = message.get('record')
    return isinstance(record, dict) and record.get('status') == 'success'


@pytest.mark.order(2)
def test_lambda_invoked(logs_client):
    """Check that the latest log stream of the function reports a success status.

    Reads the most recent log stream of the ``/aws/lambda/EncryptPDF`` log
    group and passes if any of its events is a JSON message whose
    ``record.status`` equals ``'success'``.
    """
    log_group_name = '/aws/lambda/EncryptPDF'

    # Wait for a few seconds to make sure the logs are available
    time.sleep(5)

    # Get the latest log stream for the specified log group
    log_streams = logs_client.describe_log_streams(
        logGroupName=log_group_name,
        orderBy='LastEventTime',
        descending=True,
        limit=1,
    )

    streams = log_streams['logStreams']
    # Fail with a clear message instead of an IndexError when nothing was logged
    assert streams, f"No log streams found in log group '{log_group_name}'."
    latest_log_stream_name = streams[0]['logStreamName']

    # Retrieve the log events from the latest log stream
    log_events = logs_client.get_log_events(
        logGroupName=log_group_name,
        logStreamName=latest_log_stream_name,
    )

    success_found = any(
        _log_message_reports_success(event['message'])
        for event in log_events['events']
    )

    assert success_found, "Lambda function execution did not report 'success' status in logs."

@pytest.mark.order(3)
def test_encrypted_file_in_bucket(s3_client):
    """Check that the encrypted PDF exists in the destination bucket.

    Requests the metadata of ``test_encrypted.pdf`` from
    ``EXAMPLE-BUCKET-encrypted``; a client error from that request fails
    the test.
    """
    # Expected converted file key in the destination S3 bucket
    converted_file_key = 'test_encrypted.pdf'
    lookup = {'Bucket': 'EXAMPLE-BUCKET-encrypted', 'Key': converted_file_key}

    try:
        # A metadata request is enough to prove the object is there
        s3_client.head_object(**lookup)
    except s3_client.exceptions.ClientError as e:
        # If the file is not found, the test will fail
        pytest.fail(f"Converted file '{converted_file_key}' not found in the destination bucket: {str(e)}")

def test_cleanup(cleanup):
    """Request the cleanup fixture; the test itself does nothing.

    It carries no order marker and is intended to be executed last.
    """