
Commit 40b159a (2 parents: 2a1c703 + 70c35b1)

* 'develop' of https://github.com/awslabs/aws-lambda-powertools-python:
  fix(batch): report multiple failures (aws-powertools#967)
  docs(tutorial): fix path to images (aws-powertools#963)
  Update documentation mistake (aws-powertools#961)

File tree: 4 files changed (+85 -44)

- aws_lambda_powertools/utilities/batch/base.py (+17 -12)
- docs/tutorial/index.md (+7 -7)
- docs/utilities/batch.md (+1 -1)
- tests/functional/test_utilities_batch.py (+60 -24)

aws_lambda_powertools/utilities/batch/base.py (+17 -12)

```diff
@@ -385,7 +385,7 @@ def _clean(self):
         )

         messages = self._get_messages_to_report()
-        self.batch_response = {"batchItemFailures": [messages]}
+        self.batch_response = {"batchItemFailures": messages}

     def _has_messages_to_report(self) -> bool:
         if self.fail_messages:
@@ -397,7 +397,7 @@ def _has_messages_to_report(self) -> bool:
     def _entire_batch_failed(self) -> bool:
         return len(self.exceptions) == len(self.records)

-    def _get_messages_to_report(self) -> Dict[str, str]:
+    def _get_messages_to_report(self) -> List[Dict[str, str]]:
         """
         Format messages to use in batch deletion
         """
@@ -406,20 +406,25 @@ def _get_messages_to_report(self) -> Dict[str, str]:
     # Event Source Data Classes follow python idioms for fields
     # while Parser/Pydantic follows the event field names to the latter
     def _collect_sqs_failures(self):
-        if self.model:
-            return {"itemIdentifier": msg.messageId for msg in self.fail_messages}
-        return {"itemIdentifier": msg.message_id for msg in self.fail_messages}
+        failures = []
+        for msg in self.fail_messages:
+            msg_id = msg.messageId if self.model else msg.message_id
+            failures.append({"itemIdentifier": msg_id})
+        return failures

     def _collect_kinesis_failures(self):
-        if self.model:
-            # Pydantic model uses int but Lambda poller expects str
-            return {"itemIdentifier": msg.kinesis.sequenceNumber for msg in self.fail_messages}
-        return {"itemIdentifier": msg.kinesis.sequence_number for msg in self.fail_messages}
+        failures = []
+        for msg in self.fail_messages:
+            msg_id = msg.kinesis.sequenceNumber if self.model else msg.kinesis.sequence_number
+            failures.append({"itemIdentifier": msg_id})
+        return failures

     def _collect_dynamodb_failures(self):
-        if self.model:
-            return {"itemIdentifier": msg.dynamodb.SequenceNumber for msg in self.fail_messages}
-        return {"itemIdentifier": msg.dynamodb.sequence_number for msg in self.fail_messages}
+        failures = []
+        for msg in self.fail_messages:
+            msg_id = msg.dynamodb.SequenceNumber if self.model else msg.dynamodb.sequence_number
+            failures.append({"itemIdentifier": msg_id})
+        return failures

     @overload
     def _to_batch_type(self, record: dict, event_type: EventType, model: "BatchTypeModels") -> "BatchTypeModels":
```
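Why the removed collectors were wrong: a dict comprehension with a constant key keeps overwriting the same `"itemIdentifier"` entry, so at most one failed record ever reached the response, and the old `[messages]` wrapping then boxed that single dict in a list. A standalone sketch of the before/after behavior (the message IDs are made up for illustration):

```python
fail_ids = ["msg-1", "msg-2", "msg-3"]  # hypothetical failed message IDs

# Old behavior: a constant-key dict comprehension re-assigns "itemIdentifier"
# on every iteration, so only the last failure survives.
buggy = {"itemIdentifier": msg_id for msg_id in fail_ids}
print({"batchItemFailures": [buggy]})
# {'batchItemFailures': [{'itemIdentifier': 'msg-3'}]}

# New behavior: one dict per failed record, collected into a list -- the
# shape Lambda expects for partial batch responses.
fixed = [{"itemIdentifier": msg_id} for msg_id in fail_ids]
print({"batchItemFailures": fixed})
# {'batchItemFailures': [{'itemIdentifier': 'msg-1'},
#                        {'itemIdentifier': 'msg-2'},
#                        {'itemIdentifier': 'msg-3'}]}
```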

docs/tutorial/index.md (+7 -7)

```diff
@@ -511,7 +511,7 @@ From here, we could [set specific keys](./core/logger.md#append_keys-method){tar
 By having structured logs like this, we can easily search and analyse them in [CloudWatch Logs Insight](https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/AnalyzingLogData.html){target="_blank"}.

 === "CloudWatch Logs Insight Example"
-    ![CloudWatch Logs Insight Example](./media/cloudwatch_logs_insight_example.png)
+    ![CloudWatch Logs Insight Example](../media/cloudwatch_logs_insight_example.png)

 ## Tracing
@@ -625,7 +625,7 @@ We've made the following changes in `template.yaml` for this to work seamless:

 You can now build and deploy our updates with `sam build && sam deploy`. Once deployed, try invoking the application via the API endpoint, and visit [AWS X-Ray Console](https://console.aws.amazon.com/xray/home#/traces/){target="_blank"} to see how much progress we've made so far!!

-![AWS X-Ray Console trace view](./media/tracer_xray_sdk_showcase.png)
+![AWS X-Ray Console trace view](../media/tracer_xray_sdk_showcase.png)

 ### Enriching our generated traces
@@ -709,11 +709,11 @@ Let's break it down:

 Repeat the process of building, deploying, and invoking your application via the API endpoint. Within the [AWS X-Ray Console](https://console.aws.amazon.com/xray/home#/traces/){target="_blank"}, you should now be able to group traces by the `User` and `ColdStart` annotation.

-![Filtering traces by annotations](./media/tracer_xray_sdk_enriched.png)
+![Filtering traces by annotations](../media/tracer_xray_sdk_enriched.png)

 If you choose any of the traces available, try opening the `handler` subsegment and you should see the response of your Lambda function under the `Metadata` tab.

-![Filtering traces by metadata](./media/tracer_xray_sdk_enriched_2.png)
+![Filtering traces by metadata](../media/tracer_xray_sdk_enriched_2.png)

 ### Simplifying with Tracer
@@ -773,14 +773,14 @@ Lambda Powertools optimizes for Lambda compute environment. As such, we add thes
 Repeat the process of building, deploying, and invoking your application via the API endpoint. Within the [AWS X-Ray Console](https://console.aws.amazon.com/xray/home#/traces/){target="_blank"}, you should see a similar view:


-![AWS X-Ray Console trace view using Lambda Powertools Tracer](./media/tracer_utility_showcase_2.png)
+![AWS X-Ray Console trace view using Lambda Powertools Tracer](../media/tracer_utility_showcase_2.png)

 ???+ tip
     Consider using [Amazon CloudWatch ServiceLens view](https://console.aws.amazon.com/cloudwatch/home#servicelens:service-map/map){target="_blank"} as it aggregates AWS X-Ray traces and CloudWatch metrics and logs in one view.

 From here, you can browse to specific logs in CloudWatch Logs Insight, Metrics Dashboard or AWS X-Ray traces.

-![CloudWatch ServiceLens View](./media/tracer_utility_showcase_3.png)
+![CloudWatch ServiceLens View](../media/tracer_utility_showcase_3.png)

 ???+ info
     For more information on Amazon CloudWatch ServiceLens, please visit [link](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/ServiceLens.html).
@@ -990,7 +990,7 @@ That's a lot less boilerplate code! Let's break this down:

 Repeat the process of building, deploying, and invoking your application via the API endpoint a few times to generate metrics - [Artillery](https://www.artillery.io/){target="_blank"} and [K6.io](https://k6.io/open-source){target="_blank"} are quick ways to generate some load. Within [CloudWatch Metrics view](https://console.aws.amazon.com/cloudwatch/home#metricsV2:graph=~()){target="_blank}, you should see `MyApp` custom namespace with your custom metrics there and `SuccessfulGreetings` available to graph.

-![Custom Metrics Example](./media/metrics_utility_showcase.png)
+![Custom Metrics Example](../media/metrics_utility_showcase.png)

 If you're curious about how the EMF portion of your function logs look like, you can quickly go to [CloudWatch ServiceLens view](https://console.aws.amazon.com/cloudwatch/home#servicelens:service-map/map){target="_blank"}, choose your function and open logs. You will see a similar entry that looks like this:
```

docs/utilities/batch.md (+1 -1)

```diff
@@ -921,7 +921,7 @@ class MyPartialProcessor(BasePartialProcessor):
     def _clean(self):
         # It's called once, *after* closing processing all records (closing the context manager)
         # Here we're sending, at once, all successful messages to a ddb table
-        with ddb_table.batch_writer() as batch:
+        with self.ddb_table.batch_writer() as batch:
             for result in self.success_messages:
                 batch.put_item(Item=result)
```

tests/functional/test_utilities_batch.py (+60 -24)

```diff
@@ -414,7 +414,8 @@ def test_batch_processor_middleware_with_failure(sqs_event_factory, record_handl
     # GIVEN
     first_record = SQSRecord(sqs_event_factory("fail"))
     second_record = SQSRecord(sqs_event_factory("success"))
-    event = {"Records": [first_record.raw_event, second_record.raw_event]}
+    third_record = SQSRecord(sqs_event_factory("fail"))
+    event = {"Records": [first_record.raw_event, second_record.raw_event, third_record.raw_event]}

     processor = BatchProcessor(event_type=EventType.SQS)

@@ -426,7 +427,7 @@ def lambda_handler(event, context):
     result = lambda_handler(event, {})

     # THEN
-    assert len(result["batchItemFailures"]) == 1
+    assert len(result["batchItemFailures"]) == 2


 def test_batch_processor_context_success_only(sqs_event_factory, record_handler):
@@ -453,7 +454,8 @@ def test_batch_processor_context_with_failure(sqs_event_factory, record_handler)
     # GIVEN
     first_record = SQSRecord(sqs_event_factory("failure"))
     second_record = SQSRecord(sqs_event_factory("success"))
-    records = [first_record.raw_event, second_record.raw_event]
+    third_record = SQSRecord(sqs_event_factory("fail"))
+    records = [first_record.raw_event, second_record.raw_event, third_record.raw_event]
     processor = BatchProcessor(event_type=EventType.SQS)

     # WHEN
@@ -462,8 +464,10 @@ def test_batch_processor_context_with_failure(sqs_event_factory, record_handler)

     # THEN
     assert processed_messages[1] == ("success", second_record.body, second_record.raw_event)
-    assert len(batch.fail_messages) == 1
-    assert batch.response() == {"batchItemFailures": [{"itemIdentifier": first_record.message_id}]}
+    assert len(batch.fail_messages) == 2
+    assert batch.response() == {
+        "batchItemFailures": [{"itemIdentifier": first_record.message_id}, {"itemIdentifier": third_record.message_id}]
+    }


 def test_batch_processor_kinesis_context_success_only(kinesis_event_factory, kinesis_record_handler):
@@ -491,8 +495,9 @@ def test_batch_processor_kinesis_context_with_failure(kinesis_event_factory, kin
     # GIVEN
     first_record = KinesisStreamRecord(kinesis_event_factory("failure"))
     second_record = KinesisStreamRecord(kinesis_event_factory("success"))
+    third_record = KinesisStreamRecord(kinesis_event_factory("failure"))

-    records = [first_record.raw_event, second_record.raw_event]
+    records = [first_record.raw_event, second_record.raw_event, third_record.raw_event]
     processor = BatchProcessor(event_type=EventType.KinesisDataStreams)

     # WHEN
@@ -501,15 +506,21 @@ def test_batch_processor_kinesis_context_with_failure(kinesis_event_factory, kin

     # THEN
     assert processed_messages[1] == ("success", b64_to_str(second_record.kinesis.data), second_record.raw_event)
-    assert len(batch.fail_messages) == 1
-    assert batch.response() == {"batchItemFailures": [{"itemIdentifier": first_record.kinesis.sequence_number}]}
+    assert len(batch.fail_messages) == 2
+    assert batch.response() == {
+        "batchItemFailures": [
+            {"itemIdentifier": first_record.kinesis.sequence_number},
+            {"itemIdentifier": third_record.kinesis.sequence_number},
+        ]
+    }


 def test_batch_processor_kinesis_middleware_with_failure(kinesis_event_factory, kinesis_record_handler):
     # GIVEN
     first_record = KinesisStreamRecord(kinesis_event_factory("failure"))
     second_record = KinesisStreamRecord(kinesis_event_factory("success"))
-    event = {"Records": [first_record.raw_event, second_record.raw_event]}
+    third_record = KinesisStreamRecord(kinesis_event_factory("failure"))
+    event = {"Records": [first_record.raw_event, second_record.raw_event, third_record.raw_event]}

     processor = BatchProcessor(event_type=EventType.KinesisDataStreams)

@@ -521,7 +532,7 @@ def lambda_handler(event, context):
     result = lambda_handler(event, {})

     # THEN
-    assert len(result["batchItemFailures"]) == 1
+    assert len(result["batchItemFailures"]) == 2


 def test_batch_processor_dynamodb_context_success_only(dynamodb_event_factory, dynamodb_record_handler):
@@ -548,7 +559,8 @@ def test_batch_processor_dynamodb_context_with_failure(dynamodb_event_factory, d
     # GIVEN
     first_record = dynamodb_event_factory("failure")
     second_record = dynamodb_event_factory("success")
-    records = [first_record, second_record]
+    third_record = dynamodb_event_factory("failure")
+    records = [first_record, second_record, third_record]
     processor = BatchProcessor(event_type=EventType.DynamoDBStreams)

     # WHEN
@@ -557,15 +569,21 @@ def test_batch_processor_dynamodb_context_with_failure(dynamodb_event_factory, d

     # THEN
     assert processed_messages[1] == ("success", second_record["dynamodb"]["NewImage"]["Message"]["S"], second_record)
-    assert len(batch.fail_messages) == 1
-    assert batch.response() == {"batchItemFailures": [{"itemIdentifier": first_record["dynamodb"]["SequenceNumber"]}]}
+    assert len(batch.fail_messages) == 2
+    assert batch.response() == {
+        "batchItemFailures": [
+            {"itemIdentifier": first_record["dynamodb"]["SequenceNumber"]},
+            {"itemIdentifier": third_record["dynamodb"]["SequenceNumber"]},
+        ]
+    }


 def test_batch_processor_dynamodb_middleware_with_failure(dynamodb_event_factory, dynamodb_record_handler):
     # GIVEN
     first_record = dynamodb_event_factory("failure")
     second_record = dynamodb_event_factory("success")
-    event = {"Records": [first_record, second_record]}
+    third_record = dynamodb_event_factory("failure")
+    event = {"Records": [first_record, second_record, third_record]}

     processor = BatchProcessor(event_type=EventType.DynamoDBStreams)

@@ -577,7 +595,7 @@ def lambda_handler(event, context):
     result = lambda_handler(event, {})

     # THEN
-    assert len(result["batchItemFailures"]) == 1
+    assert len(result["batchItemFailures"]) == 2


 def test_batch_processor_context_model(sqs_event_factory, order_event_factory):
@@ -639,17 +657,23 @@ def record_handler(record: OrderSqs):
     order_event = order_event_factory({"type": "success"})
     order_event_fail = order_event_factory({"type": "fail"})
     first_record = sqs_event_factory(order_event_fail)
+    third_record = sqs_event_factory(order_event_fail)
     second_record = sqs_event_factory(order_event)
-    records = [first_record, second_record]
+    records = [first_record, second_record, third_record]

     # WHEN
     processor = BatchProcessor(event_type=EventType.SQS, model=OrderSqs)
     with processor(records, record_handler) as batch:
         batch.process()

     # THEN
-    assert len(batch.fail_messages) == 1
-    assert batch.response() == {"batchItemFailures": [{"itemIdentifier": first_record["messageId"]}]}
+    assert len(batch.fail_messages) == 2
+    assert batch.response() == {
+        "batchItemFailures": [
+            {"itemIdentifier": first_record["messageId"]},
+            {"itemIdentifier": third_record["messageId"]},
+        ]
+    }


 def test_batch_processor_dynamodb_context_model(dynamodb_event_factory, order_event_factory):
@@ -726,16 +750,22 @@ def record_handler(record: OrderDynamoDBRecord):
     order_event_fail = order_event_factory({"type": "fail"})
     first_record = dynamodb_event_factory(order_event_fail)
     second_record = dynamodb_event_factory(order_event)
-    records = [first_record, second_record]
+    third_record = dynamodb_event_factory(order_event_fail)
+    records = [first_record, second_record, third_record]

     # WHEN
     processor = BatchProcessor(event_type=EventType.DynamoDBStreams, model=OrderDynamoDBRecord)
     with processor(records, record_handler) as batch:
         batch.process()

     # THEN
-    assert len(batch.fail_messages) == 1
-    assert batch.response() == {"batchItemFailures": [{"itemIdentifier": first_record["dynamodb"]["SequenceNumber"]}]}
+    assert len(batch.fail_messages) == 2
+    assert batch.response() == {
+        "batchItemFailures": [
+            {"itemIdentifier": first_record["dynamodb"]["SequenceNumber"]},
+            {"itemIdentifier": third_record["dynamodb"]["SequenceNumber"]},
+        ]
+    }


 def test_batch_processor_kinesis_context_parser_model(kinesis_event_factory, order_event_factory):
@@ -807,16 +837,22 @@ def record_handler(record: OrderKinesisRecord):

     first_record = kinesis_event_factory(order_event_fail)
     second_record = kinesis_event_factory(order_event)
-    records = [first_record, second_record]
+    third_record = kinesis_event_factory(order_event_fail)
+    records = [first_record, second_record, third_record]

     # WHEN
     processor = BatchProcessor(event_type=EventType.KinesisDataStreams, model=OrderKinesisRecord)
     with processor(records, record_handler) as batch:
         batch.process()

     # THEN
-    assert len(batch.fail_messages) == 1
-    assert batch.response() == {"batchItemFailures": [{"itemIdentifier": first_record["kinesis"]["sequenceNumber"]}]}
+    assert len(batch.fail_messages) == 2
+    assert batch.response() == {
+        "batchItemFailures": [
+            {"itemIdentifier": first_record["kinesis"]["sequenceNumber"]},
+            {"itemIdentifier": third_record["kinesis"]["sequenceNumber"]},
+        ]
+    }


 def test_batch_processor_error_when_entire_batch_fails(sqs_event_factory, record_handler):
```
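The middleware-style tests above invoke a decorated `lambda_handler`; for context, a minimal sketch of the equivalent application code (the payload check in `record_handler` is illustrative only, not from this commit):

```python
from aws_lambda_powertools.utilities.batch import BatchProcessor, EventType, batch_processor
from aws_lambda_powertools.utilities.data_classes.sqs_event import SQSRecord

processor = BatchProcessor(event_type=EventType.SQS)


def record_handler(record: SQSRecord):
    # Raising marks this record as failed; with this commit, *every*
    # failed record is reported back in batchItemFailures, not just one.
    if "fail" in record.body:
        raise ValueError("simulated processing error")


@batch_processor(record_handler=record_handler, processor=processor)
def lambda_handler(event, context):
    # e.g. {"batchItemFailures": [{"itemIdentifier": "..."}, ...]}
    return processor.response()
```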
