From b0c24120ce6e0b0dcd3d1eef9e71789b5d11316f Mon Sep 17 00:00:00 2001 From: Deniz Zorlu Date: Fri, 20 Aug 2021 11:16:12 -0700 Subject: [PATCH 1/2] log predict time properly --- src/sagemaker_huggingface_inference_toolkit/handler_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sagemaker_huggingface_inference_toolkit/handler_service.py b/src/sagemaker_huggingface_inference_toolkit/handler_service.py index c2d8e9d..86222ce 100644 --- a/src/sagemaker_huggingface_inference_toolkit/handler_service.py +++ b/src/sagemaker_huggingface_inference_toolkit/handler_service.py @@ -188,7 +188,7 @@ def transform_fn(self, model, input_data, content_type, accept): processed_data = self.preprocess(input_data, content_type) preprocess_time = time.time() - start_time predictions = self.predict(processed_data, model) - predict_time = time.time() - preprocess_time + predict_time = time.time() - preprocess_time - start_time response = self.postprocess(predictions, accept) logger.info( From 6a562d8685d4fded07493616b94139bef3cd2476 Mon Sep 17 00:00:00 2001 From: Philipp Schmid <32632186+philschmid@users.noreply.github.com> Date: Mon, 23 Aug 2021 10:47:53 +0200 Subject: [PATCH 2/2] added clean postprocess time --- src/sagemaker_huggingface_inference_toolkit/handler_service.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/sagemaker_huggingface_inference_toolkit/handler_service.py b/src/sagemaker_huggingface_inference_toolkit/handler_service.py index 86222ce..661061b 100644 --- a/src/sagemaker_huggingface_inference_toolkit/handler_service.py +++ b/src/sagemaker_huggingface_inference_toolkit/handler_service.py @@ -190,11 +190,12 @@ def transform_fn(self, model, input_data, content_type, accept): predictions = self.predict(processed_data, model) predict_time = time.time() - preprocess_time - start_time response = self.postprocess(predictions, accept) + postprocess_time = time.time() - predict_time - preprocess_time - start_time logger.info( f"Preprocess time - {preprocess_time * 1000} ms\n" f"Predict time - {predict_time * 1000} ms\n" - f"Postprocess time - {(time.time() - predict_time) * 1000} ms" + f"Postprocess time - {postprocess_time * 1000} ms" ) return response