
Commit fc320f6

Merge pull request #254 from awslabs/feature/visual-qa
feat(visualqa): question answer on uploaded image
2 parents a4e4089 + 8a0bd43 commit fc320f6

File tree

26 files changed: +1795 −511 lines changed


apidocs/classes/QaAppsyncOpensearch.md (+9 lines)

@@ -34,6 +34,7 @@ The QaAppsyncOpensearch class.
 - [securityGroup](QaAppsyncOpensearch.md#securitygroup)
 - [stage](QaAppsyncOpensearch.md#stage)
 - [vpc](QaAppsyncOpensearch.md#vpc)
+- [CONSTRUCT\_SCHEMA\_UPDATE\_WARNING](QaAppsyncOpensearch.md#construct_schema_update_warning)
 - [usageMetricMap](QaAppsyncOpensearch.md#usagemetricmap)

 ### Methods

@@ -240,6 +241,14 @@ Returns the instance of ec2.IVpc used by the construct

 ___

+### CONSTRUCT\_SCHEMA\_UPDATE\_WARNING
+
+`Static` `Readonly` **CONSTRUCT\_SCHEMA\_UPDATE\_WARNING**: ``"\n Attention QaAppsyncOpensearch users, an update has been made to \n the GraphQL schema.To ensure continued functionality, please review \n and update your GraphQL mutations and subscriptions to align with \n the new schema.This schema update enables enhanced capabilities \n and optimizations,so adopting the changes is recommended. \n Please refer to the construct documentation for details \n on the schema changes and examples of updated GraphQL statements.\n Reach out to the support team if you need assistance \n updating your integration codebase. \n "``
+
+Construct warning
+
+___
+
 ### usageMetricMap

 `Static` `Protected` **usageMetricMap**: `Record`\<`string`, `number`\>
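
For consumers of the construct, the new static property makes the schema change visible in code as well as in the docs. A minimal sketch of surfacing it at synth time, assuming the construct is consumed through the project's jsii-generated Python bindings (the import path and attribute exposure below are assumptions):

# Hedged sketch: log the schema-update warning so integrators notice it.
# Assumes the Python package name and that the static property keeps its
# SCREAMING_SNAKE_CASE name in the generated bindings.
from cdklabs.generative_ai_cdk_constructs import QaAppsyncOpensearch

def print_schema_update_warning() -> None:
    # Reminds integrators to update their GraphQL mutations/subscriptions.
    print(QaAppsyncOpensearch.CONSTRUCT_SCHEMA_UPDATE_WARNING)

if __name__ == "__main__":
    print_schema_update_warning()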

lambda/aws-qa-appsync-opensearch/question_answering/src/lambda.py (+1 line)

@@ -41,3 +41,4 @@ def handler(event, context: LambdaContext) -> dict:

     print(f"llm_response is {llm_response}")
     return llm_response
+
lambda/aws-qa-appsync-opensearch/question_answering/src/llms/__init__.py (+1 −1 line)

@@ -1 +1 @@
-from .text_generation_llm_selector import get_llm, get_max_tokens, get_embeddings_llm
+from .text_generation_llm_selector import get_llm, get_max_tokens, get_embeddings_llm,get_bedrock_fm

lambda/aws-qa-appsync-opensearch/question_answering/src/llms/text_generation_llm_selector.py (+64 −8 lines)

@@ -10,6 +10,7 @@
 # OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
 # and limitations under the License.
 #
+from aiohttp import ClientError
 from langchain.llms.bedrock import Bedrock
 from langchain_community.embeddings import BedrockEmbeddings
 import os

@@ -26,7 +27,8 @@
 metrics = Metrics(namespace="question_answering", service="QUESTION_ANSWERING")


-def get_llm(callbacks=None):
+
+def get_llm(callbacks=None,model_id="anthropic.claude-v2:1"):
     bedrock = boto3.client('bedrock-runtime')

     params = {

@@ -39,7 +41,7 @@ def get_llm(callbacks=None):

     kwargs = {
         "client": bedrock,
-        "model_id": "anthropic.claude-v2:1",
+        "model_id": model_id,
         "model_kwargs": params,
         "streaming": False
     }
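
get_llm now takes an optional model_id and falls back to anthropic.claude-v2:1 when it is omitted. A minimal usage sketch, assuming the function is imported through the llms package __init__ shown above and that Bedrock access is configured in the environment:

# Hedged sketch: pin the text-generation model explicitly instead of relying on the default.
from llms import get_llm

llm = get_llm(model_id="anthropic.claude-v2:1")  # same value as the default, shown for clarity
answer = llm.predict("Summarize the retrieved passages in two sentences.")  # prompt is illustrative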
@@ -50,10 +52,64 @@ def get_llm(callbacks=None):

     return Bedrock(**kwargs)

-def get_embeddings_llm():
+def get_embeddings_llm(model_id,modality):
     bedrock = boto3.client('bedrock-runtime')
-    return BedrockEmbeddings(client=bedrock, model_id="amazon.titan-embed-text-v1")
-
-def get_max_tokens():
-    return 200000
-
+    validation_status=validate_model_id_in_bedrock(model_id,modality)
+    if(validation_status['status']):
+        return BedrockEmbeddings(client=bedrock, model_id=model_id)
+    else:
+        return None
+
+
+def get_bedrock_fm(model_id,modality):
+    bedrock_client = boto3.client('bedrock-runtime')
+    validation_status= validate_model_id_in_bedrock(model_id,modality)
+    logger.info(f' validation_status :: {validation_status}')
+    if(validation_status['status']):
+        return bedrock_client
+    else:
+        logger.error(f"reason ::{validation_status['message']} ")
+        return None
+
+
+
+#TODO -add max token based on model id
+def get_max_tokens(model_id):
+    match model_id:
+        case "anthropic.claude-v2:1":
+            return 200000
+        case "anthropic.claude-3-sonnet-20240229-v1:0":
+            return 200000
+        case _:
+            return 4096
+
+
+def validate_model_id_in_bedrock(model_id,modality):
+    """
+    Validate if the listed model id is supported with given modality
+    in bedrock or not.
+    """
+    response={
+        "status":False,
+        "message":f"model {model_id} is not supported in bedrock."
+    }
+    try:
+        bedrock_client = boto3.client(service_name="bedrock")
+        bedrock_model_list = bedrock_client.list_foundation_models()
+        models = bedrock_model_list["modelSummaries"]
+        for model in models:
+            if model["modelId"].lower() == model_id.lower():
+                response["message"]=f"model {model_id} does not support modality {modality} "
+                for inputModality in model["inputModalities"]:
+                    if inputModality.lower() == modality.lower():
+                        response["message"]=f"model {model_id} with modality {modality} is supported with bedrock "
+                        response["status"] = True
+
+        logger.info(f' response :: {response}')
+        return response
+    except ClientError as ce:
+        message=f"error occured while validating model in bedrock {ce}"
+        logger.error(message)
+        response["status"] = False
+        response["message"] = message
+        return response
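
Taken together, the new helpers gate every model reference on a ListFoundationModels lookup before handing back a runtime client or embeddings object. A minimal sketch of how a caller might combine them; the import paths follow the package layout shown above, the model ids and modality strings are illustrative, and AWS credentials with Bedrock access are assumed:

# Hedged sketch: validate a multimodal model before using it for image Q&A.
from llms import get_bedrock_fm, get_embeddings_llm, get_max_tokens
from llms.text_generation_llm_selector import validate_model_id_in_bedrock

model_id = "anthropic.claude-3-sonnet-20240229-v1:0"

status = validate_model_id_in_bedrock(model_id, "IMAGE")
print(status["message"])                                  # explains why validation passed or failed

if status["status"]:
    bedrock_runtime = get_bedrock_fm(model_id, "IMAGE")   # bedrock-runtime client, or None if unsupported
    context_window = get_max_tokens(model_id)             # 200000 for the Claude models listed above

# Embeddings are validated the same way; None signals an unsupported model/modality pair.
embeddings = get_embeddings_llm("amazon.titan-embed-text-v1", "TEXT")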
New file (+74 lines):

@@ -0,0 +1,74 @@
+#
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
+# with the License. A copy of the License is located at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
+# and limitations under the License.
+#
+from .helper import send_job_status, JobStatus
+from langchain.callbacks.base import BaseCallbackHandler
+from langchain.schema import LLMResult
+import base64
+from typing import Any, Dict, List, Union
+
+from aws_lambda_powertools import Logger, Tracer, Metrics
+
+logger = Logger(service="QUESTION_ANSWERING")
+tracer = Tracer(service="QUESTION_ANSWERING")
+metrics = Metrics(namespace="question_answering", service="QUESTION_ANSWERING")
+
+class StreamingCallbackHandler(BaseCallbackHandler):
+    def __init__(self, status_variables: Dict):
+        self.status_variables = status_variables
+        logger.info("[StreamingCallbackHandler::__init__] Initialized")
+
+    def on_llm_start(self, serialized: Dict[str, Any], prompts: List[str], **kwargs: Any) -> None:
+        """Runs when streaming is started."""
+        logger.info(f"[StreamingCallbackHandler::on_llm_start] Streaming started!")
+
+    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
+        """Run on new LLM token. Only available when streaming is enabled."""
+        try:
+            logger.info(f'[StreamingCallbackHandler::on_llm_new_token] token is: {token}')
+            llm_answer_bytes = token.encode("utf-8")
+            base64_bytes = base64.b64encode(llm_answer_bytes)
+            llm_answer_base64_string = base64_bytes.decode("utf-8")
+
+            self.status_variables['jobstatus'] = JobStatus.STREAMING_NEW_TOKEN.status
+            self.status_variables['answer'] = llm_answer_base64_string
+            send_job_status(self.status_variables)
+
+        except Exception as err:
+            logger.exception(err)
+            self.status_variables['jobstatus'] = JobStatus.ERROR_PREDICTION.status
+            error = JobStatus.ERROR_PREDICTION.get_message()
+            self.status_variables['answer'] = error.decode("utf-8")
+            send_job_status(self.status_variables)
+
+    def on_llm_end(self, response: LLMResult, **kwargs: Any) -> None:
+        """Run when LLM ends running."""
+        logger.info(f"[StreamingCallbackHandler::on_llm_end] Streaming ended. Response: {response}")
+        try:
+            self.status_variables['jobstatus'] = JobStatus.STREAMING_ENDED.status
+            self.status_variables['answer'] = ""
+            send_job_status(self.status_variables)
+
+        except Exception as err:
+            logger.exception(err)
+            self.status_variables['jobstatus'] = JobStatus.ERROR_PREDICTION.status
+            error = JobStatus.ERROR_PREDICTION.get_message()
+            self.status_variables['answer'] = error.decode("utf-8")
+            send_job_status(self.status_variables)
+
+    def on_llm_error(self, error: Union[Exception, KeyboardInterrupt], **kwargs: Any) -> None:
+        """Run when LLM errors."""
+        logger.exception(error)
+        self.status_variables['jobstatus'] = JobStatus.ERROR_PREDICTION.status
+        error = JobStatus.ERROR_PREDICTION.get_message()
+        self.status_variables['answer'] = error.decode("utf-8")
+        send_job_status(self.status_variables)
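
The handler base64-encodes each streamed token and pushes it to the job-status channel via send_job_status, so subscribers receive partial answers as they are generated. A minimal wiring sketch follows; the handler's import path is hypothetical since the new file's name is not captured above, the status dictionary is reduced to the two fields the handler writes, and send_job_status only does something useful inside the Lambda's AppSync environment:

# Hedged sketch: stream Bedrock tokens through the new callback handler.
import boto3
from langchain.llms.bedrock import Bedrock
# Hypothetical module path; adjust to wherever the handler file actually lives.
from streaming_callback_handler import StreamingCallbackHandler

status_variables = {"jobstatus": "", "answer": ""}  # the real dict also carries job/question identifiers
handler = StreamingCallbackHandler(status_variables)

llm = Bedrock(
    client=boto3.client("bedrock-runtime"),
    model_id="anthropic.claude-v2:1",
    streaming=True,            # on_llm_new_token only fires when streaming is enabled
    callbacks=[handler],
)
llm.predict("Describe the uploaded image in one paragraph.")  # prompt is illustrative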
