
Commit b3f2e7a

chore(qa): refactor text qa to add support for additional models and fix issues (#332)
* chore(qa): refactor text qa to add support for additional models and fix issues
* chore(rag): fix type in graphql schema preventing deployment if qa used in same stack
* chore(graphql): fix space in graphql schema
* chore(clean): remove useless parts
* chore(qa): fix filtering and move logic
* chore(debug): test image qa fix logging
1 parent 8ce1242 commit b3f2e7a

File tree

19 files changed: +531 −134 lines changed

@@ -0,0 +1 @@
+from .bedrock import *
@@ -0,0 +1 @@
+from .base import ModelAdapter
@@ -0,0 +1,45 @@
+#
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
+# with the License. A copy of the License is located at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
+# and limitations under the License.
+#
+import os
+from langchain.callbacks.base import BaseCallbackHandler
+from langchain.prompts.prompt import PromptTemplate
+
+
+class ModelAdapter:
+    def __init__(self, callback=None, modality='Text', model_kwargs={}):
+        self.model_kwargs = model_kwargs
+        self.modality = modality
+
+        self.callback_handler = callback
+
+        self.llm = self.get_llm(model_kwargs)
+
+    def get_llm(self, model_kwargs={}):
+        raise ValueError("llm must be implemented")
+
+    def get_embeddings_model(self, model_kwargs={}):
+        raise ValueError("embeddings must be implemented")
+
+    def get_prompt(self):
+
+        template = """
+
+Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
+
+{context}
+
+Question: {question}"""
+
+        prompt_template = PromptTemplate(template=template, input_variables=["context", "question"])
+
+        return prompt_template
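For context on how this base class is meant to be consumed (a minimal sketch, not part of the diff): a concrete adapter overrides get_llm, and the constructor wires the result onto self.llm. EchoAdapter below is a made-up stand-in, assuming the ModelAdapter class above is in scope.

from langchain.prompts.prompt import PromptTemplate

# Hypothetical subclass for illustration only; a real adapter returns a
# LangChain LLM bound to an actual model (see the Bedrock adapters below).
class EchoAdapter(ModelAdapter):
    def get_llm(self, model_kwargs={}):
        # Stand-in "LLM": anything the calling QA chain knows how to invoke.
        return lambda prompt: prompt

    def get_prompt(self):
        # Adapters may override the default QA prompt with a model-specific one.
        return PromptTemplate(
            template="Context: {context}\n\nQuestion: {question}\nAnswer:",
            input_variables=["context", "question"],
        )

adapter = EchoAdapter(modality='Text')
print(adapter.llm("hello"))  # -> "hello"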
@@ -0,0 +1,2 @@
+from .claude import *
+from .titan import *
@@ -0,0 +1,134 @@
+#
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
+# with the License. A copy of the License is located at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
+# and limitations under the License.
+#
+import boto3
+
+from langchain_community.llms import Bedrock
+from langchain_community.chat_models import BedrockChat
+from langchain.prompts.prompt import PromptTemplate
+
+from ..base import ModelAdapter
+from ..registry import registry
+
+
+class BedrockClaudeAdapter(ModelAdapter):
+    def __init__(self, model_id, *args, **kwargs):
+        self.model_id = model_id
+
+        super().__init__(*args, **kwargs)
+
+    def get_llm(self, model_kwargs={}):
+        bedrock = boto3.client('bedrock-runtime')
+
+        params = {}
+        if "temperature" in model_kwargs:
+            params["temperature"] = model_kwargs["temperature"]
+        if "top_p" in model_kwargs:
+            params["top_p"] = model_kwargs["top_p"]
+        if "max_tokens_to_sample" in model_kwargs:
+            params["max_tokens_to_sample"] = model_kwargs["max_tokens_to_sample"]
+        if "stop_sequences" in model_kwargs:
+            params["stop_sequences"] = model_kwargs["stop_sequences"]
+        if "top_k" in model_kwargs:
+            params["top_k"] = model_kwargs["top_k"]
+
+        params["anthropic_version"] = "bedrock-2023-05-31"
+
+        kwargs = {
+            "client": bedrock,
+            "model_id": self.model_id,
+            "model_kwargs": params,
+            "streaming": False
+        }
+
+        if self.callback_handler:
+            kwargs["callbacks"] = self.callback_handler
+            kwargs["streaming"] = model_kwargs.get("streaming", False)
+
+        return Bedrock(
+            **kwargs
+        )
+
+    def get_prompt(self):
+        template = """
+
+Human: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
+
+{context}
+
+Question: {question}
+
+Assistant:"""
+
+        return PromptTemplate(
+            template=template, input_variables=["context", "question"]
+        )
+
+# For claude v3, at the moment we need to use BedrockChat
+class BedrockClaudev3Adapter(ModelAdapter):
+    def __init__(self, model_id, *args, **kwargs):
+        self.model_id = model_id
+
+        super().__init__(*args, **kwargs)
+
+    def get_llm(self, model_kwargs={}):
+        bedrock = boto3.client('bedrock-runtime')
+
+        params = {}
+        if "temperature" in model_kwargs:
+            params["temperature"] = model_kwargs["temperature"]
+        if "top_p" in model_kwargs:
+            params["top_p"] = model_kwargs["top_p"]
+        if "max_tokens" in model_kwargs:
+            params["max_tokens"] = model_kwargs["max_tokens"]
+        if "stop_sequences" in model_kwargs:
+            params["stop_sequences"] = model_kwargs["stop_sequences"]
+        if "top_k" in model_kwargs:
+            params["top_k"] = model_kwargs["top_k"]
+
+        params["anthropic_version"] = "bedrock-2023-05-31"
+
+        kwargs = {
+            "client": bedrock,
+            "model_id": self.model_id,
+            "model_kwargs": params,
+            "streaming": False
+        }
+
+        if self.callback_handler:
+            kwargs["callbacks"] = self.callback_handler
+            kwargs["streaming"] = model_kwargs.get("streaming", False)
+
+        return BedrockChat(
+            **kwargs
+        )
+
+    def get_prompt(self):
+        template = """
+
+Human: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
+
+{context}
+
+Question: {question}
+
+Assistant:"""
+
+        return PromptTemplate(
+            template=template, input_variables=["context", "question"]
+        )
+
+
+# Register the adapter
+registry.register(r"^Bedrock.anthropic.claude-v2*", BedrockClaudeAdapter)
+registry.register(r"^Bedrock.anthropic.claude-instant*", BedrockClaudeAdapter)
+registry.register(r"^Bedrock.anthropic.claude-3*", BedrockClaudev3Adapter)
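A hedged sketch of how these registrations are typically consumed (the real call site lives elsewhere in the QA lambda and is not part of this diff; the key format below is an assumption inferred from the registered patterns):

# Assumption: the resolver passes keys shaped like "Bedrock.<provider>.<model-id>".
adapter_class = registry.get_adapter("Bedrock.anthropic.claude-3-sonnet-20240229-v1:0")
if adapter_class is not None:
    adapter = adapter_class(
        model_id="anthropic.claude-3-sonnet-20240229-v1:0",
        model_kwargs={"temperature": 0, "max_tokens": 512},
    )
    llm = adapter.llm              # a BedrockChat instance for Claude 3
    prompt = adapter.get_prompt()  # the Human/Assistant QA template above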
@@ -0,0 +1,77 @@
+#
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
+# with the License. A copy of the License is located at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
+# and limitations under the License.
+#
+import boto3
+from langchain.prompts.prompt import PromptTemplate
+
+from langchain_community.llms import Bedrock
+from langchain_community.embeddings import BedrockEmbeddings
+
+from ..base import ModelAdapter
+from ..registry import registry
+
+
+class BedrockTitanAdapter(ModelAdapter):
+    def __init__(self, model_id, *args, **kwargs):
+        self.model_id = model_id
+
+        super().__init__(*args, **kwargs)
+
+    def get_llm(self, model_kwargs={}):
+        bedrock = boto3.client('bedrock-runtime')
+
+        params = {}
+        if "temperature" in model_kwargs:
+            params["temperature"] = model_kwargs["temperature"]
+        if "topP" in model_kwargs:
+            params["topP"] = model_kwargs["topP"]
+        if "maxTokenCount" in model_kwargs:
+            params["maxTokenCount"] = model_kwargs["maxTokenCount"]
+        if "stopSequences" in model_kwargs:
+            params["stopSequences"] = model_kwargs["stopSequences"]
+
+        kwargs = {
+            "client": bedrock,
+            "model_id": self.model_id,
+            "model_kwargs": params,
+            "streaming": False
+        }
+
+        if self.callback_handler:
+            kwargs["callbacks"] = self.callback_handler
+            kwargs["streaming"] = model_kwargs.get("streaming", False)
+
+        return Bedrock(
+            **kwargs
+        )
+
+    def get_embeddings_model(self, model_kwargs={}):
+        bedrock = boto3.client('bedrock-runtime')
+
+        return BedrockEmbeddings(client=bedrock, model_id=self.model_id)
+
+    def get_prompt(self):
+        template = """Human: The following is a friendly conversation between a human and an AI. The AI is talkative and provides lots of specific details from its context. If the AI does not know the answer to a question, it truthfully says it does not know.
+
+{context}
+
+Question: {question}
+
+Assistant:"""
+
+        return PromptTemplate(
+            template=template, input_variables=["context", "question"]
+        )
+
+# Register the adapter
+registry.register(r"^Bedrock.amazon.titan-t*", BedrockTitanAdapter)
+registry.register(r"^Bedrock.amazon.titan-e*", BedrockTitanAdapter)
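Because the titan-e* pattern also maps to this adapter, the embeddings path can be exercised the same way. A sketch, assuming "amazon.titan-embed-text-v1" is the model the caller passes in (note that instantiation also builds a text LLM via the base constructor):

# Assumption: "amazon.titan-embed-text-v1" is the embeddings model in use.
adapter_class = registry.get_adapter("Bedrock.amazon.titan-embed-text-v1")
adapter = adapter_class(model_id="amazon.titan-embed-text-v1")
embeddings = adapter.get_embeddings_model()
vector = embeddings.embed_query("What does this repository do?")  # list[float]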
@@ -0,0 +1,3 @@
+from .index import AdapterRegistry
+
+registry = AdapterRegistry()
@@ -0,0 +1,33 @@
+#
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
+# with the License. A copy of the License is located at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# or in the 'license' file accompanying this file. This file is distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions
+# and limitations under the License.
+#
+import re
+
+class AdapterRegistry:
+    def __init__(self):
+        # The registry is a dictionary where:
+        #   Keys are compiled regular expressions
+        #   Values are adapter classes
+        self.registry = {}
+
+    def register(self, regex, adapter):
+        # Compiles the regex and stores it in the registry
+        self.registry[re.compile(regex)] = adapter
+
+    def get_adapter(self, model):
+        # Iterates over the registered regexes
+        for regex, adapter in self.registry.items():
+            # If a match is found, returns the associated adapter class
+            if regex.match(model):
+                return adapter
+        # If no match is found, returns None
+        return None
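A small sketch of the lookup semantics (the string values below stand in for adapter classes; re.match anchors at the start of the key, and the first registered pattern that matches wins because dicts preserve insertion order):

r = AdapterRegistry()
r.register(r"^Bedrock.anthropic.claude-3*", "claude3")  # stand-in values
r.register(r"^Bedrock.amazon.titan-t*", "titan")

assert r.get_adapter("Bedrock.anthropic.claude-3-haiku-20240307-v1:0") == "claude3"
assert r.get_adapter("Bedrock.ai21.j2-ultra") is None   # no pattern matches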
@@ -1 +1 @@
-from .text_generation_llm_selector import get_llm, get_max_tokens, get_embeddings_llm,get_bedrock_fm
+from .text_generation_llm_selector import get_max_tokens, get_bedrock_fm

lambda/aws-qa-appsync-opensearch/question_answering/src/llms/text_generation_llm_selector.py (+13 −47)
@@ -11,11 +11,12 @@
 # and limitations under the License.
 #
 from aiohttp import ClientError
-from langchain.llms.bedrock import Bedrock
+from langchain_community.llms import Bedrock
 from langchain_community.embeddings import BedrockEmbeddings
 import os
 import boto3
 from .helper import get_credentials
+from .types import Provider, BedrockModel, MAX_TOKENS_MAP
 
 from aws_lambda_powertools import Logger, Tracer, Metrics
 from aws_lambda_powertools.utilities.typing import LambdaContext
@@ -26,41 +27,6 @@
 tracer = Tracer(service="QUESTION_ANSWERING")
 metrics = Metrics(namespace="question_answering", service="QUESTION_ANSWERING")
 
-
-
-def get_llm(callbacks=None,model_id="anthropic.claude-v2:1"):
-    bedrock = boto3.client('bedrock-runtime')
-
-    params = {
-        "max_tokens_to_sample": 600,
-        "temperature": 0,
-        "top_k": 250,
-        "top_p": 1,
-        "stop_sequences": ["\\n\\nHuman:"],
-    }
-
-    kwargs = {
-        "client": bedrock,
-        "model_id": model_id,
-        "model_kwargs": params,
-        "streaming": False
-    }
-
-    if callbacks:
-        kwargs["callbacks"] = callbacks
-        kwargs["streaming"] = True
-
-    return Bedrock(**kwargs)
-
-def get_embeddings_llm(model_id,modality):
-    bedrock = boto3.client('bedrock-runtime')
-    validation_status=validate_model_id_in_bedrock(model_id,modality)
-    if(validation_status['status']):
-        return BedrockEmbeddings(client=bedrock, model_id=model_id)
-    else:
-        return None
-
-
 def get_bedrock_fm(model_id,modality):
     bedrock_client = boto3.client('bedrock-runtime')
     validation_status= validate_model_id_in_bedrock(model_id,modality)
@@ -71,17 +37,17 @@ def get_bedrock_fm(model_id,modality):
         logger.error(f"reason ::{validation_status['message']} ")
         return None
 
-
-
-#TODO -add max token based on model id
-def get_max_tokens(model_id):
-    match model_id:
-        case "anthropic.claude-v2:1":
-            return 200000
-        case "anthropic.claude-3-sonnet-20240229-v1:0":
-            return 200000
-        case _:
-            return 4096
+
+def get_max_tokens(model):
+
+    # if model is not provided, we default to Claude v2
+    if not model:
+        return MAX_TOKENS_MAP[BedrockModel.ANTHROPIC_CLAUDE_V2_1]
+    try:
+        return MAX_TOKENS_MAP[model]
+    except KeyError:
+        logger.error('unable to get the max tokens for the specified model')
+        return -1
 
 
 def validate_model_id_in_bedrock(model_id,modality):
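The new .types module does not appear in this excerpt. A minimal sketch of the shape get_max_tokens depends on — the names mirror the import above, but the members and values are illustrative assumptions (the 200,000 figures match what the removed match statement hard-coded):

from enum import Enum

class BedrockModel(str, Enum):
    # Illustrative members; the real enum in .types likely covers more models.
    ANTHROPIC_CLAUDE_V2_1 = "anthropic.claude-v2:1"
    ANTHROPIC_CLAUDE_3_SONNET = "anthropic.claude-3-sonnet-20240229-v1:0"

# Max-token (context window) sizes keyed by model identifier.
MAX_TOKENS_MAP = {
    BedrockModel.ANTHROPIC_CLAUDE_V2_1: 200000,
    BedrockModel.ANTHROPIC_CLAUDE_3_SONNET: 200000,
}

Because BedrockModel mixes in str, a raw model-id string hashes equal to its enum member, so MAX_TOKENS_MAP[model] resolves whether the caller passes the enum or the plain string.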
