5
5
from aws_lambda_powertools import Logger , Tracer , Metrics
6
6
from aws_lambda_powertools .metrics import MetricUnit
7
7
from helpers .image_transformer import image_transformer
8
+ from helpers .pdf_transformer import pdf_transformer
8
9
from botocore .exceptions import ClientError
10
+ from langchain_core .prompts import PromptTemplate
9
11
10
12
11
13
18
20
19
21
@tracer.capture_method
def isvalid_file_format(file_name: str) -> bool:
    """Return True when ``file_name`` ends with a supported extension.

    Supported extensions are .pdf, .txt, .jpg, .png, .jpeg and .svg.
    The comparison is a plain case-sensitive suffix match.

    Args:
        file_name: Name (or key) of the file to validate.

    Returns:
        True if the name ends with one of the supported extensions;
        otherwise prints the list of supported extensions and returns False.
    """
    # Entries must not carry surrounding whitespace: a suffix such as
    # '.jpeg ' can never match a real file name.
    file_format = ['.pdf', '.txt', '.jpg', '.png', '.jpeg', '.svg']

    if file_name.endswith(tuple(file_format)):
        return True

    print(f'Invalid file format :: {file_format}')
    return False
27
29
28
-
30
@tracer.capture_method
def transform_pdf_document(input_bucket: str, file_name: str, output_bucket: str, output_file_name: str) -> str:
    """Load a PDF through ``pdf_transformer`` and store its text in S3.

    Args:
        input_bucket: Bucket holding the source PDF.
        file_name: Key of the source PDF inside ``input_bucket``.
        output_bucket: Bucket that receives the extracted text.
        output_file_name: Key under which the extracted text is written.

    Returns:
        'Unable to load document' when ``pdf_transformer`` yields a falsy
        result, otherwise 'File transformed' after the object is written.
    """
    document_content = pdf_transformer(input_bucket, file_name)
    if not document_content:
        return 'Unable to load document'

    # The content is written as UTF-8 bytes.
    encoded_string = document_content.encode("utf-8")
    # NOTE(review): this is the resource-style ``s3.Bucket(...)`` API, while
    # other helpers in this file call ``s3.upload_file(path, bucket, key)``
    # (client-style). Confirm the module-level ``s3`` object supports both.
    s3.Bucket(output_bucket).put_object(Key=output_file_name, Body=encoded_string)
    return 'File transformed'
29
39
30
40
@tracer .capture_method
31
41
def transform_image_document (input_bucket : str ,file_name : str ,output_bucket : str ):
@@ -40,19 +50,67 @@ def transform_image_document(input_bucket: str,file_name: str,output_bucket: str
40
50
image_details = {
41
51
"image_lables" :result_lables ,
42
52
"image_celeb" :result_celeb
43
- }
53
+ }
54
+
44
55
name , extension = os .path .splitext (file_name )
56
+
57
+ lables_txt = convert_lables_to_sentence (result_lables )
58
+ # with open ('/tmp/'+name+'.txt','w') as f:
59
+ # f.write(json.dumps(image_details))
60
+ # checking with a sentence: save the sentence instead of the labels
61
+
45
62
with open ('/tmp/' + name + '.txt' ,'w' ) as f :
46
- f .write (json .dumps (image_details ))
63
+ f .write (json .dumps (lables_txt ))
64
+
47
65
s3 .upload_file ('/tmp/' + name + '.txt' ,output_bucket ,name + ".txt" )
48
66
downloaded_file = download_file (input_bucket ,file_name )
49
67
print (f'downloaded_file:: { downloaded_file } ' )
50
68
51
69
resize_image = imt .image_resize ()
52
- upload_file (output_bucket ,resize_image )
70
+ upload_file (output_bucket ,resize_image , file_name )
53
71
#upload_file(output_bucket,file_name)
54
72
return 'File transformed'
55
73
74
+
75
@tracer.capture_method
def convert_lables_to_sentence(labels_str) -> "str | None":
    """Ask Claude v2 on Amazon Bedrock to summarise image labels as prose.

    Args:
        labels_str: The labels detected in the image. The value is
            interpolated into the prompt's ``{labels}`` placeholder.

    Returns:
        The ``completion`` field of the model response, or ``None`` when
        anything in the request/response handling raises (the error is
        printed and swallowed so the caller can continue).
    """
    # The Claude text-completions format requires the prompt to begin with
    # exactly "\n\nHuman:" and to end with "\n\nAssistant:"; any stray
    # whitespace inside those markers makes the request invalid.
    prompt = (
        "\n\nHuman: Here are the comma seperated list of labels seen in the image:\n"
        "<labels>\n"
        "{labels}\n"
        "</labels>\n"
        "Please provide a human readable and understandable summary based on these labels\n"
        "\n\nAssistant:"
    )

    try:
        print(f"lables:: {labels_str}")
        bedrock_client = boto3.client('bedrock-runtime')

        prompt_template = PromptTemplate.from_template(prompt)
        prompt_template_for_lables = prompt_template.format(labels=labels_str)

        body = json.dumps({
            "prompt": prompt_template_for_lables,
            "max_tokens_to_sample": 300,
            "temperature": 1,
            "top_k": 250,
            "top_p": 0.999,
            "stop_sequences": [],
        })
        modelId = 'anthropic.claude-v2'
        accept = 'application/json'
        contentType = 'application/json'

        response = bedrock_client.invoke_model(
            body=body,
            modelId=modelId,
            accept=accept,
            contentType=contentType,
        )
        # The response body is a stream; read it once and decode the JSON.
        response_body = json.loads(response.get('body').read())
        response_text_claud = response_body.get('completion')
        print(f"response_text_claud:: {response_text_claud}")
        return response_text_claud
    except Exception as exp:
        # Best-effort: report the failure and signal it with an explicit None.
        print(f"Couldn't convert lables to sentence: {exp}")
        return None
111
+
112
+
113
+
56
114
def download_file (bucket , object )-> str :
57
115
try :
58
116
file_path = "/tmp/" + os .path .basename (object )
@@ -64,10 +122,10 @@ def download_file(bucket, object )-> str:
64
122
except Exception as exp :
65
123
print (f"Couldn\' t download file : { exp } " )
66
124
67
def upload_file(bucket, file_name, key) -> str:
    """Upload the local copy of ``file_name`` from /tmp to S3.

    Args:
        bucket: Destination bucket name.
        file_name: Name of the file; only its basename is used to locate
            the local copy under ``/tmp/``.
        key: Object key to write in ``bucket``.

    Returns:
        The local path that was uploaded. When a ``ClientError`` is raised
        its message is printed and nothing is returned from that handler.
    """
    try:
        file_path = "/tmp/" + os.path.basename(file_name)
        s3.upload_file(file_path, bucket, key)
        return file_path
    except ClientError as client_err:
        # This is the upload path, so the message must not say "download".
        print(f"Couldn't upload file {client_err.response['Error']['Message']}")
0 commit comments