SageMaker endpoint inference error with HF model loading from S3 bucket with new transformers update #108
Is it possible to pass the env var "HF_TASK" when creating an endpoint in SageMaker and loading the model from an S3 bucket?
I got it working, but I believe it's a bug: the endpoint returns an error (asking to define "HF_TASK"). How it worked for me:
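A minimal sketch of that workaround, assuming the SageMaker Python SDK; the model_data path, role ARN, instance type, and container version pins below are placeholders, not values from this thread:

```python
from sagemaker.huggingface import HuggingFaceModel

# Sketch only: model_data, role, and version pins are hypothetical placeholders.
huggingface_model = HuggingFaceModel(
    model_data="s3://my-bucket/model.tar.gz",                  # placeholder S3 location
    role="arn:aws:iam::123456789012:role/MySageMakerRole",     # placeholder IAM role
    transformers_version="4.37",
    pytorch_version="2.1",
    py_version="py310",
    env={"HF_TASK": "token-classification"},  # setting the task explicitly avoids the error
)

predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.g4dn.xlarge",
)
```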
I will keep this thread open in case someone can point out my mistake or correct me, which might be helpful for others.
@miteshkotak This is likely an issue with how you generated the archive file and/or your inference.py. This message probably means that SageMaker is using the default handler instead of your custom one. What does your archive file look like? How do you create your endpoint?
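For reference, a sketch of the layout the Hugging Face inference toolkit expects inside model.tar.gz, with a custom handler under code/inference.py; the file names are illustrative:

```python
# Sketch: build the archive with the model weights at the root and the custom
# handler under code/. Paths are placeholders for this example.
#
#   model.tar.gz
#   ├── config.json
#   ├── model.safetensors        # or pytorch_model.bin on transformers < 4.35.0
#   └── code/
#       └── inference.py
import tarfile

with tarfile.open("model.tar.gz", "w:gz") as tar:
    for path in ["config.json", "model.safetensors", "code/inference.py"]:
        tar.add(path)
```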
Just got the same error.

```python
# inference.py
import io
import logging
import sys
import json
from PIL import Image
import requests
import os
from transformers import LayoutLMForTokenClassification, AutoProcessor
import torch
os.system('chmod 777 /tmp') # https://discuss.huggingface.co/t/how-to-install-tesseract-ocr-in-a-training-dlc-of-hf-via-a-script/19251/6
os.system('apt-get update -y')
os.system('apt-get install tesseract-ocr -y')
logging.basicConfig(
    level=logging.getLevelName("INFO"),
    handlers=[logging.StreamHandler(sys.stdout)],
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
def model_fn(model_dir):
    logging.info("Start loading model.")
    # The LayoutLMv2 processor is only used for its OCR step (apply_ocr=True);
    # the fine-tuned model itself is loaded from the model directory.
    processor = AutoProcessor.from_pretrained(
        "microsoft/layoutlmv2-base-uncased", apply_ocr=True
    )
    model = LayoutLMForTokenClassification.from_pretrained(model_dir)
    logging.info("Model and tokenizer loaded.")
    return processor, model
def input_fn(input_data, content_type):
    if content_type == "application/json":
        data = json.loads(input_data)
        urls = data.pop("urls", None)
        if urls:
            images = [get_image_from_url(url) for url in urls]
            return images
        else:
            raise ValueError("No document url was provided.")
    else:
        raise ValueError("Content type {} is not supported.".format(content_type))
def predict_fn(images, model):
    logging.info("Start predict_fn.")
    processor, model = model
    encoding = processor(
        images,
        return_tensors="pt",
        padding="max_length",
        truncation=True,
    )
    # Drop the pixel data: the OCR-only processor adds an "image" key,
    # but the LayoutLM (v1) model does not take image inputs.
    del encoding["image"]
    outputs = model(**encoding)
    results = process_outputs(
        outputs, encoding=encoding,
        images=images, model=model,
        processor=processor,
    )
    return results
def get_image_from_url(url: str) -> Image.Image:
    response = requests.get(url)
    f = io.BytesIO(response.content)
    return Image.open(f).convert("RGB")
# Handled by the processor:
# def normalize_bbox(bbox, width, height):
#     return [
#         int(1000 * (bbox[0] / width)),
#         int(1000 * (bbox[1] / height)),
#         int(1000 * (bbox[2] / width)),
#         int(1000 * (bbox[3] / height)),
#     ]
def unnormalize_box(bbox, width, height):
    return [
        int(width * (bbox[0] / 1000)),
        int(height * (bbox[1] / 1000)),
        int(width * (bbox[2] / 1000)),
        int(height * (bbox[3] / 1000)),
    ]
def _process_outputs(encoding, tokenizer, labels, scores, images):
    # Merge sub-word tokens back into words and attach the word-level
    # label, score, and unnormalized bounding box.
    results = []
    for batch_idx, input_ids in enumerate(encoding["input_ids"]):
        width, height = images[batch_idx].size
        entities = []
        previous_word_idx = 0
        tokens = tokenizer.convert_ids_to_tokens(input_ids)
        word_ids = encoding.word_ids(batch_index=batch_idx)
        word = ""
        for i, word_idx in enumerate(word_ids):
            if word_idx is None:
                continue
            elif previous_word_idx != word_idx:
                # +1 because of [CLS] token
                entities.append(
                    {
                        "word": word,
                        "label": labels[batch_idx][previous_word_idx + 1],
                        "score": scores[batch_idx][previous_word_idx + 1],
                        "bbox": unnormalize_box(
                            encoding["bbox"][batch_idx][previous_word_idx + 1],
                            width=width,
                            height=height,
                        ),
                    }
                )
                word = tokens[i]
            else:
                word += tokens[i]
            previous_word_idx = word_idx
        for entity in entities:
            entity["word"] = entity["word"].replace("##", "")
        results.append(entities)
    return results
def process_outputs(outputs, encoding, images, model, processor):
    scores, _ = torch.max(outputs.logits.softmax(dim=-1), dim=-1)
    scores = scores.tolist()
    predictions = outputs.logits.argmax(-1)
    labels = [
        [model.config.id2label[pred.item()] for pred in prediction]
        for prediction in predictions
    ]
    results = _process_outputs(
        encoding=encoding,
        tokenizer=processor.tokenizer,
        labels=labels,
        scores=scores,
        images=images,
    )
    return results
if __name__ == "__main__":
    import boto3

    BUCKET = "invoice-reader-project"
    KEYS = ["data/training/documents/invoice_0.png", "data/training/documents/invoice_1.png"]

    s3 = boto3.client("s3")
    urls = []
    for key in KEYS:
        urls.append(
            s3.generate_presigned_url(
                ClientMethod="get_object",
                Params={"Bucket": BUCKET, "Key": key},
                ExpiresIn=3600,
            )
        )

    payload = {"urls": urls}
    results = predict_fn(
        input_fn(input_data=json.dumps(payload), content_type="application/json"),
        model_fn(model_dir="artifact"),
    )
    print(results)
```
Got it! Stupid mistake, but a mistake nonetheless.
Since the transformers update (4.35.0), models are saved as model.safetensors by default instead of pytorch_model.bin. An endpoint created in SageMaker with a model saved in an S3 bucket now throws an error asking to define "HF_TASK". As per the toolkit documentation, we should not need to define env variables when loading a model from an S3 bucket; only a custom inference.py is needed.
Does someone know what's going wrong here? @philschmid any idea?
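For illustration, a minimal sketch of reverting to the old serialization format before uploading to S3, in case the endpoint's toolkit version cannot handle safetensors yet; the "artifact" directory is a placeholder, and this is an assumed workaround rather than a confirmed fix from this thread:

```python
from transformers import LayoutLMForTokenClassification

# Sketch: "artifact" is a placeholder local model directory.
model = LayoutLMForTokenClassification.from_pretrained("artifact")
# Writes pytorch_model.bin instead of model.safetensors (the 4.35.0 default).
model.save_pretrained("artifact", safe_serialization=False)
```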