diff --git a/core/utils/image_utils.py b/core/utils/image_utils.py index 1a61f7d9..28e4aaed 100644 --- a/core/utils/image_utils.py +++ b/core/utils/image_utils.py @@ -109,4 +109,5 @@ def get_text(image_path): image = Image.open(image_path).convert('LA') row_text = pytesseract.image_to_string(image, lang='eng', config="-c tessedit_char_whitelist=%s_-." % char_whitelist).strip() - return "".join([s for s in row_text.splitlines(True) if s.strip()]) + text = "".join([s for s in row_text.splitlines(True) if s.strip()]) + return text.encode(encoding='utf-8', errors='ignore')