Skip to content

Commit a25b61b

Browse files
authored
fix: ImageUtils.get_text() now encodes to utf-8 (#35)
1 parent 5d239e0 commit a25b61b

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

core/utils/image_utils.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -109,4 +109,5 @@ def get_text(image_path):
109109
image = Image.open(image_path).convert('LA')
110110
row_text = pytesseract.image_to_string(image, lang='eng',
111111
config="-c tessedit_char_whitelist=%s_-." % char_whitelist).strip()
112-
return "".join([s for s in row_text.splitlines(True) if s.strip()])
112+
text = "".join([s for s in row_text.splitlines(True) if s.strip()])
113+
return text.encode(encoding='utf-8', errors='ignore')

0 commit comments

Comments
 (0)