Skip to content

Commit 3a64169

Browse files
committed
fix: encoding in Device.get_text()
1 parent a0082be commit 3a64169

File tree

2 files changed

+7
-6
lines changed

2 files changed

+7
-6
lines changed

core/utils/image_utils.py

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -13,8 +13,6 @@
1313
import pytesseract
1414
from PIL import Image
1515

16-
from core.settings import Settings
17-
1816

1917
class ImageUtils(object):
2018
@staticmethod
@@ -112,7 +110,4 @@ def get_text(image_path):
112110
row_text = pytesseract.image_to_string(image, lang='eng',
113111
config="-c tessedit_char_whitelist=%s_-." % char_whitelist).strip()
114112
text = "".join([s for s in row_text.splitlines(True) if s.strip()])
115-
if Settings.PYTHON_VERSION < 3:
116-
return str(text.decode('utf8').encode('utf8')).strip()
117-
else:
118-
return text.decode("utf-8").strip()
113+
return text.encode(encoding='utf-8', errors='ignore')

core_tests/unit/utils/image_tests.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ class ImageUtilsTests(unittest.TestCase):
2222

2323
app_image = os.path.join(current_folder, 'resources', 'app.png')
2424
iphone_image = os.path.join(current_folder, 'resources', 'screenshot.png')
25+
unicode_image = os.path.join(current_folder, 'resources', 'unicode.png')
2526
blue = numpy.array([255, 188, 48])
2627
white = numpy.array([255, 255, 255])
2728

@@ -54,6 +55,11 @@ def test_04_get_text(self):
5455
assert 'Reminders' in text
5556
assert 'Settings' in text
5657

58+
# Unicode text
59+
text = ImageUtils.get_text(self.unicode_image)
60+
assert 'Ter Stegen' in text
61+
assert 'Neymar' in text
62+
5763

5864
if __name__ == '__main__':
5965
unittest.main()

0 commit comments

Comments
 (0)