File tree: 2 files changed, +7 -6 lines changed
Original file line number | Diff line number | Diff line change
13
13
import pytesseract
14
14
from PIL import Image
15
15
16
- from core .settings import Settings
17
-
18
16
19
17
class ImageUtils (object ):
20
18
@staticmethod
@@ -112,7 +110,4 @@ def get_text(image_path):
112
110
row_text = pytesseract .image_to_string (image , lang = 'eng' ,
113
111
config = "-c tessedit_char_whitelist=%s_-." % char_whitelist ).strip ()
114
112
text = "" .join ([s for s in row_text .splitlines (True ) if s .strip ()])
115
- if Settings .PYTHON_VERSION < 3 :
116
- return str (text .decode ('utf8' ).encode ('utf8' )).strip ()
117
- else :
118
- return text .decode ("utf-8" ).strip ()
113
+ return text .encode (encoding = 'utf-8' , errors = 'ignore' )
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@ class ImageUtilsTests(unittest.TestCase):
22
22
23
23
app_image = os .path .join (current_folder , 'resources' , 'app.png' )
24
24
iphone_image = os .path .join (current_folder , 'resources' , 'screenshot.png' )
25
+ unicode_image = os .path .join (current_folder , 'resources' , 'unicode.png' )
25
26
blue = numpy .array ([255 , 188 , 48 ])
26
27
white = numpy .array ([255 , 255 , 255 ])
27
28
@@ -54,6 +55,11 @@ def test_04_get_text(self):
54
55
assert 'Reminders' in text
55
56
assert 'Settings' in text
56
57
58
+ # Unicode text
59
+ text = ImageUtils .get_text (self .unicode_image )
60
+ assert 'Ter Stegen' in text
61
+ assert 'Neymar' in text
62
+
57
63
58
64
if __name__ == '__main__' :
59
65
unittest .main ()
You can’t perform that action at this time.
0 commit comments