|
1 | 1 | import unittest
|
2 | 2 | from os import path
|
3 | 3 |
|
4 |
| -import cv2 |
5 |
| -import pdfplumber |
6 |
| -from PIL import Image |
7 |
| - |
8 | 4 | from ai_data_preprocessing_queue.Pipeline import Pipeline
|
9 |
| -from ai_data_preprocessing_queue.services.image_file_services import scale_too_big_cv2_image, scale_too_big_image |
10 | 5 |
|
11 | 6 | ABS_PATH_TEST_DATA = path.join(path.dirname(path.abspath(__file__)), "test_data")
|
12 | 7 |
|
@@ -125,65 +120,6 @@ def test_spellcheck_should_not_throw_exception_for_short_values(self) -> None:
|
125 | 120 | value = pipeline.consume("k koipe artikel")
|
126 | 121 | self.assertEqual("k kopie artikel", value)
|
127 | 122 |
|
128 |
| - def test_ocr_jpg_image(self) -> None: |
129 |
| - pipeline = Pipeline({"ocr": None}) |
130 |
| - state = {"image_to_string": {"lang": "eng", "config": "--psm 1"}} |
131 |
| - text = "" |
132 |
| - with Image.open(path.join(ABS_PATH_TEST_DATA, "test.jpg")) as image: |
133 |
| - image.load() |
134 |
| - image.convert("RGBA") |
135 |
| - text += pipeline.consume(image, state) |
136 |
| - self.assertIn("There is a test of optical character recognition.", text) |
137 |
| - |
138 |
| - def test_ocr_jpg_image_and_text_only(self) -> None: |
139 |
| - pipeline = Pipeline({"ocr": None, "text_only": None}) |
140 |
| - state = {"image_to_string": {"lang": "eng", "config": "--psm 1"}} |
141 |
| - text = "" |
142 |
| - with Image.open(path.join(ABS_PATH_TEST_DATA, "test.jpg")) as image: |
143 |
| - image.load() |
144 |
| - image.convert("RGBA") |
145 |
| - text += pipeline.consume(image, state) |
146 |
| - self.assertEqual(text.find("."), -1) |
147 |
| - |
148 |
| - def test_ocr_pdf_text_layer(self) -> None: |
149 |
| - pipeline = Pipeline({"ocr": None}) |
150 |
| - state = {"image_to_string": {"lang": "eng", "config": "--psm 1"}} |
151 |
| - text = "" |
152 |
| - with pdfplumber.open(path.join(ABS_PATH_TEST_DATA, "test.pdf")) as pdf: |
153 |
| - for page in pdf.pages: |
154 |
| - text += pipeline.consume(page, state) |
155 |
| - self.assertIn("There is a test of optical character recognition.", text) |
156 |
| - |
157 |
| - def test_ocr_tiff_cv2_image(self) -> None: |
158 |
| - image = cv2.imread(path.join(ABS_PATH_TEST_DATA, "test_180.tiff"), 0) |
159 |
| - pipeline = Pipeline({"ocr": None}) |
160 |
| - state = {"image_to_string": {"lang": "eng", "config": "--psm 1"}} |
161 |
| - text = pipeline.consume(image, state) |
162 |
| - self.assertIn("There is a test of optical character recognition.", text) |
163 |
| - |
164 |
| - def test_ocr_tiff_image_scaled(self) -> None: |
165 |
| - image = Image.open(path.join(ABS_PATH_TEST_DATA, "test_180.tiff")) |
166 |
| - pipeline = Pipeline({"ocr": None}) |
167 |
| - state = {"image_to_string": {"lang": "eng", "cut_of_size": 1700000}} |
168 |
| - text = pipeline.consume(image, state) |
169 |
| - self.assertIn("There is a test of optical character recognition.", text) |
170 |
| - |
171 |
| - def test_scale_tiff_image(self) -> None: |
172 |
| - image = Image.open(path.join(ABS_PATH_TEST_DATA, "test_180.tiff")) |
173 |
| - size = image.size[0] * image.size[1] |
174 |
| - scaling_factor = 10 |
175 |
| - image = scale_too_big_image(image, cutoff_size=size / scaling_factor) |
176 |
| - scaled_size = image.size[0] * image.size[1] * scaling_factor**2 |
177 |
| - self.assertAlmostEqual(1, scaled_size / size, places=1) |
178 |
| - |
179 |
| - def test_scale_tiff_cv2_image(self) -> None: |
180 |
| - image = cv2.imread(path.join(ABS_PATH_TEST_DATA, "test_180.tiff"), 0) |
181 |
| - size = image.shape[1] * image.shape[0] |
182 |
| - scaling_factor = 10 |
183 |
| - image = scale_too_big_cv2_image(image, cutoff_size=size / scaling_factor) |
184 |
| - scaled_size = image.shape[0] * image.shape[1] * scaling_factor**2 |
185 |
| - self.assertAlmostEqual(1, scaled_size / size, places=1) |
186 |
| - |
187 | 123 |
|
188 | 124 | if __name__ == "__main__":
|
189 | 125 | unittest.main()
|
0 commit comments