3
3
4
4
import pytest
5
5
6
- from deep_reference_parser .split import Splitter
7
6
from deep_reference_parser .parse import Parser
7
+ from deep_reference_parser .split import Splitter
8
+ from deep_reference_parser .split_parse import SplitParser
8
9
9
10
from .common import TEST_CFG , TEST_REFERENCES
10
11
@@ -19,6 +20,11 @@ def parser():
19
20
return Parser (TEST_CFG )
20
21
21
22
23
+ @pytest .fixture
24
+ def split_parser ():
25
+ return SplitParser (TEST_CFG )
26
+
27
+
22
28
@pytest .fixture
23
29
def text ():
24
30
with open (TEST_REFERENCES , "r" ) as fb :
@@ -53,17 +59,18 @@ def test_parser_list_output(text, parser):
53
59
assert isinstance (out , list )
54
60
55
61
56
- # Allow to xfail as this depends on the model
57
- @pytest .mark .xfail
58
- def test_splitter_output_length (text , splitter ):
62
+ @pytest .mark .slow
63
+ def test_split_parser_list_output (text , split_parser ):
59
64
"""
60
- For now use a minimal set of weights which may fail to predict anything
61
- useful. Hence this test is xfailed.
65
+ Test that the parser entrypoint works as expected.
66
+
67
+ If the model artefacts and embeddings are not present this test will
68
+ download them, which can be slow.
62
69
"""
63
- out = splitter .split (text , return_tokens = False , verbose = False )
70
+ out = split_parser .split_parse (text , verbose = False )
71
+ print (out )
64
72
65
- assert isinstance (out [0 ], str )
66
- assert len (out ) == 3
73
+ assert isinstance (out , list )
67
74
68
75
69
76
def test_splitter_tokens_output (text , splitter ):
@@ -88,3 +95,18 @@ def test_parser_tokens_output(text, parser):
88
95
assert len (out [0 ]) == 2
89
96
assert isinstance (out [0 ][0 ], str )
90
97
assert isinstance (out [0 ][1 ], str )
98
+
99
+
100
+ def test_split_parser_tokens_output (text , split_parser ):
101
+ """
102
+ """
103
+ out = split_parser .split_parse (text , verbose = False )
104
+
105
+ assert isinstance (out , list )
106
+
107
+ # NOTE: full functionality of split_parse is not yet implemented.
108
+
109
+ # assert isinstance(out[0], tuple)
110
+ # assert len(out[0]) == 2
111
+ # assert isinstance(out[0][0], str)
112
+ # assert isinstance(out[0][1], str)
0 commit comments