Skip to content

Commit ad6d6bb

Browse files
new: Add tests for SplitParser
NOTE: SplitParser functionality is not completely implemented, so these tests will need to be edited once it is.
1 parent 8e3a155 commit ad6d6bb

File tree

2 files changed

+38
-10
lines changed

2 files changed

+38
-10
lines changed

deep_reference_parser/split_parse.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,13 @@ def split_parse(self, text, return_tokens=False, verbose=False):
136136

137137
out = rows
138138

139-
#else:
139+
else:
140+
141+
# TODO: return references with attributes (author, title, year)
142+
# in json format. For now just return predictions as they are to
143+
# allow testing of endpoints.
144+
145+
return preds
140146

141147
# # Otherwise convert the tokens into references and return
142148

tests/test_deep_reference_parser_entrypoints.py

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@
33

44
import pytest
55

6-
from deep_reference_parser.split import Splitter
76
from deep_reference_parser.parse import Parser
7+
from deep_reference_parser.split import Splitter
8+
from deep_reference_parser.split_parse import SplitParser
89

910
from .common import TEST_CFG, TEST_REFERENCES
1011

@@ -19,6 +20,11 @@ def parser():
1920
return Parser(TEST_CFG)
2021

2122

23+
@pytest.fixture
24+
def split_parser():
25+
return SplitParser(TEST_CFG)
26+
27+
2228
@pytest.fixture
2329
def text():
2430
with open(TEST_REFERENCES, "r") as fb:
@@ -53,17 +59,18 @@ def test_parser_list_output(text, parser):
5359
assert isinstance(out, list)
5460

5561

56-
# Allow to xfail as this depends on the model
57-
@pytest.mark.xfail
58-
def test_splitter_output_length(text, splitter):
62+
@pytest.mark.slow
63+
def test_split_parser_list_output(text, split_parser):
5964
"""
60-
For now use a minimal set of weights which may fail to predict anything
61-
useful. Hence this test is xfailed.
65+
Test that the split parser entrypoint works as expected.
66+
67+
If the model artefacts and embeddings are not present this test will
68+
download them, which can be slow.
6269
"""
63-
out = splitter.split(text, return_tokens=False, verbose=False)
70+
out = split_parser.split_parse(text, verbose=False)
71+
print(out)
6472

65-
assert isinstance(out[0], str)
66-
assert len(out) == 3
73+
assert isinstance(out, list)
6774

6875

6976
def test_splitter_tokens_output(text, splitter):
@@ -88,3 +95,18 @@ def test_parser_tokens_output(text, parser):
8895
assert len(out[0]) == 2
8996
assert isinstance(out[0][0], str)
9097
assert isinstance(out[0][1], str)
98+
99+
100+
def test_split_parser_tokens_output(text, split_parser):
101+
"""
102+
"""
103+
out = split_parser.split_parse(text, verbose=False)
104+
105+
assert isinstance(out, list)
106+
107+
# NOTE: full functionality of split_parse is not yet implemented.
108+
109+
# assert isinstance(out[0], tuple)
110+
# assert len(out[0]) == 2
111+
# assert isinstance(out[0][0], str)
112+
# assert isinstance(out[0][1], str)

0 commit comments

Comments
 (0)