new: Add tests for SplitParser

ivyleavedtoadflax · ivyleavedtoadflax · commit ad6d6bba95ff · 2020-04-11T23:52:15.000-04:00
NOTE: SplitParser functionality is not completely implemented, so these
tests will need to be edited once it is.
diff --git a/deep_reference_parser/split_parse.py b/deep_reference_parser/split_parse.py
@@ -136,7 +136,13 @@ def split_parse(self, text, return_tokens=False, verbose=False):
 
             out = rows
 
-        #else:
+        else:
+
+            # TODO: return references with attributes (author, title, year)
+            # in json format. For now just return predictions as they are to
+            # allow testing of endpoints.
+
+            return preds
 
         #    # Otherwise convert the tokens into references and return
 
diff --git a/tests/test_deep_reference_parser_entrypoints.py b/tests/test_deep_reference_parser_entrypoints.py
@@ -3,8 +3,9 @@
 
 import pytest
 
-from deep_reference_parser.split import Splitter
 from deep_reference_parser.parse import Parser
+from deep_reference_parser.split import Splitter
+from deep_reference_parser.split_parse import SplitParser
 
 from .common import TEST_CFG, TEST_REFERENCES
 
@@ -19,6 +20,11 @@ def parser():
     return Parser(TEST_CFG)
 
 
+@pytest.fixture
+def split_parser():
+    return SplitParser(TEST_CFG)
+
+
 @pytest.fixture
 def text():
     with open(TEST_REFERENCES, "r") as fb:
@@ -53,17 +59,18 @@ def test_parser_list_output(text, parser):
     assert isinstance(out, list)
 
 
-# Allow to xfail as this depends on the model
-@pytest.mark.xfail
-def test_splitter_output_length(text, splitter):
+@pytest.mark.slow
+def test_split_parser_list_output(text, split_parser):
     """
-    For now use a minimal set of weights which may fail to predict anything
-    useful. Hence this test is xfailed.
+    Test that the split_parser entrypoint works as expected.
+
+    If the model artefacts and embeddings are not present this test will
+    download them, which can be slow.
     """
-    out = splitter.split(text, return_tokens=False, verbose=False)
+    out = split_parser.split_parse(text, verbose=False)
+    print(out)
 
-    assert isinstance(out[0], str)
-    assert len(out) == 3
+    assert isinstance(out, list)
 
 
 def test_splitter_tokens_output(text, splitter):
@@ -88,3 +95,18 @@ def test_parser_tokens_output(text, parser):
     assert len(out[0]) == 2
     assert isinstance(out[0][0], str)
     assert isinstance(out[0][1], str)
+
+
+def test_split_parser_tokens_output(text, split_parser):
+    """
+    """
+    out = split_parser.split_parse(text, verbose=False)
+
+    assert isinstance(out, list)
+
+    # NOTE: full functionality of split_parse is not yet implemented.
+
+    # assert isinstance(out[0], tuple)
+    # assert len(out[0]) == 2
+    # assert isinstance(out[0][0], str)
+    # assert isinstance(out[0][1], str)