From 2245ee9f55f95f3b8ea52b7626ac296a7d6ae0bb Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Tue, 4 Apr 2023 12:30:38 -0400 Subject: [PATCH 01/12] Move testing dependencies to setup.cfg MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch moves the packages listed in `tests/requirements.txt` into `setup.cfg`. This is done to: * Reduce complexity of downstream projects that wish to inherit testing dependencies. E.g. currently the `tests/requirements.txt` file is loaded via incorporating `case-utils` as a submodule in at least two projects (`case_exiftool`, `case_gnu_time`). * Expose test requirements for supply-chain review via package metadata. The feature name for the test requirements is `testing`, inheriting the name from Python's `setuptools`. This feature is added knowing that, once added, it should not be removed, per this best practice in the PyPA guidance: > Best practice: if a project ends up no longer needing any other > packages to support a feature, it should keep an empty requirements > list for that feature in its extras_require argument, so that packages > depending on that feature don’t break (due to an invalid feature > name). There were no effects observed on Make-managed files. 
References: * https://github.com/casework/CASE-Implementation-ExifTool/blob/0.5.0/tests/Makefile#L43-L45 * https://github.com/casework/CASE-Implementation-GNU-Time/blob/0.7.0/tests/Makefile#L56-L58 * https://github.com/pypa/setuptools/blob/v67.6.1/setup.cfg#L42 * https://setuptools.pypa.io/en/latest/userguide/dependency_management.html#optional-dependencies Signed-off-by: Alex Nelson --- setup.cfg | 8 ++++++++ tests/Makefile | 8 ++------ tests/requirements.txt | 5 ----- 3 files changed, 10 insertions(+), 11 deletions(-) delete mode 100644 tests/requirements.txt diff --git a/setup.cfg b/setup.cfg index 7c0b0d9..e5ffda7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -34,6 +34,14 @@ console_scripts = case_sparql_select = case_utils.case_sparql_select:main case_validate = case_utils.case_validate:main +[options.extras_require] +testing = + PyLD + mypy + pytest + python-dateutil + types-python-dateutil + [options.package_data] case_utils = py.typed case_utils.ontology = diff --git a/tests/Makefile b/tests/Makefile index 0716d1a..c1c626e 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -31,8 +31,7 @@ all: \ $(top_srcdir)/.git_submodule_init.done.log \ $(top_srcdir)/case_utils/__init__.py \ $(top_srcdir)/setup.cfg \ - $(top_srcdir)/setup.py \ - requirements.txt + $(top_srcdir)/setup.py rm -rf venv $(PYTHON3) -m venv \ venv @@ -42,14 +41,11 @@ all: \ pip \ setuptools \ wheel - source venv/bin/activate \ - && pip install \ - --requirement requirements.txt source venv/bin/activate \ && cd $(top_srcdir) \ && pip install \ --editable \ - . 
+ .[testing] touch $@ all-case_utils: \ diff --git a/tests/requirements.txt b/tests/requirements.txt deleted file mode 100644 index f913d96..0000000 --- a/tests/requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -PyLD -mypy -pytest -python-dateutil -types-python-dateutil From 58e8a079ab1c52da1cc5702c2eab11ddf2dd12dd Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Tue, 4 Apr 2023 16:06:12 -0400 Subject: [PATCH 02/12] Add SHA3-256 and SHA3-512 to default generated hashes A follow-on patch will regenerate Make-managed files. References: * https://github.com/ucoProject/UCO/issues/526 Signed-off-by: Alex Nelson --- case_utils/case_file/__init__.py | 23 ++++++++++++++++---- tests/case_utils/case_file/test_case_file.py | 2 ++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/case_utils/case_file/__init__.py b/case_utils/case_file/__init__.py index 9a4095f..e58fa92 100644 --- a/case_utils/case_file/__init__.py +++ b/case_utils/case_file/__init__.py @@ -49,6 +49,8 @@ class HashDict(typing.NamedTuple): sha1: str sha256: str sha512: str + sha3_256: str + sha3_512: str def create_file_node( @@ -141,6 +143,8 @@ def create_file_node( sha1obj = hashlib.sha1() sha256obj = hashlib.sha256() sha512obj = hashlib.sha512() + sha3_256obj = hashlib.sha3_256() + sha3_512obj = hashlib.sha3_512() stashed_error = None byte_tally = 0 with open(filepath, "rb") as in_fh: @@ -159,6 +163,8 @@ def create_file_node( sha1obj.update(buf) sha256obj.update(buf) sha512obj.update(buf) + sha3_256obj.update(buf) + sha3_512obj.update(buf) if stashed_error is not None: raise stashed_error current_hashdict = HashDict( @@ -167,6 +173,8 @@ def create_file_node( sha1obj.hexdigest(), sha256obj.hexdigest(), sha512obj.hexdigest(), + sha3_256obj.hexdigest(), + sha3_512obj.hexdigest(), ) if last_hashdict == current_hashdict: successful_hashdict = current_hashdict @@ -194,18 +202,25 @@ def create_file_node( # Add confirmed hashes into graph. 
for key in successful_hashdict._fields: - if key not in ("md5", "sha1", "sha256", "sha512"): + if key not in ("md5", "sha1", "sha256", "sha512", "sha3_256", "sha3_512"): continue n_hash = node_namespace["hash-" + case_utils.local_uuid.local_uuid()] graph.add((n_contentdata_facet, NS_UCO_OBSERVABLE.hash, n_hash)) graph.add((n_hash, NS_RDF.type, NS_UCO_TYPES.Hash)) + if key in ("sha3_256", "sha3_512"): + l_hash_method = rdflib.Literal( + key.replace("_", "-").upper(), + datatype=NS_UCO_VOCABULARY.HashNameVocab, + ) + else: + l_hash_method = rdflib.Literal( + key.upper(), datatype=NS_UCO_VOCABULARY.HashNameVocab + ) graph.add( ( n_hash, NS_UCO_TYPES.hashMethod, - rdflib.Literal( - key.upper(), datatype=NS_UCO_VOCABULARY.HashNameVocab - ), + l_hash_method, ) ) hash_value = getattr(successful_hashdict, key) diff --git a/tests/case_utils/case_file/test_case_file.py b/tests/case_utils/case_file/test_case_file.py index 75c9123..9194f7d 100644 --- a/tests/case_utils/case_file/test_case_file.py +++ b/tests/case_utils/case_file/test_case_file.py @@ -56,6 +56,8 @@ def test_confirm_hashes(graph_case_file: rdflib.Graph) -> None: "SHA1": "A94A8FE5CCB19BA61C4C0873D391E987982FBBD3", "SHA256": "9F86D081884C7D659A2FEAA0C55AD015A3BF4F1B2B0B822CD15D6C15B0F00A08", "SHA512": "EE26B0DD4AF7E749AA1A8EE3C10AE9923F618980772E473F8819A5D4940E0DB27AC185F8A0E1D5F84F88BC887FD67B143732C304CC5FA9AD8E6F57F50028A8FF", + "SHA3-256": "36F028580BB02CC8272A9A020F4200E346E276AE664E45EE80745574E2F5AB80", + "SHA3-512": "9ECE086E9BAC491FAC5C1D1046CA11D737B92A2B2EBD93F005D7B710110C0A678288166E7FBE796883A4F2E9B3CA9F484F521D0CE464345CC1AEC96779149C14", } computed = dict() From 945ddc20cb184e2a99171d6078f10aa799b2e426 Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Tue, 4 Apr 2023 16:20:31 -0400 Subject: [PATCH 03/12] Regenerate Make-managed files Signed-off-by: Alex Nelson --- tests/case_utils/case_file/kb.json | 30 ++++ tests/case_utils/case_file/kb.ttl | 16 ++- .../case_file/sample.txt-nocompact.json | 124 
+++++++++++----- tests/case_utils/case_file/sample.txt.json | 136 ++++++++++++------ tests/case_utils/case_file/sample.txt.ttl | 16 ++- 5 files changed, 244 insertions(+), 78 deletions(-) diff --git a/tests/case_utils/case_file/kb.json b/tests/case_utils/case_file/kb.json index 771a30b..5aa6bc9 100644 --- a/tests/case_utils/case_file/kb.json +++ b/tests/case_utils/case_file/kb.json @@ -23,6 +23,12 @@ }, { "@id": "kb:hash-49e81fee-c6b3-5f5f-af8b-0746d32e4932" + }, + { + "@id": "kb:hash-720759b8-9544-5dab-ab12-003372b17a4e" + }, + { + "@id": "kb:hash-b02ebdb3-edf7-5fbf-8088-3d064e316b93" } ], "uco-observable:sizeInBytes": { @@ -122,6 +128,30 @@ "@type": "xsd:hexBinary", "@value": "ee26b0dd4af7e749aa1a8ee3c10ae9923f618980772e473f8819a5d4940e0db27ac185f8a0e1d5f84f88bc887fd67b143732c304cc5fa9ad8e6f57f50028a8ff" } + }, + { + "@id": "kb:hash-720759b8-9544-5dab-ab12-003372b17a4e", + "@type": "uco-types:Hash", + "uco-types:hashMethod": { + "@type": "uco-vocabulary:HashNameVocab", + "@value": "SHA3-512" + }, + "uco-types:hashValue": { + "@type": "xsd:hexBinary", + "@value": "9ece086e9bac491fac5c1d1046ca11d737b92a2b2ebd93f005d7b710110c0a678288166e7fbe796883a4f2e9b3ca9f484f521d0ce464345cc1aec96779149c14" + } + }, + { + "@id": "kb:hash-b02ebdb3-edf7-5fbf-8088-3d064e316b93", + "@type": "uco-types:Hash", + "uco-types:hashMethod": { + "@type": "uco-vocabulary:HashNameVocab", + "@value": "SHA3-256" + }, + "uco-types:hashValue": { + "@type": "xsd:hexBinary", + "@value": "36f028580bb02cc8272a9a020f4200e346e276ae664e45ee80745574e2f5ab80" + } } ] } \ No newline at end of file diff --git a/tests/case_utils/case_file/kb.ttl b/tests/case_utils/case_file/kb.ttl index 0de60e3..566013e 100644 --- a/tests/case_utils/case_file/kb.ttl +++ b/tests/case_utils/case_file/kb.ttl @@ -14,7 +14,9 @@ kb:content-data-facet-1833f979-1f19-5543-9d82-6cefd144b169 kb:hash-24644904-83ea-5911-aea8-be687a9f3caf , kb:hash-295bdeb5-7f23-5a3f-8b7f-4bb1191b7c21 , kb:hash-39127f5c-598b-51d4-a720-2e949f18f85f , - 
kb:hash-49e81fee-c6b3-5f5f-af8b-0746d32e4932 + kb:hash-49e81fee-c6b3-5f5f-af8b-0746d32e4932 , + kb:hash-720759b8-9544-5dab-ab12-003372b17a4e , + kb:hash-b02ebdb3-edf7-5fbf-8088-3d064e316b93 ; uco-observable:sizeInBytes "4"^^xsd:integer ; . @@ -70,3 +72,15 @@ kb:hash-49e81fee-c6b3-5f5f-af8b-0746d32e4932 uco-types:hashValue "ee26b0dd4af7e749aa1a8ee3c10ae9923f618980772e473f8819a5d4940e0db27ac185f8a0e1d5f84f88bc887fd67b143732c304cc5fa9ad8e6f57f50028a8ff"^^xsd:hexBinary ; . +kb:hash-720759b8-9544-5dab-ab12-003372b17a4e + a uco-types:Hash ; + uco-types:hashMethod "SHA3-512"^^uco-vocabulary:HashNameVocab ; + uco-types:hashValue "9ece086e9bac491fac5c1d1046ca11d737b92a2b2ebd93f005d7b710110c0a678288166e7fbe796883a4f2e9b3ca9f484f521d0ce464345cc1aec96779149c14"^^xsd:hexBinary ; + . + +kb:hash-b02ebdb3-edf7-5fbf-8088-3d064e316b93 + a uco-types:Hash ; + uco-types:hashMethod "SHA3-256"^^uco-vocabulary:HashNameVocab ; + uco-types:hashValue "36f028580bb02cc8272a9a020f4200e346e276ae664e45ee80745574e2f5ab80"^^xsd:hexBinary ; + . 
+ diff --git a/tests/case_utils/case_file/sample.txt-nocompact.json b/tests/case_utils/case_file/sample.txt-nocompact.json index 8c59f44..efdb4b7 100644 --- a/tests/case_utils/case_file/sample.txt-nocompact.json +++ b/tests/case_utils/case_file/sample.txt-nocompact.json @@ -1,27 +1,76 @@ { "@context": { + "brick": "https://brickschema.org/schema/Brick#", + "csvw": "http://www.w3.org/ns/csvw#", + "dc": "http://purl.org/dc/elements/1.1/", + "dcam": "http://purl.org/dc/dcam/", + "dcat": "http://www.w3.org/ns/dcat#", + "dcmitype": "http://purl.org/dc/dcmitype/", + "dcterms": "http://purl.org/dc/terms/", + "doap": "http://usefulinc.com/ns/doap#", + "foaf": "http://xmlns.com/foaf/0.1/", + "geo": "http://www.opengis.net/ont/geosparql#", "kb": "http://example.org/kb/", + "odrl": "http://www.w3.org/ns/odrl/2/", + "org": "http://www.w3.org/ns/org#", "owl": "http://www.w3.org/2002/07/owl#", + "prof": "http://www.w3.org/ns/dx/prof/", + "prov": "http://www.w3.org/ns/prov#", + "qb": "http://purl.org/linked-data/cube#", "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdfs": "http://www.w3.org/2000/01/rdf-schema#", + "schema": "https://schema.org/", + "sh": "http://www.w3.org/ns/shacl#", + "skos": "http://www.w3.org/2004/02/skos/core#", + "sosa": "http://www.w3.org/ns/sosa/", + "ssn": "http://www.w3.org/ns/ssn/", + "time": "http://www.w3.org/2006/time#", "uco-core": "https://ontology.unifiedcyberontology.org/uco/core/", "uco-observable": "https://ontology.unifiedcyberontology.org/uco/observable/", "uco-types": "https://ontology.unifiedcyberontology.org/uco/types/", "uco-vocabulary": "https://ontology.unifiedcyberontology.org/uco/vocabulary/", + "vann": "http://purl.org/vocab/vann/", + "void": "http://rdfs.org/ns/void#", + "wgs": "https://www.w3.org/2003/01/geo/wgs84_pos#", "xml": "http://www.w3.org/XML/1998/namespace", "xsd": "http://www.w3.org/2001/XMLSchema#" }, "@graph": [ { - "@id": "http://example.org/kb/hash-c77cb4f1-ac2a-52c7-b67e-016e209515cb", + "@id": 
"http://example.org/kb/content-data-facet-bda9b72d-2753-54ab-9292-e1e260be4f6d", + "@type": "https://ontology.unifiedcyberontology.org/uco/observable/ContentDataFacet", + "https://ontology.unifiedcyberontology.org/uco/observable/hash": [ + { + "@id": "http://example.org/kb/hash-8ebb651a-314b-554a-b63e-78b9e69111d8" + }, + { + "@id": "http://example.org/kb/hash-c77cb4f1-ac2a-52c7-b67e-016e209515cb" + }, + { + "@id": "http://example.org/kb/hash-bdb2ba7d-8cb2-5591-a051-0c20d134e894" + }, + { + "@id": "http://example.org/kb/hash-bf689e82-1cc4-507f-a6fb-7fc01b9289c6" + }, + { + "@id": "http://example.org/kb/hash-dd2161f0-4943-55c0-b08e-a2ad8a85dce8" + }, + { + "@id": "http://example.org/kb/hash-28dd6731-4eda-5ae7-9810-efedc7593912" + } + ], + "https://ontology.unifiedcyberontology.org/uco/observable/sizeInBytes": 4 + }, + { + "@id": "http://example.org/kb/hash-bdb2ba7d-8cb2-5591-a051-0c20d134e894", "@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash", "https://ontology.unifiedcyberontology.org/uco/types/hashMethod": { "@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab", - "@value": "SHA1" + "@value": "SHA256" }, "https://ontology.unifiedcyberontology.org/uco/types/hashValue": { "@type": "http://www.w3.org/2001/XMLSchema#hexBinary", - "@value": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3" + "@value": "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08" } }, { @@ -37,15 +86,37 @@ ] }, { - "@id": "http://example.org/kb/hash-bdb2ba7d-8cb2-5591-a051-0c20d134e894", + "@id": "http://example.org/kb/hash-28dd6731-4eda-5ae7-9810-efedc7593912", "@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash", "https://ontology.unifiedcyberontology.org/uco/types/hashMethod": { "@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab", - "@value": "SHA256" + "@value": "SHA3-512" }, "https://ontology.unifiedcyberontology.org/uco/types/hashValue": { "@type": 
"http://www.w3.org/2001/XMLSchema#hexBinary", - "@value": "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08" + "@value": "9ece086e9bac491fac5c1d1046ca11d737b92a2b2ebd93f005d7b710110c0a678288166e7fbe796883a4f2e9b3ca9f484f521d0ce464345cc1aec96779149c14" + } + }, + { + "@id": "http://example.org/kb/file-facet-a5d9606e-a5cf-5531-9462-5bed0ac4219c", + "@type": "https://ontology.unifiedcyberontology.org/uco/observable/FileFacet", + "https://ontology.unifiedcyberontology.org/uco/observable/fileName": "sample.txt", + "https://ontology.unifiedcyberontology.org/uco/observable/modifiedTime": { + "@type": "http://www.w3.org/2001/XMLSchema#dateTime", + "@value": "2010-01-02T03:04:56+00:00" + }, + "https://ontology.unifiedcyberontology.org/uco/observable/sizeInBytes": 4 + }, + { + "@id": "http://example.org/kb/hash-8ebb651a-314b-554a-b63e-78b9e69111d8", + "@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash", + "https://ontology.unifiedcyberontology.org/uco/types/hashMethod": { + "@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab", + "@value": "MD5" + }, + "https://ontology.unifiedcyberontology.org/uco/types/hashValue": { + "@type": "http://www.w3.org/2001/XMLSchema#hexBinary", + "@value": "098f6bcd4621d373cade4e832627b4f6" } }, { @@ -61,44 +132,27 @@ } }, { - "@id": "http://example.org/kb/content-data-facet-bda9b72d-2753-54ab-9292-e1e260be4f6d", - "@type": "https://ontology.unifiedcyberontology.org/uco/observable/ContentDataFacet", - "https://ontology.unifiedcyberontology.org/uco/observable/hash": [ - { - "@id": "http://example.org/kb/hash-8ebb651a-314b-554a-b63e-78b9e69111d8" - }, - { - "@id": "http://example.org/kb/hash-c77cb4f1-ac2a-52c7-b67e-016e209515cb" - }, - { - "@id": "http://example.org/kb/hash-bdb2ba7d-8cb2-5591-a051-0c20d134e894" - }, - { - "@id": "http://example.org/kb/hash-bf689e82-1cc4-507f-a6fb-7fc01b9289c6" - } - ], - "https://ontology.unifiedcyberontology.org/uco/observable/sizeInBytes": 4 - }, - { - 
"@id": "http://example.org/kb/file-facet-a5d9606e-a5cf-5531-9462-5bed0ac4219c", - "@type": "https://ontology.unifiedcyberontology.org/uco/observable/FileFacet", - "https://ontology.unifiedcyberontology.org/uco/observable/fileName": "sample.txt", - "https://ontology.unifiedcyberontology.org/uco/observable/modifiedTime": { - "@type": "http://www.w3.org/2001/XMLSchema#dateTime", - "@value": "2010-01-02T03:04:56+00:00" + "@id": "http://example.org/kb/hash-dd2161f0-4943-55c0-b08e-a2ad8a85dce8", + "@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash", + "https://ontology.unifiedcyberontology.org/uco/types/hashMethod": { + "@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab", + "@value": "SHA3-256" }, - "https://ontology.unifiedcyberontology.org/uco/observable/sizeInBytes": 4 + "https://ontology.unifiedcyberontology.org/uco/types/hashValue": { + "@type": "http://www.w3.org/2001/XMLSchema#hexBinary", + "@value": "36f028580bb02cc8272a9a020f4200e346e276ae664e45ee80745574e2f5ab80" + } }, { - "@id": "http://example.org/kb/hash-8ebb651a-314b-554a-b63e-78b9e69111d8", + "@id": "http://example.org/kb/hash-c77cb4f1-ac2a-52c7-b67e-016e209515cb", "@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash", "https://ontology.unifiedcyberontology.org/uco/types/hashMethod": { "@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab", - "@value": "MD5" + "@value": "SHA1" }, "https://ontology.unifiedcyberontology.org/uco/types/hashValue": { "@type": "http://www.w3.org/2001/XMLSchema#hexBinary", - "@value": "098f6bcd4621d373cade4e832627b4f6" + "@value": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3" } } ] diff --git a/tests/case_utils/case_file/sample.txt.json b/tests/case_utils/case_file/sample.txt.json index a4a97c8..7120e96 100644 --- a/tests/case_utils/case_file/sample.txt.json +++ b/tests/case_utils/case_file/sample.txt.json @@ -1,17 +1,65 @@ { "@context": { + "brick": "https://brickschema.org/schema/Brick#", + 
"csvw": "http://www.w3.org/ns/csvw#", + "dc": "http://purl.org/dc/elements/1.1/", + "dcam": "http://purl.org/dc/dcam/", + "dcat": "http://www.w3.org/ns/dcat#", + "dcmitype": "http://purl.org/dc/dcmitype/", + "dcterms": "http://purl.org/dc/terms/", + "doap": "http://usefulinc.com/ns/doap#", + "foaf": "http://xmlns.com/foaf/0.1/", + "geo": "http://www.opengis.net/ont/geosparql#", "kb": "http://example.org/kb/", + "odrl": "http://www.w3.org/ns/odrl/2/", + "org": "http://www.w3.org/ns/org#", "owl": "http://www.w3.org/2002/07/owl#", + "prof": "http://www.w3.org/ns/dx/prof/", + "prov": "http://www.w3.org/ns/prov#", + "qb": "http://purl.org/linked-data/cube#", "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", "rdfs": "http://www.w3.org/2000/01/rdf-schema#", + "schema": "https://schema.org/", + "sh": "http://www.w3.org/ns/shacl#", + "skos": "http://www.w3.org/2004/02/skos/core#", + "sosa": "http://www.w3.org/ns/sosa/", + "ssn": "http://www.w3.org/ns/ssn/", + "time": "http://www.w3.org/2006/time#", "uco-core": "https://ontology.unifiedcyberontology.org/uco/core/", "uco-observable": "https://ontology.unifiedcyberontology.org/uco/observable/", "uco-types": "https://ontology.unifiedcyberontology.org/uco/types/", "uco-vocabulary": "https://ontology.unifiedcyberontology.org/uco/vocabulary/", + "vann": "http://purl.org/vocab/vann/", + "void": "http://rdfs.org/ns/void#", + "wgs": "https://www.w3.org/2003/01/geo/wgs84_pos#", "xml": "http://www.w3.org/XML/1998/namespace", "xsd": "http://www.w3.org/2001/XMLSchema#" }, "@graph": [ + { + "@id": "kb:file-789a91ef-6446-548c-9911-dcc5168f25ea", + "@type": "uco-observable:File", + "uco-core:hasFacet": [ + { + "@id": "kb:file-facet-e6bb7192-3a63-561b-87c2-9aea090b49e4" + }, + { + "@id": "kb:content-data-facet-f48466e4-5394-584d-84ad-c46a7b9680bf" + } + ] + }, + { + "@id": "kb:hash-2b723e8c-6125-5867-83c3-a46753e41a07", + "@type": "uco-types:Hash", + "uco-types:hashMethod": { + "@type": "uco-vocabulary:HashNameVocab", + "@value": 
"SHA256" + }, + "uco-types:hashValue": { + "@type": "xsd:hexBinary", + "@value": "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08" + } + }, { "@id": "kb:hash-2cae4ae2-d773-5ea2-ba3e-2c4092574959", "@type": "uco-types:Hash", @@ -25,38 +73,52 @@ } }, { - "@id": "kb:hash-f067246c-a31a-597d-a84b-7b70ce4c8795", + "@id": "kb:hash-b656d6d4-b99b-58c8-a487-9889713a8efe", "@type": "uco-types:Hash", "uco-types:hashMethod": { "@type": "uco-vocabulary:HashNameVocab", - "@value": "MD5" + "@value": "SHA3-256" }, "uco-types:hashValue": { "@type": "xsd:hexBinary", - "@value": "098f6bcd4621d373cade4e832627b4f6" + "@value": "36f028580bb02cc8272a9a020f4200e346e276ae664e45ee80745574e2f5ab80" } }, { - "@id": "kb:file-facet-e6bb7192-3a63-561b-87c2-9aea090b49e4", - "@type": "uco-observable:FileFacet", - "uco-observable:fileName": "sample.txt", - "uco-observable:modifiedTime": { - "@type": "xsd:dateTime", - "@value": "2010-01-02T03:04:56+00:00" + "@id": "kb:hash-9652135a-b58d-592d-b0f4-ac684ecdf6ed", + "@type": "uco-types:Hash", + "uco-types:hashMethod": { + "@type": "uco-vocabulary:HashNameVocab", + "@value": "SHA512" }, - "uco-observable:sizeInBytes": 4 + "uco-types:hashValue": { + "@type": "xsd:hexBinary", + "@value": "ee26b0dd4af7e749aa1a8ee3c10ae9923f618980772e473f8819a5d4940e0db27ac185f8a0e1d5f84f88bc887fd67b143732c304cc5fa9ad8e6f57f50028a8ff" + } }, { - "@id": "kb:file-789a91ef-6446-548c-9911-dcc5168f25ea", - "@type": "uco-observable:File", - "uco-core:hasFacet": [ - { - "@id": "kb:file-facet-e6bb7192-3a63-561b-87c2-9aea090b49e4" - }, - { - "@id": "kb:content-data-facet-f48466e4-5394-584d-84ad-c46a7b9680bf" - } - ] + "@id": "kb:hash-3228c9ed-792d-5603-afc2-8b9a9752606d", + "@type": "uco-types:Hash", + "uco-types:hashMethod": { + "@type": "uco-vocabulary:HashNameVocab", + "@value": "SHA3-512" + }, + "uco-types:hashValue": { + "@type": "xsd:hexBinary", + "@value": 
"9ece086e9bac491fac5c1d1046ca11d737b92a2b2ebd93f005d7b710110c0a678288166e7fbe796883a4f2e9b3ca9f484f521d0ce464345cc1aec96779149c14" + } + }, + { + "@id": "kb:hash-f067246c-a31a-597d-a84b-7b70ce4c8795", + "@type": "uco-types:Hash", + "uco-types:hashMethod": { + "@type": "uco-vocabulary:HashNameVocab", + "@value": "MD5" + }, + "uco-types:hashValue": { + "@type": "xsd:hexBinary", + "@value": "098f6bcd4621d373cade4e832627b4f6" + } }, { "@id": "kb:content-data-facet-f48466e4-5394-584d-84ad-c46a7b9680bf", @@ -73,33 +135,25 @@ }, { "@id": "kb:hash-9652135a-b58d-592d-b0f4-ac684ecdf6ed" + }, + { + "@id": "kb:hash-b656d6d4-b99b-58c8-a487-9889713a8efe" + }, + { + "@id": "kb:hash-3228c9ed-792d-5603-afc2-8b9a9752606d" } ], "uco-observable:sizeInBytes": 4 }, { - "@id": "kb:hash-9652135a-b58d-592d-b0f4-ac684ecdf6ed", - "@type": "uco-types:Hash", - "uco-types:hashMethod": { - "@type": "uco-vocabulary:HashNameVocab", - "@value": "SHA512" - }, - "uco-types:hashValue": { - "@type": "xsd:hexBinary", - "@value": "ee26b0dd4af7e749aa1a8ee3c10ae9923f618980772e473f8819a5d4940e0db27ac185f8a0e1d5f84f88bc887fd67b143732c304cc5fa9ad8e6f57f50028a8ff" - } - }, - { - "@id": "kb:hash-2b723e8c-6125-5867-83c3-a46753e41a07", - "@type": "uco-types:Hash", - "uco-types:hashMethod": { - "@type": "uco-vocabulary:HashNameVocab", - "@value": "SHA256" + "@id": "kb:file-facet-e6bb7192-3a63-561b-87c2-9aea090b49e4", + "@type": "uco-observable:FileFacet", + "uco-observable:fileName": "sample.txt", + "uco-observable:modifiedTime": { + "@type": "xsd:dateTime", + "@value": "2010-01-02T03:04:56+00:00" }, - "uco-types:hashValue": { - "@type": "xsd:hexBinary", - "@value": "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08" - } + "uco-observable:sizeInBytes": 4 } ] } \ No newline at end of file diff --git a/tests/case_utils/case_file/sample.txt.ttl b/tests/case_utils/case_file/sample.txt.ttl index 8f20b60..ddccd88 100644 --- a/tests/case_utils/case_file/sample.txt.ttl +++ 
b/tests/case_utils/case_file/sample.txt.ttl @@ -14,7 +14,9 @@ kb:content-data-facet-1833f979-1f19-5543-9d82-6cefd144b169 kb:hash-24644904-83ea-5911-aea8-be687a9f3caf , kb:hash-295bdeb5-7f23-5a3f-8b7f-4bb1191b7c21 , kb:hash-39127f5c-598b-51d4-a720-2e949f18f85f , - kb:hash-49e81fee-c6b3-5f5f-af8b-0746d32e4932 + kb:hash-49e81fee-c6b3-5f5f-af8b-0746d32e4932 , + kb:hash-720759b8-9544-5dab-ab12-003372b17a4e , + kb:hash-b02ebdb3-edf7-5fbf-8088-3d064e316b93 ; uco-observable:sizeInBytes "4"^^xsd:integer ; . @@ -58,3 +60,15 @@ kb:hash-49e81fee-c6b3-5f5f-af8b-0746d32e4932 uco-types:hashValue "ee26b0dd4af7e749aa1a8ee3c10ae9923f618980772e473f8819a5d4940e0db27ac185f8a0e1d5f84f88bc887fd67b143732c304cc5fa9ad8e6f57f50028a8ff"^^xsd:hexBinary ; . +kb:hash-720759b8-9544-5dab-ab12-003372b17a4e + a uco-types:Hash ; + uco-types:hashMethod "SHA3-512"^^uco-vocabulary:HashNameVocab ; + uco-types:hashValue "9ece086e9bac491fac5c1d1046ca11d737b92a2b2ebd93f005d7b710110c0a678288166e7fbe796883a4f2e9b3ca9f484f521d0ce464345cc1aec96779149c14"^^xsd:hexBinary ; + . + +kb:hash-b02ebdb3-edf7-5fbf-8088-3d064e316b93 + a uco-types:Hash ; + uco-types:hashMethod "SHA3-256"^^uco-vocabulary:HashNameVocab ; + uco-types:hashValue "36f028580bb02cc8272a9a020f4200e346e276ae664e45ee80745574e2f5ab80"^^xsd:hexBinary ; + . + From 63c72feccb63a5c1d44652ee05e86909dba34f0a Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Thu, 4 May 2023 09:05:47 -0400 Subject: [PATCH 04/12] Change start of NamedIndividuals' suffixes to use last path-segment of class's IRI While drafting hand-written example data, it had proved beneficial to some drafters (myself included) to disambiguate `owl:NamedIndividual`s from `owl:Class`es by spelling the class differently in the IRI. Taking `uco-observable:FileFacet` as an example, it was originally frequently written as `FileFacet` when referring to the class, and `file-facet-...` when referring to an individual. 
Unfortunately, trying to carry this pattern forward is likely to create a technological burden. Camel casing can't always be assumed to apply straightforwardly, and would cause special-case logic to be needed. See e.g.: * `uco-observable:WifiAddressFacet` that would split on capital letters to `kb:wifi-address-facet`, which doesn't seem to be a problem; * `uco-location:GPSCoordinatesFacet` would induce `kb:g-p-s-coordinates-facet`, which seems far less obviously acceptable; * `uco-observable:HTTPConnectionFacet` splitting to `kb:h-t-t-p-connection-facet` may be the last convincing we need. Rather than invest in preserving the lowercased, hyphenated suffix scheme, this patch removes the question and now has individuals use the last path-segment of the class's IRI. A follow-on patch will regenerate Make-managed files. Signed-off-by: Alex Nelson --- case_utils/case_file/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/case_utils/case_file/__init__.py b/case_utils/case_file/__init__.py index e58fa92..d5cb65a 100644 --- a/case_utils/case_file/__init__.py +++ b/case_utils/case_file/__init__.py @@ -70,7 +70,7 @@ def create_file_node( :param filepath: The path to the file to characterize. Can be relative or absolute. :type filepath: str - :param node_iri: The desired full IRI for the node. If absent, will make an IRI of the pattern ``ns_base + 'file-' + uuid4`` + :param node_iri: The desired full IRI for the node. If absent, will make an IRI of the pattern ``ns_base + 'File-' + uuid4`` :type node_iri: str :param node_prefix: The base prefix to use if node_iri is not supplied. 
@@ -88,7 +88,7 @@ def create_file_node( node_namespace = rdflib.Namespace(node_prefix) if node_iri is None: - node_slug = "file-" + case_utils.local_uuid.local_uuid() + node_slug = "File-" + case_utils.local_uuid.local_uuid() node_iri = node_namespace[node_slug] n_file = rdflib.URIRef(node_iri) graph.add((n_file, NS_RDF.type, NS_UCO_OBSERVABLE.File)) @@ -97,7 +97,7 @@ def create_file_node( literal_basename = rdflib.Literal(basename) file_stat = os.stat(filepath) - n_file_facet = node_namespace["file-facet-" + case_utils.local_uuid.local_uuid()] + n_file_facet = node_namespace["FileFacet-" + case_utils.local_uuid.local_uuid()] graph.add( ( n_file_facet, @@ -273,7 +273,7 @@ def main() -> None: context_dictionary = {k: v for (k, v) in graph.namespace_manager.namespaces()} serialize_kwargs["context"] = context_dictionary - node_iri = NS_BASE["file-" + case_utils.local_uuid.local_uuid()] + node_iri = NS_BASE["File-" + case_utils.local_uuid.local_uuid()] create_file_node( graph, args.in_file, From 30a557609d7603bbff0d7e5f8868b7910e104cca Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Thu, 4 May 2023 09:26:37 -0400 Subject: [PATCH 05/12] Regenerate Make-managed files Signed-off-by: Alex Nelson --- tests/case_utils/case_file/kb.json | 70 +++++------ tests/case_utils/case_file/kb.ttl | 40 +++--- .../case_file/sample.txt-disable_hashes.ttl | 6 +- .../case_file/sample.txt-nocompact.json | 118 +++++++++--------- tests/case_utils/case_file/sample.txt.json | 98 +++++++-------- tests/case_utils/case_file/sample.txt.ttl | 30 ++--- 6 files changed, 181 insertions(+), 181 deletions(-) diff --git a/tests/case_utils/case_file/kb.json b/tests/case_utils/case_file/kb.json index 5aa6bc9..a557e2f 100644 --- a/tests/case_utils/case_file/kb.json +++ b/tests/case_utils/case_file/kb.json @@ -9,54 +9,26 @@ }, "@graph": [ { - "@id": "kb:content-data-facet-1833f979-1f19-5543-9d82-6cefd144b169", - "@type": "uco-observable:ContentDataFacet", - "uco-observable:hash": [ - { - "@id": 
"kb:hash-24644904-83ea-5911-aea8-be687a9f3caf" - }, - { - "@id": "kb:hash-295bdeb5-7f23-5a3f-8b7f-4bb1191b7c21" - }, - { - "@id": "kb:hash-39127f5c-598b-51d4-a720-2e949f18f85f" - }, - { - "@id": "kb:hash-49e81fee-c6b3-5f5f-af8b-0746d32e4932" - }, - { - "@id": "kb:hash-720759b8-9544-5dab-ab12-003372b17a4e" - }, - { - "@id": "kb:hash-b02ebdb3-edf7-5fbf-8088-3d064e316b93" - } - ], - "uco-observable:sizeInBytes": { - "@type": "xsd:integer", - "@value": "4" - } - }, - { - "@id": "kb:file-800784de-5c9e-5eb2-b843-0ac51a1bd4b9", + "@id": "kb:File-800784de-5c9e-5eb2-b843-0ac51a1bd4b9", "@type": "uco-observable:File", "uco-core:hasFacet": { - "@id": "kb:file-facet-ffa3e6bb-dffc-549d-a7c4-ffc5e90ac55d" + "@id": "kb:FileFacet-ffa3e6bb-dffc-549d-a7c4-ffc5e90ac55d" } }, { - "@id": "kb:file-ace6460a-92a9-58b9-83ea-a18ae87f6e04", + "@id": "kb:File-ace6460a-92a9-58b9-83ea-a18ae87f6e04", "@type": "uco-observable:File", "uco-core:hasFacet": [ { - "@id": "kb:content-data-facet-1833f979-1f19-5543-9d82-6cefd144b169" + "@id": "kb:FileFacet-1297a4bd-563b-54c3-ad8a-f67f01ba9b10" }, { - "@id": "kb:file-facet-1297a4bd-563b-54c3-ad8a-f67f01ba9b10" + "@id": "kb:content-data-facet-1833f979-1f19-5543-9d82-6cefd144b169" } ] }, { - "@id": "kb:file-facet-1297a4bd-563b-54c3-ad8a-f67f01ba9b10", + "@id": "kb:FileFacet-1297a4bd-563b-54c3-ad8a-f67f01ba9b10", "@type": "uco-observable:FileFacet", "uco-observable:fileName": "sample.txt", "uco-observable:modifiedTime": { @@ -69,7 +41,7 @@ } }, { - "@id": "kb:file-facet-ffa3e6bb-dffc-549d-a7c4-ffc5e90ac55d", + "@id": "kb:FileFacet-ffa3e6bb-dffc-549d-a7c4-ffc5e90ac55d", "@type": "uco-observable:FileFacet", "uco-observable:fileName": "sample.txt", "uco-observable:modifiedTime": { @@ -81,6 +53,34 @@ "@value": "4" } }, + { + "@id": "kb:content-data-facet-1833f979-1f19-5543-9d82-6cefd144b169", + "@type": "uco-observable:ContentDataFacet", + "uco-observable:hash": [ + { + "@id": "kb:hash-24644904-83ea-5911-aea8-be687a9f3caf" + }, + { + "@id": 
"kb:hash-295bdeb5-7f23-5a3f-8b7f-4bb1191b7c21" + }, + { + "@id": "kb:hash-39127f5c-598b-51d4-a720-2e949f18f85f" + }, + { + "@id": "kb:hash-49e81fee-c6b3-5f5f-af8b-0746d32e4932" + }, + { + "@id": "kb:hash-720759b8-9544-5dab-ab12-003372b17a4e" + }, + { + "@id": "kb:hash-b02ebdb3-edf7-5fbf-8088-3d064e316b93" + } + ], + "uco-observable:sizeInBytes": { + "@type": "xsd:integer", + "@value": "4" + } + }, { "@id": "kb:hash-24644904-83ea-5911-aea8-be687a9f3caf", "@type": "uco-types:Hash", diff --git a/tests/case_utils/case_file/kb.ttl b/tests/case_utils/case_file/kb.ttl index 566013e..45ad25e 100644 --- a/tests/case_utils/case_file/kb.ttl +++ b/tests/case_utils/case_file/kb.ttl @@ -8,46 +8,46 @@ @prefix uco-vocabulary: . @prefix xsd: . -kb:content-data-facet-1833f979-1f19-5543-9d82-6cefd144b169 - a uco-observable:ContentDataFacet ; - uco-observable:hash - kb:hash-24644904-83ea-5911-aea8-be687a9f3caf , - kb:hash-295bdeb5-7f23-5a3f-8b7f-4bb1191b7c21 , - kb:hash-39127f5c-598b-51d4-a720-2e949f18f85f , - kb:hash-49e81fee-c6b3-5f5f-af8b-0746d32e4932 , - kb:hash-720759b8-9544-5dab-ab12-003372b17a4e , - kb:hash-b02ebdb3-edf7-5fbf-8088-3d064e316b93 - ; - uco-observable:sizeInBytes "4"^^xsd:integer ; - . - -kb:file-800784de-5c9e-5eb2-b843-0ac51a1bd4b9 +kb:File-800784de-5c9e-5eb2-b843-0ac51a1bd4b9 a uco-observable:File ; - uco-core:hasFacet kb:file-facet-ffa3e6bb-dffc-549d-a7c4-ffc5e90ac55d ; + uco-core:hasFacet kb:FileFacet-ffa3e6bb-dffc-549d-a7c4-ffc5e90ac55d ; . -kb:file-ace6460a-92a9-58b9-83ea-a18ae87f6e04 +kb:File-ace6460a-92a9-58b9-83ea-a18ae87f6e04 a uco-observable:File ; uco-core:hasFacet - kb:content-data-facet-1833f979-1f19-5543-9d82-6cefd144b169 , - kb:file-facet-1297a4bd-563b-54c3-ad8a-f67f01ba9b10 + kb:FileFacet-1297a4bd-563b-54c3-ad8a-f67f01ba9b10 , + kb:content-data-facet-1833f979-1f19-5543-9d82-6cefd144b169 ; . 
-kb:file-facet-1297a4bd-563b-54c3-ad8a-f67f01ba9b10 +kb:FileFacet-1297a4bd-563b-54c3-ad8a-f67f01ba9b10 a uco-observable:FileFacet ; uco-observable:fileName "sample.txt" ; uco-observable:modifiedTime "2010-01-02T03:04:56+00:00"^^xsd:dateTime ; uco-observable:sizeInBytes "4"^^xsd:integer ; . -kb:file-facet-ffa3e6bb-dffc-549d-a7c4-ffc5e90ac55d +kb:FileFacet-ffa3e6bb-dffc-549d-a7c4-ffc5e90ac55d a uco-observable:FileFacet ; uco-observable:fileName "sample.txt" ; uco-observable:modifiedTime "2010-01-02T03:04:56+00:00"^^xsd:dateTime ; uco-observable:sizeInBytes "4"^^xsd:integer ; . +kb:content-data-facet-1833f979-1f19-5543-9d82-6cefd144b169 + a uco-observable:ContentDataFacet ; + uco-observable:hash + kb:hash-24644904-83ea-5911-aea8-be687a9f3caf , + kb:hash-295bdeb5-7f23-5a3f-8b7f-4bb1191b7c21 , + kb:hash-39127f5c-598b-51d4-a720-2e949f18f85f , + kb:hash-49e81fee-c6b3-5f5f-af8b-0746d32e4932 , + kb:hash-720759b8-9544-5dab-ab12-003372b17a4e , + kb:hash-b02ebdb3-edf7-5fbf-8088-3d064e316b93 + ; + uco-observable:sizeInBytes "4"^^xsd:integer ; + . + kb:hash-24644904-83ea-5911-aea8-be687a9f3caf a uco-types:Hash ; uco-types:hashMethod "MD5"^^uco-vocabulary:HashNameVocab ; diff --git a/tests/case_utils/case_file/sample.txt-disable_hashes.ttl b/tests/case_utils/case_file/sample.txt-disable_hashes.ttl index d9bc216..24add3e 100644 --- a/tests/case_utils/case_file/sample.txt-disable_hashes.ttl +++ b/tests/case_utils/case_file/sample.txt-disable_hashes.ttl @@ -6,12 +6,12 @@ @prefix uco-observable: . @prefix xsd: . -kb:file-800784de-5c9e-5eb2-b843-0ac51a1bd4b9 +kb:File-800784de-5c9e-5eb2-b843-0ac51a1bd4b9 a uco-observable:File ; - uco-core:hasFacet kb:file-facet-ffa3e6bb-dffc-549d-a7c4-ffc5e90ac55d ; + uco-core:hasFacet kb:FileFacet-ffa3e6bb-dffc-549d-a7c4-ffc5e90ac55d ; . 
-kb:file-facet-ffa3e6bb-dffc-549d-a7c4-ffc5e90ac55d +kb:FileFacet-ffa3e6bb-dffc-549d-a7c4-ffc5e90ac55d a uco-observable:FileFacet ; uco-observable:fileName "sample.txt" ; uco-observable:modifiedTime "2010-01-02T03:04:56+00:00"^^xsd:dateTime ; diff --git a/tests/case_utils/case_file/sample.txt-nocompact.json b/tests/case_utils/case_file/sample.txt-nocompact.json index efdb4b7..83e2ff3 100644 --- a/tests/case_utils/case_file/sample.txt-nocompact.json +++ b/tests/case_utils/case_file/sample.txt-nocompact.json @@ -37,48 +37,35 @@ }, "@graph": [ { - "@id": "http://example.org/kb/content-data-facet-bda9b72d-2753-54ab-9292-e1e260be4f6d", - "@type": "https://ontology.unifiedcyberontology.org/uco/observable/ContentDataFacet", - "https://ontology.unifiedcyberontology.org/uco/observable/hash": [ - { - "@id": "http://example.org/kb/hash-8ebb651a-314b-554a-b63e-78b9e69111d8" - }, - { - "@id": "http://example.org/kb/hash-c77cb4f1-ac2a-52c7-b67e-016e209515cb" - }, - { - "@id": "http://example.org/kb/hash-bdb2ba7d-8cb2-5591-a051-0c20d134e894" - }, - { - "@id": "http://example.org/kb/hash-bf689e82-1cc4-507f-a6fb-7fc01b9289c6" - }, - { - "@id": "http://example.org/kb/hash-dd2161f0-4943-55c0-b08e-a2ad8a85dce8" - }, - { - "@id": "http://example.org/kb/hash-28dd6731-4eda-5ae7-9810-efedc7593912" - } - ], - "https://ontology.unifiedcyberontology.org/uco/observable/sizeInBytes": 4 + "@id": "http://example.org/kb/hash-dd2161f0-4943-55c0-b08e-a2ad8a85dce8", + "@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash", + "https://ontology.unifiedcyberontology.org/uco/types/hashMethod": { + "@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab", + "@value": "SHA3-256" + }, + "https://ontology.unifiedcyberontology.org/uco/types/hashValue": { + "@type": "http://www.w3.org/2001/XMLSchema#hexBinary", + "@value": "36f028580bb02cc8272a9a020f4200e346e276ae664e45ee80745574e2f5ab80" + } }, { - "@id": "http://example.org/kb/hash-bdb2ba7d-8cb2-5591-a051-0c20d134e894", 
+ "@id": "http://example.org/kb/hash-c77cb4f1-ac2a-52c7-b67e-016e209515cb", "@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash", "https://ontology.unifiedcyberontology.org/uco/types/hashMethod": { "@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab", - "@value": "SHA256" + "@value": "SHA1" }, "https://ontology.unifiedcyberontology.org/uco/types/hashValue": { "@type": "http://www.w3.org/2001/XMLSchema#hexBinary", - "@value": "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08" + "@value": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3" } }, { - "@id": "http://example.org/kb/file-23f45d80-7b16-5e7f-ba34-40392fa4f8fc", + "@id": "http://example.org/kb/File-23f45d80-7b16-5e7f-ba34-40392fa4f8fc", "@type": "https://ontology.unifiedcyberontology.org/uco/observable/File", "https://ontology.unifiedcyberontology.org/uco/core/hasFacet": [ { - "@id": "http://example.org/kb/file-facet-a5d9606e-a5cf-5531-9462-5bed0ac4219c" + "@id": "http://example.org/kb/FileFacet-a5d9606e-a5cf-5531-9462-5bed0ac4219c" }, { "@id": "http://example.org/kb/content-data-facet-bda9b72d-2753-54ab-9292-e1e260be4f6d" @@ -86,73 +73,86 @@ ] }, { - "@id": "http://example.org/kb/hash-28dd6731-4eda-5ae7-9810-efedc7593912", + "@id": "http://example.org/kb/hash-bf689e82-1cc4-507f-a6fb-7fc01b9289c6", "@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash", "https://ontology.unifiedcyberontology.org/uco/types/hashMethod": { "@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab", - "@value": "SHA3-512" + "@value": "SHA512" }, "https://ontology.unifiedcyberontology.org/uco/types/hashValue": { "@type": "http://www.w3.org/2001/XMLSchema#hexBinary", - "@value": "9ece086e9bac491fac5c1d1046ca11d737b92a2b2ebd93f005d7b710110c0a678288166e7fbe796883a4f2e9b3ca9f484f521d0ce464345cc1aec96779149c14" + "@value": 
"ee26b0dd4af7e749aa1a8ee3c10ae9923f618980772e473f8819a5d4940e0db27ac185f8a0e1d5f84f88bc887fd67b143732c304cc5fa9ad8e6f57f50028a8ff" } }, { - "@id": "http://example.org/kb/file-facet-a5d9606e-a5cf-5531-9462-5bed0ac4219c", - "@type": "https://ontology.unifiedcyberontology.org/uco/observable/FileFacet", - "https://ontology.unifiedcyberontology.org/uco/observable/fileName": "sample.txt", - "https://ontology.unifiedcyberontology.org/uco/observable/modifiedTime": { - "@type": "http://www.w3.org/2001/XMLSchema#dateTime", - "@value": "2010-01-02T03:04:56+00:00" - }, - "https://ontology.unifiedcyberontology.org/uco/observable/sizeInBytes": 4 - }, - { - "@id": "http://example.org/kb/hash-8ebb651a-314b-554a-b63e-78b9e69111d8", + "@id": "http://example.org/kb/hash-bdb2ba7d-8cb2-5591-a051-0c20d134e894", "@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash", "https://ontology.unifiedcyberontology.org/uco/types/hashMethod": { "@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab", - "@value": "MD5" + "@value": "SHA256" }, "https://ontology.unifiedcyberontology.org/uco/types/hashValue": { "@type": "http://www.w3.org/2001/XMLSchema#hexBinary", - "@value": "098f6bcd4621d373cade4e832627b4f6" + "@value": "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08" } }, { - "@id": "http://example.org/kb/hash-bf689e82-1cc4-507f-a6fb-7fc01b9289c6", + "@id": "http://example.org/kb/hash-8ebb651a-314b-554a-b63e-78b9e69111d8", "@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash", "https://ontology.unifiedcyberontology.org/uco/types/hashMethod": { "@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab", - "@value": "SHA512" + "@value": "MD5" }, "https://ontology.unifiedcyberontology.org/uco/types/hashValue": { "@type": "http://www.w3.org/2001/XMLSchema#hexBinary", - "@value": "ee26b0dd4af7e749aa1a8ee3c10ae9923f618980772e473f8819a5d4940e0db27ac185f8a0e1d5f84f88bc887fd67b143732c304cc5fa9ad8e6f57f50028a8ff" + 
"@value": "098f6bcd4621d373cade4e832627b4f6" } }, { - "@id": "http://example.org/kb/hash-dd2161f0-4943-55c0-b08e-a2ad8a85dce8", - "@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash", - "https://ontology.unifiedcyberontology.org/uco/types/hashMethod": { - "@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab", - "@value": "SHA3-256" + "@id": "http://example.org/kb/content-data-facet-bda9b72d-2753-54ab-9292-e1e260be4f6d", + "@type": "https://ontology.unifiedcyberontology.org/uco/observable/ContentDataFacet", + "https://ontology.unifiedcyberontology.org/uco/observable/hash": [ + { + "@id": "http://example.org/kb/hash-8ebb651a-314b-554a-b63e-78b9e69111d8" + }, + { + "@id": "http://example.org/kb/hash-c77cb4f1-ac2a-52c7-b67e-016e209515cb" + }, + { + "@id": "http://example.org/kb/hash-bdb2ba7d-8cb2-5591-a051-0c20d134e894" + }, + { + "@id": "http://example.org/kb/hash-bf689e82-1cc4-507f-a6fb-7fc01b9289c6" + }, + { + "@id": "http://example.org/kb/hash-dd2161f0-4943-55c0-b08e-a2ad8a85dce8" + }, + { + "@id": "http://example.org/kb/hash-28dd6731-4eda-5ae7-9810-efedc7593912" + } + ], + "https://ontology.unifiedcyberontology.org/uco/observable/sizeInBytes": 4 + }, + { + "@id": "http://example.org/kb/FileFacet-a5d9606e-a5cf-5531-9462-5bed0ac4219c", + "@type": "https://ontology.unifiedcyberontology.org/uco/observable/FileFacet", + "https://ontology.unifiedcyberontology.org/uco/observable/fileName": "sample.txt", + "https://ontology.unifiedcyberontology.org/uco/observable/modifiedTime": { + "@type": "http://www.w3.org/2001/XMLSchema#dateTime", + "@value": "2010-01-02T03:04:56+00:00" }, - "https://ontology.unifiedcyberontology.org/uco/types/hashValue": { - "@type": "http://www.w3.org/2001/XMLSchema#hexBinary", - "@value": "36f028580bb02cc8272a9a020f4200e346e276ae664e45ee80745574e2f5ab80" - } + "https://ontology.unifiedcyberontology.org/uco/observable/sizeInBytes": 4 }, { - "@id": 
"http://example.org/kb/hash-c77cb4f1-ac2a-52c7-b67e-016e209515cb", + "@id": "http://example.org/kb/hash-28dd6731-4eda-5ae7-9810-efedc7593912", "@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash", "https://ontology.unifiedcyberontology.org/uco/types/hashMethod": { "@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab", - "@value": "SHA1" + "@value": "SHA3-512" }, "https://ontology.unifiedcyberontology.org/uco/types/hashValue": { "@type": "http://www.w3.org/2001/XMLSchema#hexBinary", - "@value": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3" + "@value": "9ece086e9bac491fac5c1d1046ca11d737b92a2b2ebd93f005d7b710110c0a678288166e7fbe796883a4f2e9b3ca9f484f521d0ce464345cc1aec96779149c14" } } ] diff --git a/tests/case_utils/case_file/sample.txt.json b/tests/case_utils/case_file/sample.txt.json index 7120e96..e393a69 100644 --- a/tests/case_utils/case_file/sample.txt.json +++ b/tests/case_utils/case_file/sample.txt.json @@ -36,42 +36,6 @@ "xsd": "http://www.w3.org/2001/XMLSchema#" }, "@graph": [ - { - "@id": "kb:file-789a91ef-6446-548c-9911-dcc5168f25ea", - "@type": "uco-observable:File", - "uco-core:hasFacet": [ - { - "@id": "kb:file-facet-e6bb7192-3a63-561b-87c2-9aea090b49e4" - }, - { - "@id": "kb:content-data-facet-f48466e4-5394-584d-84ad-c46a7b9680bf" - } - ] - }, - { - "@id": "kb:hash-2b723e8c-6125-5867-83c3-a46753e41a07", - "@type": "uco-types:Hash", - "uco-types:hashMethod": { - "@type": "uco-vocabulary:HashNameVocab", - "@value": "SHA256" - }, - "uco-types:hashValue": { - "@type": "xsd:hexBinary", - "@value": "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08" - } - }, - { - "@id": "kb:hash-2cae4ae2-d773-5ea2-ba3e-2c4092574959", - "@type": "uco-types:Hash", - "uco-types:hashMethod": { - "@type": "uco-vocabulary:HashNameVocab", - "@value": "SHA1" - }, - "uco-types:hashValue": { - "@type": "xsd:hexBinary", - "@value": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3" - } - }, { "@id": 
"kb:hash-b656d6d4-b99b-58c8-a487-9889713a8efe", "@type": "uco-types:Hash", @@ -97,27 +61,37 @@ } }, { - "@id": "kb:hash-3228c9ed-792d-5603-afc2-8b9a9752606d", + "@id": "kb:FileFacet-e6bb7192-3a63-561b-87c2-9aea090b49e4", + "@type": "uco-observable:FileFacet", + "uco-observable:fileName": "sample.txt", + "uco-observable:modifiedTime": { + "@type": "xsd:dateTime", + "@value": "2010-01-02T03:04:56+00:00" + }, + "uco-observable:sizeInBytes": 4 + }, + { + "@id": "kb:hash-2cae4ae2-d773-5ea2-ba3e-2c4092574959", "@type": "uco-types:Hash", "uco-types:hashMethod": { "@type": "uco-vocabulary:HashNameVocab", - "@value": "SHA3-512" + "@value": "SHA1" }, "uco-types:hashValue": { "@type": "xsd:hexBinary", - "@value": "9ece086e9bac491fac5c1d1046ca11d737b92a2b2ebd93f005d7b710110c0a678288166e7fbe796883a4f2e9b3ca9f484f521d0ce464345cc1aec96779149c14" + "@value": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3" } }, { - "@id": "kb:hash-f067246c-a31a-597d-a84b-7b70ce4c8795", + "@id": "kb:hash-3228c9ed-792d-5603-afc2-8b9a9752606d", "@type": "uco-types:Hash", "uco-types:hashMethod": { "@type": "uco-vocabulary:HashNameVocab", - "@value": "MD5" + "@value": "SHA3-512" }, "uco-types:hashValue": { "@type": "xsd:hexBinary", - "@value": "098f6bcd4621d373cade4e832627b4f6" + "@value": "9ece086e9bac491fac5c1d1046ca11d737b92a2b2ebd93f005d7b710110c0a678288166e7fbe796883a4f2e9b3ca9f484f521d0ce464345cc1aec96779149c14" } }, { @@ -146,14 +120,40 @@ "uco-observable:sizeInBytes": 4 }, { - "@id": "kb:file-facet-e6bb7192-3a63-561b-87c2-9aea090b49e4", - "@type": "uco-observable:FileFacet", - "uco-observable:fileName": "sample.txt", - "uco-observable:modifiedTime": { - "@type": "xsd:dateTime", - "@value": "2010-01-02T03:04:56+00:00" + "@id": "kb:hash-2b723e8c-6125-5867-83c3-a46753e41a07", + "@type": "uco-types:Hash", + "uco-types:hashMethod": { + "@type": "uco-vocabulary:HashNameVocab", + "@value": "SHA256" }, - "uco-observable:sizeInBytes": 4 + "uco-types:hashValue": { + "@type": "xsd:hexBinary", + "@value": 
"9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08" + } + }, + { + "@id": "kb:hash-f067246c-a31a-597d-a84b-7b70ce4c8795", + "@type": "uco-types:Hash", + "uco-types:hashMethod": { + "@type": "uco-vocabulary:HashNameVocab", + "@value": "MD5" + }, + "uco-types:hashValue": { + "@type": "xsd:hexBinary", + "@value": "098f6bcd4621d373cade4e832627b4f6" + } + }, + { + "@id": "kb:File-789a91ef-6446-548c-9911-dcc5168f25ea", + "@type": "uco-observable:File", + "uco-core:hasFacet": [ + { + "@id": "kb:FileFacet-e6bb7192-3a63-561b-87c2-9aea090b49e4" + }, + { + "@id": "kb:content-data-facet-f48466e4-5394-584d-84ad-c46a7b9680bf" + } + ] } ] } \ No newline at end of file diff --git a/tests/case_utils/case_file/sample.txt.ttl b/tests/case_utils/case_file/sample.txt.ttl index ddccd88..8ca8973 100644 --- a/tests/case_utils/case_file/sample.txt.ttl +++ b/tests/case_utils/case_file/sample.txt.ttl @@ -8,6 +8,21 @@ @prefix uco-vocabulary: . @prefix xsd: . +kb:File-ace6460a-92a9-58b9-83ea-a18ae87f6e04 + a uco-observable:File ; + uco-core:hasFacet + kb:FileFacet-1297a4bd-563b-54c3-ad8a-f67f01ba9b10 , + kb:content-data-facet-1833f979-1f19-5543-9d82-6cefd144b169 + ; + . + +kb:FileFacet-1297a4bd-563b-54c3-ad8a-f67f01ba9b10 + a uco-observable:FileFacet ; + uco-observable:fileName "sample.txt" ; + uco-observable:modifiedTime "2010-01-02T03:04:56+00:00"^^xsd:dateTime ; + uco-observable:sizeInBytes "4"^^xsd:integer ; + . + kb:content-data-facet-1833f979-1f19-5543-9d82-6cefd144b169 a uco-observable:ContentDataFacet ; uco-observable:hash @@ -21,21 +36,6 @@ kb:content-data-facet-1833f979-1f19-5543-9d82-6cefd144b169 uco-observable:sizeInBytes "4"^^xsd:integer ; . -kb:file-ace6460a-92a9-58b9-83ea-a18ae87f6e04 - a uco-observable:File ; - uco-core:hasFacet - kb:content-data-facet-1833f979-1f19-5543-9d82-6cefd144b169 , - kb:file-facet-1297a4bd-563b-54c3-ad8a-f67f01ba9b10 - ; - . 
- -kb:file-facet-1297a4bd-563b-54c3-ad8a-f67f01ba9b10 - a uco-observable:FileFacet ; - uco-observable:fileName "sample.txt" ; - uco-observable:modifiedTime "2010-01-02T03:04:56+00:00"^^xsd:dateTime ; - uco-observable:sizeInBytes "4"^^xsd:integer ; - . - kb:hash-24644904-83ea-5911-aea8-be687a9f3caf a uco-types:Hash ; uco-types:hashMethod "MD5"^^uco-vocabulary:HashNameVocab ; From 9397f13e4ad12feed8e68e2879b8bfcfb8c032bf Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Thu, 4 May 2023 09:32:36 -0400 Subject: [PATCH 06/12] Enable doctests on modules This patch enables review of Python code inlined in the `case-utils` modules' docstrings. An initial docstring test is also included, because `pytest` reports an error if it is called and no tests are found. References: * https://docs.pytest.org/en/7.1.x/how-to/doctest.html Signed-off-by: Alex Nelson --- case_utils/ontology/src/ontology_and_version_iris.py | 5 +++++ tests/Makefile | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/case_utils/ontology/src/ontology_and_version_iris.py b/case_utils/ontology/src/ontology_and_version_iris.py index 7ac429d..e85f0ff 100644 --- a/case_utils/ontology/src/ontology_and_version_iris.py +++ b/case_utils/ontology/src/ontology_and_version_iris.py @@ -29,6 +29,11 @@ def concept_is_cdo_concept(n_concept: rdflib.URIRef) -> bool: """ This function is purposefully distinct from the function used in case_validate. Within this script, the publishing history of CASE and UCO is reviewed. 
+ + >>> concept_is_cdo_concept(rdflib.URIRef("http://example.org/ontology/Thing")) + False + >>> concept_is_cdo_concept(rdflib.URIRef("https://ontology.unifiedcyberontology.org/uco/core/UcoThing")) + True """ concept_iri = str(n_concept) return ( diff --git a/tests/Makefile b/tests/Makefile index c1c626e..f38243d 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -59,6 +59,11 @@ check: \ check-mypy \ check-isomorphic_diff \ check-case_utils + source venv/bin/activate \ + && pytest \ + --doctest-modules \ + --log-level=DEBUG \ + $(top_srcdir)/case_utils source venv/bin/activate \ && pytest \ --ignore case_utils \ From 89642f59ec22334d2fa17daa0285e36e8eafce2b Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Wed, 3 May 2023 08:36:57 -0400 Subject: [PATCH 07/12] Add function sketch of inherent UUIDs Signed-off-by: Alex Nelson --- case_utils/inherent_uuid.py | 94 +++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 case_utils/inherent_uuid.py diff --git a/case_utils/inherent_uuid.py b/case_utils/inherent_uuid.py new file mode 100644 index 0000000..e4e3aba --- /dev/null +++ b/case_utils/inherent_uuid.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python3 + +# This software was developed at the National Institute of Standards +# and Technology by employees of the Federal Government in the course +# of their official duties. Pursuant to title 17 Section 105 of the +# United States Code this software is not subject to copyright +# protection and is in the public domain. NIST assumes no +# responsibility whatsoever for its use by other parties, and makes +# no guarantees, expressed or implied, about its quality, +# reliability, or any other characteristic. +# +# We would appreciate acknowledgement if the software is used. + +""" +This library provides supporting constants and functions for generating deterministic UUIDs (version 5) for UCO Hash and Facet nodes. 
+""" + +import binascii +import re +import uuid +from typing import Dict, Optional, Tuple + +from rdflib import Literal, URIRef + +from case_utils.namespace import NS_UCO_VOCABULARY, NS_XSD + +L_MD5 = Literal("MD5", datatype=NS_UCO_VOCABULARY.HashNameVocab) +L_SHA1 = Literal("SHA1", datatype=NS_UCO_VOCABULARY.HashNameVocab) +L_SHA256 = Literal("SHA256", datatype=NS_UCO_VOCABULARY.HashNameVocab) +L_SHA3_256 = Literal("SHA3-256", datatype=NS_UCO_VOCABULARY.HashNameVocab) +L_SHA3_512 = Literal("SHA3-512", datatype=NS_UCO_VOCABULARY.HashNameVocab) +L_SHA384 = Literal("SHA384", datatype=NS_UCO_VOCABULARY.HashNameVocab) +L_SHA512 = Literal("SHA512", datatype=NS_UCO_VOCABULARY.HashNameVocab) +L_SSDEEP = Literal("SSDEEP", datatype=NS_UCO_VOCABULARY.HashNameVocab) + +# Key: hashMethod literal. +# Value: Tuple. +# * Lowercase spelling +HASH_METHOD_CASTINGS: Dict[Literal, Tuple[str, Optional[int]]] = { + L_MD5: ("md5", 32), + L_SHA1: ("sha1", 40), + L_SHA256: ("sha256", 64), + L_SHA3_256: ("sha3-256", 64), + L_SHA3_512: ("sha3-512", 128), + L_SHA384: ("sha384", 96), + L_SHA512: ("sha512", 128), + L_SSDEEP: ("ssdeep", None), +} + +RX_UUID = re.compile( + "[0-9a-f]{8}-[0-9a-f]{4}-[0-5][0-9a-f]{3}-[0-9a-f]{4}-[0-9a-f]{12}$", re.IGNORECASE +) + + +def inherence_uuid(n_node: URIRef) -> uuid.UUID: + node_iri = str(n_node) + if len(node_iri) < 40 or RX_UUID.search(node_iri) is None: + # <40 -> Too short to have a UUID and scheme. 
+ return uuid.uuid5(uuid.NAMESPACE_URL, node_iri) + else: + return uuid.uuid5(uuid.NAMESPACE_OID, node_iri[-36:]) + + +def predicated_inherence_uuid( + node_inherence_uuid: uuid.UUID, n_predicate: URIRef +) -> uuid.UUID: + return uuid.uuid5(node_inherence_uuid, str(n_predicate)) + + +def facet_inherence_uuid( + predicated_inherence_uuid: uuid.UUID, n_facet_class: URIRef +) -> uuid.UUID: + return uuid.uuid5(predicated_inherence_uuid, str(n_facet_class)) + + +def hash_method_value_uuid(l_hash_method: Literal, l_hash_value: Literal) -> uuid.UUID: + """ + The UUIDv5 seed data for Hash nodes is a URN following the scheme in this draft IETF memo: + + https://datatracker.ietf.org/doc/html/draft-thiemann-hash-urn-01 + + Note that at the time of this writing, that memo was expired (expiration date 2004-03-04) and did not have a linked superseding document. + """ + + if l_hash_value.datatype != NS_XSD.hexBinary: + raise ValueError("Expected hexBinary datatype for l_hash_value.") + hash_value_str: str = binascii.hexlify(l_hash_value.toPython()).decode().lower() + + hash_method_str = HASH_METHOD_CASTINGS[l_hash_method][0] + + urn_template = "urn:hash::%s:%s" + urn_populated = urn_template % (hash_method_str, hash_value_str) + + return uuid.uuid5(uuid.NAMESPACE_URL, urn_populated) From f3501e7d438ade758223b3b8be774c8c80ad615a Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Wed, 3 May 2023 18:37:32 -0400 Subject: [PATCH 08/12] Restructure inherent UUID functions; add doctests and opt-in usage in case_file A follow-on patch will regenerate Make-managed files. 
Signed-off-by: Alex Nelson --- case_utils/case_file/__init__.py | 61 +++++++-- case_utils/inherent_uuid.py | 184 ++++++++++++++++++++++++++-- tests/case_utils/case_file/Makefile | 8 ++ 3 files changed, 229 insertions(+), 24 deletions(-) diff --git a/case_utils/case_file/__init__.py b/case_utils/case_file/__init__.py index d5cb65a..6ab4ab9 100644 --- a/case_utils/case_file/__init__.py +++ b/case_utils/case_file/__init__.py @@ -27,7 +27,7 @@ import rdflib -import case_utils +import case_utils.inherent_uuid from case_utils.namespace import ( NS_RDF, NS_UCO_CORE, @@ -60,6 +60,9 @@ def create_file_node( node_prefix: str = DEFAULT_PREFIX, disable_hashes: bool = False, disable_mtime: bool = False, + *args: typing.Any, + use_deterministic_uuids: bool = False, + **kwargs: typing.Any, ) -> rdflib.URIRef: r""" This function characterizes the file at filepath. @@ -70,7 +73,7 @@ def create_file_node( :param filepath: The path to the file to characterize. Can be relative or absolute. :type filepath: str - :param node_iri: The desired full IRI for the node. If absent, will make an IRI of the pattern ``ns_base + 'File-' + uuid4`` + :param node_iri: The desired full IRI for the node. If absent, will make an IRI of the pattern ``ns_base + 'File-' + uuid`` :type node_iri: str :param node_prefix: The base prefix to use if node_iri is not supplied. 
@@ -97,7 +100,15 @@ def create_file_node( literal_basename = rdflib.Literal(basename) file_stat = os.stat(filepath) - n_file_facet = node_namespace["FileFacet-" + case_utils.local_uuid.local_uuid()] + + n_file_facet: rdflib.URIRef + if use_deterministic_uuids: + n_file_facet = case_utils.inherent_uuid.get_facet_uriref( + n_file, NS_UCO_OBSERVABLE.FileFacet + ) + else: + n_file_facet = node_namespace["FileFacet-" + case_utils.local_uuid.local_uuid()] + graph.add( ( n_file_facet, @@ -124,9 +135,16 @@ def create_file_node( graph.add((n_file_facet, NS_UCO_OBSERVABLE.modifiedTime, literal_mtime)) if not disable_hashes: - n_contentdata_facet = node_namespace[ - "content-data-facet-" + case_utils.local_uuid.local_uuid() - ] + n_contentdata_facet: rdflib.URIRef + if use_deterministic_uuids: + n_contentdata_facet = case_utils.inherent_uuid.get_facet_uriref( + n_file, NS_UCO_OBSERVABLE.ContentDataFacet + ) + else: + n_contentdata_facet = node_namespace[ + "ContentDataFacet-" + case_utils.local_uuid.local_uuid() + ] + graph.add((n_file, NS_UCO_CORE.hasFacet, n_contentdata_facet)) graph.add( (n_contentdata_facet, NS_RDF.type, NS_UCO_OBSERVABLE.ContentDataFacet) @@ -204,9 +222,8 @@ def create_file_node( for key in successful_hashdict._fields: if key not in ("md5", "sha1", "sha256", "sha512", "sha3_256", "sha3_512"): continue - n_hash = node_namespace["hash-" + case_utils.local_uuid.local_uuid()] - graph.add((n_contentdata_facet, NS_UCO_OBSERVABLE.hash, n_hash)) - graph.add((n_hash, NS_RDF.type, NS_UCO_TYPES.Hash)) + + l_hash_method: rdflib.Literal if key in ("sha3_256", "sha3_512"): l_hash_method = rdflib.Literal( key.replace("_", "-").upper(), @@ -216,6 +233,23 @@ def create_file_node( l_hash_method = rdflib.Literal( key.upper(), datatype=NS_UCO_VOCABULARY.HashNameVocab ) + + hash_value: str = getattr(successful_hashdict, key) + l_hash_value = rdflib.Literal(hash_value.upper(), datatype=NS_XSD.hexBinary) + + hash_uuid: str + if use_deterministic_uuids: + hash_uuid = str( + 
case_utils.inherent_uuid.hash_method_value_uuid( + l_hash_method, l_hash_value + ) + ) + else: + hash_uuid = case_utils.local_uuid.local_uuid() + n_hash = node_namespace["Hash-" + hash_uuid] + + graph.add((n_contentdata_facet, NS_UCO_OBSERVABLE.hash, n_hash)) + graph.add((n_hash, NS_RDF.type, NS_UCO_TYPES.Hash)) graph.add( ( n_hash, @@ -223,12 +257,11 @@ def create_file_node( l_hash_method, ) ) - hash_value = getattr(successful_hashdict, key) graph.add( ( n_hash, NS_UCO_TYPES.hashValue, - rdflib.Literal(hash_value.upper(), datatype=NS_XSD.hexBinary), + l_hash_value, ) ) @@ -241,6 +274,11 @@ def main() -> None: parser.add_argument("--debug", action="store_true") parser.add_argument("--disable-hashes", action="store_true") parser.add_argument("--disable-mtime", action="store_true") + parser.add_argument( + "--use-deterministic-uuids", + action="store_true", + help="Use UUIDs computed using the case_utils.inherent_uuid module.", + ) parser.add_argument( "--output-format", help="Override extension-based format guesser." ) @@ -281,6 +319,7 @@ def main() -> None: node_prefix=args.base_prefix, disable_hashes=args.disable_hashes, disable_mtime=args.disable_mtime, + use_deterministic_uuids=args.use_deterministic_uuids, ) graph.serialize(args.out_graph, **serialize_kwargs) diff --git a/case_utils/inherent_uuid.py b/case_utils/inherent_uuid.py index e4e3aba..832b448 100644 --- a/case_utils/inherent_uuid.py +++ b/case_utils/inherent_uuid.py @@ -13,16 +13,56 @@ """ This library provides supporting constants and functions for generating deterministic UUIDs (version 5) for UCO Hash and Facet nodes. + +There are two general patterns implemented: + +1. Some objects are "wholly specified" by their properties. The leading example of this is uco-types:Hash, which has only the properties hashMethod and hashValue, and both are required to be provided in order to be conformant with UCO. 
The function `hash_method_value_uuid` implements a scheme to generate UUIDs for uco-types:Hash nodes based on this pattern. +2. A pattern based on inherence generates UUIDv5s based on how an inherent object (a.k.a. UcoInherentCharacterizationThing) structurally relates to the object in which it inheres. For instance, a Facet is understood to only relate to its UcoObject by linking with the uco-core:hasFacet property. So, a Facet's UUID is determined uniquely by (1) the "UUID namespace" of its corresponding UcoObject, and (2) its OWL Class. + A. The term "UUID namespace" is described in RFC 4122 Section 4.3 [#rfc4122s43]_ , and is not intended to be confused with `rdflib.term.Namespace`. For any uco-core:UcoThing (or even owl:Thing), the function `inherence_uuid` defines the procedure for either extracting or generating a UUID for use as a namespace. + +This module is independent of, and complements, `case_utils.local_uuid`, which provides deterministic UUIDs based on the calling process's environment. + +References +========== + +.. [#rfc4122s43] https://datatracker.ietf.org/doc/html/rfc4122#section-4.3 + + +Examples +======== + +A knowledge base ontology currently uses a prefix 'kb:', expanding to 'http://example.org/kb/'. This knowledge base has a node kb:File-ac6b44cf-dc6b-4f2c-a09d-c9beb0a345a9. What is the IRI of its FileFacet? + +>>> from case_utils.namespace import NS_UCO_OBSERVABLE +>>> file_iri: str = "http://example.org/kb/File-ac6b44cf-dc6b-4f2c-a09d-c9beb0a345a9" +>>> n_file = URIRef(file_iri) +>>> n_file_facet = get_facet_uriref(n_file, NS_UCO_OBSERVABLE.FileFacet) +>>> n_file_facet +rdflib.term.URIRef('http://example.org/kb/FileFacet-01d292e3-0f38-5974-868d-006ef07f5186') + +A documentation policy change has been enacted, and now all knowledge base individuals need to use the URN example form. What is the FileFacet IRI now? 
+ +>>> file_iri_2: str = "urn:example:kb:File-ac6b44cf-dc6b-4f2c-a09d-c9beb0a345a9" +>>> n_file_2 = URIRef(file_iri_2) +>>> n_file_facet_2 = get_facet_uriref(n_file_2, NS_UCO_OBSERVABLE.FileFacet) +>>> n_file_facet_2 +rdflib.term.URIRef('urn:example:kb:FileFacet-01d292e3-0f38-5974-868d-006ef07f5186') + +The two IRIs end with the same UUID. + +>>> assert str(n_file_facet)[-36:] == str(n_file_facet_2)[-36:] """ +__version__ = "0.0.2" + import binascii import re import uuid -from typing import Dict, Optional, Tuple +from typing import Any, Dict, Optional, Tuple -from rdflib import Literal, URIRef +from rdflib import Literal, Namespace, URIRef -from case_utils.namespace import NS_UCO_VOCABULARY, NS_XSD +from case_utils.namespace import NS_UCO_CORE, NS_UCO_VOCABULARY, NS_XSD L_MD5 = Literal("MD5", datatype=NS_UCO_VOCABULARY.HashNameVocab) L_SHA1 = Literal("SHA1", datatype=NS_UCO_VOCABULARY.HashNameVocab) @@ -52,29 +92,147 @@ ) -def inherence_uuid(n_node: URIRef) -> uuid.UUID: - node_iri = str(n_node) +def inherence_uuid(n_uco_thing: URIRef, *args: Any, **kwargs: Any) -> uuid.UUID: + """ + This function returns a UUIDv5 for any UcoThing, that can be used as a UUID Namespace in further `uuid.uuid5` calls. + + In the case that the UcoThing ends with a UUID, that UUID string will be returned wrapped in a UUID object. In all other cases, a UUID version 5 object will be returned for the node as a name under the URL namespace [#rfc4122ac]_. + + References + ========== + + .. [#rfc4122ac] https://datatracker.ietf.org/doc/html/rfc4122#appendix-C + + Examples + ======== + + A File node will need its FileFacet IRI determined. What will be the base UUID namespace for determining this IRI as well as other inherent graph objects? 
+ + >>> file_iri: str = "http://example.org/kb/File-ac6b44cf-dc6b-4f2c-a09d-c9beb0a345a9" + >>> n_file = URIRef(file_iri) + >>> file_uuid_namespace: uuid.UUID = inherence_uuid(n_file) + >>> file_uuid_namespace + UUID('ac6b44cf-dc6b-4f2c-a09d-c9beb0a345a9') + + The CASE homepage is being treated as an OWL NamedIndividual in this knowledge base, with its URL as its IRI. What is its base UUID namespace? + + >>> case_homepage_url: str = "https://caseontology.org/" + >>> n_case_homepage = URIRef(case_homepage_url) + >>> case_homepage_uuid_namespace = inherence_uuid(n_case_homepage) + >>> case_homepage_uuid_namespace + UUID('2c6406b7-3396-5fdd-b9bf-c6e21273e40a') + """ + node_iri = str(n_uco_thing) if len(node_iri) < 40 or RX_UUID.search(node_iri) is None: # <40 -> Too short to have a UUID and scheme. return uuid.uuid5(uuid.NAMESPACE_URL, node_iri) else: - return uuid.uuid5(uuid.NAMESPACE_OID, node_iri[-36:]) + return uuid.UUID(node_iri[-36:]) -def predicated_inherence_uuid( - node_inherence_uuid: uuid.UUID, n_predicate: URIRef +def facet_inherence_uuid( + uco_object_inherence_uuid: uuid.UUID, + n_facet_class: URIRef, + *args: Any, + **kwargs: Any ) -> uuid.UUID: - return uuid.uuid5(node_inherence_uuid, str(n_predicate)) + """ + :param n_facet_class: This node is expected to be the `rdflib.term.URIRef` for an OWL Class that is either in UCO or extends a class in UCO, such as `case_utils.namespace.NS_UCO_OBSERVABLE.FileFacet`. The Facet class SHOULD be a 'leaf' class - that is, it should have no OWL subclasses. (This 'SHOULD' might become a more stringent requirement in the future. uco-core:Facet must not be used. There is some question on how this rule should apply for uco-observable:WifiAddressFacet and its parent class uco-observable:MACAddressFacet.) 
+ :type n_facet_class: rdflib.term.URIRef + """ + if n_facet_class == NS_UCO_CORE.Facet: + raise ValueError("Requested Facet class is not a leaf Facet class.") + # NOTE: Further reviewing whether n_facet_class pertains to a Facet subclass is not done in this library. Both a set of all such known classes, as well as an extension mechanism for non-standard Facet subclasses (probably either a Set or Graph as an extra parameter), would need to be implemented. -def facet_inherence_uuid( - predicated_inherence_uuid: uuid.UUID, n_facet_class: URIRef -) -> uuid.UUID: - return uuid.uuid5(predicated_inherence_uuid, str(n_facet_class)) + return uuid.uuid5(uco_object_inherence_uuid, str(n_facet_class)) + + +def guess_namespace(n_node: URIRef, *args: Any, **kwargs: Any) -> Namespace: + """ + This function attempts simple heuristics to extract from a URIRef its namespace IRI. The heuristics, being simple, are not necessarily going to provide desirable answers, and might be semantically incorrect. + + :rtype: rdflib.Namespace + + Examples + ======== + + >>> guess_namespace(URIRef("http://example.org/kb/Foo")) + Namespace('http://example.org/kb/') + >>> guess_namespace(URIRef("http://example.org/kb#Foo")) + Namespace('http://example.org/kb#') + >>> guess_namespace(URIRef("urn:example:kb#Foo")) + Namespace('urn:example:kb#') + + Note this function might not always give desirable answers. + + >>> guess_namespace(URIRef("urn:example:kb#Foo/Bar")) + Namespace('urn:example:kb#') + >>> guess_namespace(URIRef("urn:example:kb/Foo#Bar")) + Namespace('urn:example:kb/Foo#') + + Some patterns, such as Simple Storage Service (S3) URLs being treated as RDF IRIs, should avoid using this function. This object that houses a PCAP blob can function as an IRI, but the guessed namespace value does not serve as an RDF namespace. 
+ + >>> guess_namespace(URIRef("s3://digitalcorpora/corpora/scenarios/2008-nitroba/nitroba.pcap")) + Namespace('s3://digitalcorpora/corpora/scenarios/2008-nitroba/') + """ + node_iri = str(n_node) + if "#" in node_iri: + namespace_iri = node_iri[: 1 + node_iri.rindex("#")] + elif "/" in node_iri: + namespace_iri = node_iri[: 1 + node_iri.rindex("/")] + else: + namespace_iri = node_iri[: 1 + node_iri.rindex(":")] + return Namespace(namespace_iri) + + +def get_facet_uriref( + n_uco_object: URIRef, + n_facet_class: URIRef, + *args: Any, + namespace: Optional[Namespace] = None, + **kwargs: Any +) -> URIRef: + """ + :param namespace: An optional RDFLib Namespace object to use for prefixing the Facet IRI with a knowledge base prefix IRI. If not provided, will be guessed from a right-truncation of n_uco_object under some namespace forms (hash-, then slash-, then colon-terminated). This is a potentially fragile guessing mechanism, so users should feel encouraged to provide this optional parameter. + :type namespace: rdflib.Namespace or None + + Examples + ======== + + What is the URLFacet pertaining to the Nitroba University Scenario's PCAP file, when being interpreted as a Simple Storage Service (S3) object? Note that this example will show that in some cases a (RDFLib) Namespace will be desired. + + >>> from case_utils.namespace import NS_UCO_OBSERVABLE + >>> pcap_url: str = "s3://digitalcorpora/corpora/scenarios/2008-nitroba/nitroba.pcap" + >>> n_pcap = URIRef(pcap_url) + >>> n_pcap_url_facet_try1 = get_facet_uriref(n_pcap, NS_UCO_OBSERVABLE.URLFacet) + >>> n_pcap_url_facet_try1 + rdflib.term.URIRef('s3://digitalcorpora/corpora/scenarios/2008-nitroba/URLFacet-4b6023da-dbc4-5e1e-9a2f-aca2a6f6405c') + >>> # Looks like a (RDFLib) Namespace object should be provided. 
+ >>> ns_kb = Namespace("http://example.org/kb/") + >>> n_pcap_url_facet_try2 = get_facet_uriref(n_pcap, NS_UCO_OBSERVABLE.URLFacet, namespace=ns_kb) + >>> n_pcap_url_facet_try2 + rdflib.term.URIRef('http://example.org/kb/URLFacet-4b6023da-dbc4-5e1e-9a2f-aca2a6f6405c') + """ + uco_object_uuid_namespace: uuid.UUID = inherence_uuid(n_uco_object) + facet_uuid = facet_inherence_uuid(uco_object_uuid_namespace, n_facet_class) + + _namespace: Namespace + if namespace is None: + _namespace = guess_namespace(n_uco_object) + else: + _namespace = namespace + + # NOTE: This encodes an assumption that Facets (including extension Facets) use the "Slash" IRI style. + facet_class_local_name = str(n_facet_class).rsplit("/")[-1] + + return _namespace[facet_class_local_name + "-" + str(facet_uuid)] def hash_method_value_uuid(l_hash_method: Literal, l_hash_value: Literal) -> uuid.UUID: """ + This function generates a UUID for a UCO Hash object, solely based on its two required properties: uco-types:hashMethod and uco-types:hashValue. 
+ The UUIDv5 seed data for Hash nodes is a URN following the scheme in this draft IETF memo: https://datatracker.ietf.org/doc/html/draft-thiemann-hash-urn-01 diff --git a/tests/case_utils/case_file/Makefile b/tests/case_utils/case_file/Makefile index 254a27d..91bf6d6 100644 --- a/tests/case_utils/case_file/Makefile +++ b/tests/case_utils/case_file/Makefile @@ -110,6 +110,7 @@ sample.txt.json: \ $(tests_srcdir)/src/compact.py \ $(tests_srcdir)/src/isomorphic_diff.py \ $(top_srcdir)/case_utils/case_file/__init__.py \ + $(top_srcdir)/case_utils/inherent_uuid.py \ $(top_srcdir)/case_utils/local_uuid.py \ $(top_srcdir)/case_utils/namespace.py \ sample.txt-nocompact.json @@ -118,6 +119,7 @@ sample.txt.json: \ && source $(tests_srcdir)/venv/bin/activate \ && case_file \ --debug \ + --use-deterministic-uuids \ __$@ \ sample.txt source $(tests_srcdir)/venv/bin/activate \ @@ -143,6 +145,7 @@ sample.txt.ttl: \ $(RDF_TOOLKIT_JAR) \ $(tests_srcdir)/.venv.done.log \ $(top_srcdir)/case_utils/case_file/__init__.py \ + $(top_srcdir)/case_utils/inherent_uuid.py \ $(top_srcdir)/case_utils/local_uuid.py \ $(top_srcdir)/case_utils/namespace.py \ sample.txt.done.log @@ -151,6 +154,7 @@ sample.txt.ttl: \ && source $(tests_srcdir)/venv/bin/activate \ && case_file \ --debug \ + --use-deterministic-uuids \ __$@ \ sample.txt java -jar $(RDF_TOOLKIT_JAR) \ @@ -167,6 +171,7 @@ sample.txt-disable_hashes.ttl: \ $(RDF_TOOLKIT_JAR) \ $(tests_srcdir)/.venv.done.log \ $(top_srcdir)/case_utils/case_file/__init__.py \ + $(top_srcdir)/case_utils/inherent_uuid.py \ $(top_srcdir)/case_utils/local_uuid.py \ $(top_srcdir)/case_utils/namespace.py \ sample.txt.done.log @@ -176,6 +181,7 @@ sample.txt-disable_hashes.ttl: \ && case_file \ --debug \ --disable-hashes \ + --use-deterministic-uuids \ __$@ \ sample.txt java -jar $(RDF_TOOLKIT_JAR) \ @@ -193,6 +199,7 @@ sample.txt-nocompact.json: \ $(tests_srcdir)/.venv.done.log \ $(tests_srcdir)/src/isomorphic_diff.py \ $(top_srcdir)/case_utils/case_file/__init__.py 
\ + $(top_srcdir)/case_utils/inherent_uuid.py \ $(top_srcdir)/case_utils/local_uuid.py \ $(top_srcdir)/case_utils/namespace.py \ sample.txt.done.log @@ -201,6 +208,7 @@ sample.txt-nocompact.json: \ && source $(tests_srcdir)/venv/bin/activate \ && case_file \ --debug \ + --use-deterministic-uuids \ _$@ \ sample.txt # To avoid making noisy, uninformative updates from blank node identifiers, only move the new file into place if it is not isomorphic with the Git-tracked version of the target. From 48511fe75c636b10dcb8a96b7f60d7d6607006c8 Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Thu, 4 May 2023 15:10:57 -0400 Subject: [PATCH 09/12] Regenerate Make-managed files Signed-off-by: Alex Nelson --- tests/case_utils/case_file/kb.json | 94 +++++++++---------- tests/case_utils/case_file/kb.ttl | 64 ++++++------- .../case_file/sample.txt-disable_hashes.ttl | 6 +- .../case_file/sample.txt-nocompact.json | 64 ++++++------- tests/case_utils/case_file/sample.txt.json | 68 +++++++------- tests/case_utils/case_file/sample.txt.ttl | 58 ++++++------ 6 files changed, 177 insertions(+), 177 deletions(-) diff --git a/tests/case_utils/case_file/kb.json b/tests/case_utils/case_file/kb.json index a557e2f..57549e7 100644 --- a/tests/case_utils/case_file/kb.json +++ b/tests/case_utils/case_file/kb.json @@ -9,26 +9,54 @@ }, "@graph": [ { - "@id": "kb:File-800784de-5c9e-5eb2-b843-0ac51a1bd4b9", + "@id": "kb:ContentDataFacet-e154c2c4-bbf7-54f5-b7a0-837a3bbbeac8", + "@type": "uco-observable:ContentDataFacet", + "uco-observable:hash": [ + { + "@id": "kb:Hash-244595b7-3a16-50e3-ab5d-1fc767d4f337" + }, + { + "@id": "kb:Hash-7395c349-e7ab-59b4-acdd-9d857c32413a" + }, + { + "@id": "kb:Hash-9527b473-6268-5385-9a74-4eb1a98fef93" + }, + { + "@id": "kb:Hash-acdba760-7dc3-5601-8d5f-480c1def8492" + }, + { + "@id": "kb:Hash-ae02b896-5a0b-57c0-9441-c90ce6382a72" + }, + { + "@id": "kb:Hash-fe2030ac-2380-520f-8499-5f2a1e5d69af" + } + ], + "uco-observable:sizeInBytes": { + "@type": "xsd:integer", + "@value": 
"4" + } + }, + { + "@id": "kb:File-23016945-cf7c-5c9c-9fe0-8cefc221ccb2", "@type": "uco-observable:File", "uco-core:hasFacet": { - "@id": "kb:FileFacet-ffa3e6bb-dffc-549d-a7c4-ffc5e90ac55d" + "@id": "kb:FileFacet-d086baca-c585-5b5e-b34d-4592bcddb659" } }, { - "@id": "kb:File-ace6460a-92a9-58b9-83ea-a18ae87f6e04", + "@id": "kb:File-fd45a350-f052-5558-b8f1-b4d7d82482a9", "@type": "uco-observable:File", "uco-core:hasFacet": [ { - "@id": "kb:FileFacet-1297a4bd-563b-54c3-ad8a-f67f01ba9b10" + "@id": "kb:ContentDataFacet-e154c2c4-bbf7-54f5-b7a0-837a3bbbeac8" }, { - "@id": "kb:content-data-facet-1833f979-1f19-5543-9d82-6cefd144b169" + "@id": "kb:FileFacet-c36436e1-734c-53e5-9e0f-adecdb5e3ab5" } ] }, { - "@id": "kb:FileFacet-1297a4bd-563b-54c3-ad8a-f67f01ba9b10", + "@id": "kb:FileFacet-c36436e1-734c-53e5-9e0f-adecdb5e3ab5", "@type": "uco-observable:FileFacet", "uco-observable:fileName": "sample.txt", "uco-observable:modifiedTime": { @@ -41,7 +69,7 @@ } }, { - "@id": "kb:FileFacet-ffa3e6bb-dffc-549d-a7c4-ffc5e90ac55d", + "@id": "kb:FileFacet-d086baca-c585-5b5e-b34d-4592bcddb659", "@type": "uco-observable:FileFacet", "uco-observable:fileName": "sample.txt", "uco-observable:modifiedTime": { @@ -54,35 +82,7 @@ } }, { - "@id": "kb:content-data-facet-1833f979-1f19-5543-9d82-6cefd144b169", - "@type": "uco-observable:ContentDataFacet", - "uco-observable:hash": [ - { - "@id": "kb:hash-24644904-83ea-5911-aea8-be687a9f3caf" - }, - { - "@id": "kb:hash-295bdeb5-7f23-5a3f-8b7f-4bb1191b7c21" - }, - { - "@id": "kb:hash-39127f5c-598b-51d4-a720-2e949f18f85f" - }, - { - "@id": "kb:hash-49e81fee-c6b3-5f5f-af8b-0746d32e4932" - }, - { - "@id": "kb:hash-720759b8-9544-5dab-ab12-003372b17a4e" - }, - { - "@id": "kb:hash-b02ebdb3-edf7-5fbf-8088-3d064e316b93" - } - ], - "uco-observable:sizeInBytes": { - "@type": "xsd:integer", - "@value": "4" - } - }, - { - "@id": "kb:hash-24644904-83ea-5911-aea8-be687a9f3caf", + "@id": "kb:Hash-244595b7-3a16-50e3-ab5d-1fc767d4f337", "@type": "uco-types:Hash", 
"uco-types:hashMethod": { "@type": "uco-vocabulary:HashNameVocab", @@ -94,19 +94,19 @@ } }, { - "@id": "kb:hash-295bdeb5-7f23-5a3f-8b7f-4bb1191b7c21", + "@id": "kb:Hash-7395c349-e7ab-59b4-acdd-9d857c32413a", "@type": "uco-types:Hash", "uco-types:hashMethod": { "@type": "uco-vocabulary:HashNameVocab", - "@value": "SHA1" + "@value": "SHA3-512" }, "uco-types:hashValue": { "@type": "xsd:hexBinary", - "@value": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3" + "@value": "9ece086e9bac491fac5c1d1046ca11d737b92a2b2ebd93f005d7b710110c0a678288166e7fbe796883a4f2e9b3ca9f484f521d0ce464345cc1aec96779149c14" } }, { - "@id": "kb:hash-39127f5c-598b-51d4-a720-2e949f18f85f", + "@id": "kb:Hash-9527b473-6268-5385-9a74-4eb1a98fef93", "@type": "uco-types:Hash", "uco-types:hashMethod": { "@type": "uco-vocabulary:HashNameVocab", @@ -118,7 +118,7 @@ } }, { - "@id": "kb:hash-49e81fee-c6b3-5f5f-af8b-0746d32e4932", + "@id": "kb:Hash-acdba760-7dc3-5601-8d5f-480c1def8492", "@type": "uco-types:Hash", "uco-types:hashMethod": { "@type": "uco-vocabulary:HashNameVocab", @@ -130,27 +130,27 @@ } }, { - "@id": "kb:hash-720759b8-9544-5dab-ab12-003372b17a4e", + "@id": "kb:Hash-ae02b896-5a0b-57c0-9441-c90ce6382a72", "@type": "uco-types:Hash", "uco-types:hashMethod": { "@type": "uco-vocabulary:HashNameVocab", - "@value": "SHA3-512" + "@value": "SHA3-256" }, "uco-types:hashValue": { "@type": "xsd:hexBinary", - "@value": "9ece086e9bac491fac5c1d1046ca11d737b92a2b2ebd93f005d7b710110c0a678288166e7fbe796883a4f2e9b3ca9f484f521d0ce464345cc1aec96779149c14" + "@value": "36f028580bb02cc8272a9a020f4200e346e276ae664e45ee80745574e2f5ab80" } }, { - "@id": "kb:hash-b02ebdb3-edf7-5fbf-8088-3d064e316b93", + "@id": "kb:Hash-fe2030ac-2380-520f-8499-5f2a1e5d69af", "@type": "uco-types:Hash", "uco-types:hashMethod": { "@type": "uco-vocabulary:HashNameVocab", - "@value": "SHA3-256" + "@value": "SHA1" }, "uco-types:hashValue": { "@type": "xsd:hexBinary", - "@value": "36f028580bb02cc8272a9a020f4200e346e276ae664e45ee80745574e2f5ab80" + 
"@value": "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3" } } ] diff --git a/tests/case_utils/case_file/kb.ttl b/tests/case_utils/case_file/kb.ttl index 45ad25e..eaf4727 100644 --- a/tests/case_utils/case_file/kb.ttl +++ b/tests/case_utils/case_file/kb.ttl @@ -8,79 +8,79 @@ @prefix uco-vocabulary: . @prefix xsd: . -kb:File-800784de-5c9e-5eb2-b843-0ac51a1bd4b9 +kb:ContentDataFacet-e154c2c4-bbf7-54f5-b7a0-837a3bbbeac8 + a uco-observable:ContentDataFacet ; + uco-observable:hash + kb:Hash-244595b7-3a16-50e3-ab5d-1fc767d4f337 , + kb:Hash-7395c349-e7ab-59b4-acdd-9d857c32413a , + kb:Hash-9527b473-6268-5385-9a74-4eb1a98fef93 , + kb:Hash-acdba760-7dc3-5601-8d5f-480c1def8492 , + kb:Hash-ae02b896-5a0b-57c0-9441-c90ce6382a72 , + kb:Hash-fe2030ac-2380-520f-8499-5f2a1e5d69af + ; + uco-observable:sizeInBytes "4"^^xsd:integer ; + . + +kb:File-23016945-cf7c-5c9c-9fe0-8cefc221ccb2 a uco-observable:File ; - uco-core:hasFacet kb:FileFacet-ffa3e6bb-dffc-549d-a7c4-ffc5e90ac55d ; + uco-core:hasFacet kb:FileFacet-d086baca-c585-5b5e-b34d-4592bcddb659 ; . -kb:File-ace6460a-92a9-58b9-83ea-a18ae87f6e04 +kb:File-fd45a350-f052-5558-b8f1-b4d7d82482a9 a uco-observable:File ; uco-core:hasFacet - kb:FileFacet-1297a4bd-563b-54c3-ad8a-f67f01ba9b10 , - kb:content-data-facet-1833f979-1f19-5543-9d82-6cefd144b169 + kb:ContentDataFacet-e154c2c4-bbf7-54f5-b7a0-837a3bbbeac8 , + kb:FileFacet-c36436e1-734c-53e5-9e0f-adecdb5e3ab5 ; . -kb:FileFacet-1297a4bd-563b-54c3-ad8a-f67f01ba9b10 +kb:FileFacet-c36436e1-734c-53e5-9e0f-adecdb5e3ab5 a uco-observable:FileFacet ; uco-observable:fileName "sample.txt" ; uco-observable:modifiedTime "2010-01-02T03:04:56+00:00"^^xsd:dateTime ; uco-observable:sizeInBytes "4"^^xsd:integer ; . -kb:FileFacet-ffa3e6bb-dffc-549d-a7c4-ffc5e90ac55d +kb:FileFacet-d086baca-c585-5b5e-b34d-4592bcddb659 a uco-observable:FileFacet ; uco-observable:fileName "sample.txt" ; uco-observable:modifiedTime "2010-01-02T03:04:56+00:00"^^xsd:dateTime ; uco-observable:sizeInBytes "4"^^xsd:integer ; . 
-kb:content-data-facet-1833f979-1f19-5543-9d82-6cefd144b169 - a uco-observable:ContentDataFacet ; - uco-observable:hash - kb:hash-24644904-83ea-5911-aea8-be687a9f3caf , - kb:hash-295bdeb5-7f23-5a3f-8b7f-4bb1191b7c21 , - kb:hash-39127f5c-598b-51d4-a720-2e949f18f85f , - kb:hash-49e81fee-c6b3-5f5f-af8b-0746d32e4932 , - kb:hash-720759b8-9544-5dab-ab12-003372b17a4e , - kb:hash-b02ebdb3-edf7-5fbf-8088-3d064e316b93 - ; - uco-observable:sizeInBytes "4"^^xsd:integer ; - . - -kb:hash-24644904-83ea-5911-aea8-be687a9f3caf +kb:Hash-244595b7-3a16-50e3-ab5d-1fc767d4f337 a uco-types:Hash ; uco-types:hashMethod "MD5"^^uco-vocabulary:HashNameVocab ; uco-types:hashValue "098f6bcd4621d373cade4e832627b4f6"^^xsd:hexBinary ; . -kb:hash-295bdeb5-7f23-5a3f-8b7f-4bb1191b7c21 +kb:Hash-7395c349-e7ab-59b4-acdd-9d857c32413a a uco-types:Hash ; - uco-types:hashMethod "SHA1"^^uco-vocabulary:HashNameVocab ; - uco-types:hashValue "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3"^^xsd:hexBinary ; + uco-types:hashMethod "SHA3-512"^^uco-vocabulary:HashNameVocab ; + uco-types:hashValue "9ece086e9bac491fac5c1d1046ca11d737b92a2b2ebd93f005d7b710110c0a678288166e7fbe796883a4f2e9b3ca9f484f521d0ce464345cc1aec96779149c14"^^xsd:hexBinary ; . -kb:hash-39127f5c-598b-51d4-a720-2e949f18f85f +kb:Hash-9527b473-6268-5385-9a74-4eb1a98fef93 a uco-types:Hash ; uco-types:hashMethod "SHA256"^^uco-vocabulary:HashNameVocab ; uco-types:hashValue "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08"^^xsd:hexBinary ; . -kb:hash-49e81fee-c6b3-5f5f-af8b-0746d32e4932 +kb:Hash-acdba760-7dc3-5601-8d5f-480c1def8492 a uco-types:Hash ; uco-types:hashMethod "SHA512"^^uco-vocabulary:HashNameVocab ; uco-types:hashValue "ee26b0dd4af7e749aa1a8ee3c10ae9923f618980772e473f8819a5d4940e0db27ac185f8a0e1d5f84f88bc887fd67b143732c304cc5fa9ad8e6f57f50028a8ff"^^xsd:hexBinary ; . 
-kb:hash-720759b8-9544-5dab-ab12-003372b17a4e +kb:Hash-ae02b896-5a0b-57c0-9441-c90ce6382a72 a uco-types:Hash ; - uco-types:hashMethod "SHA3-512"^^uco-vocabulary:HashNameVocab ; - uco-types:hashValue "9ece086e9bac491fac5c1d1046ca11d737b92a2b2ebd93f005d7b710110c0a678288166e7fbe796883a4f2e9b3ca9f484f521d0ce464345cc1aec96779149c14"^^xsd:hexBinary ; + uco-types:hashMethod "SHA3-256"^^uco-vocabulary:HashNameVocab ; + uco-types:hashValue "36f028580bb02cc8272a9a020f4200e346e276ae664e45ee80745574e2f5ab80"^^xsd:hexBinary ; . -kb:hash-b02ebdb3-edf7-5fbf-8088-3d064e316b93 +kb:Hash-fe2030ac-2380-520f-8499-5f2a1e5d69af a uco-types:Hash ; - uco-types:hashMethod "SHA3-256"^^uco-vocabulary:HashNameVocab ; - uco-types:hashValue "36f028580bb02cc8272a9a020f4200e346e276ae664e45ee80745574e2f5ab80"^^xsd:hexBinary ; + uco-types:hashMethod "SHA1"^^uco-vocabulary:HashNameVocab ; + uco-types:hashValue "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3"^^xsd:hexBinary ; . diff --git a/tests/case_utils/case_file/sample.txt-disable_hashes.ttl b/tests/case_utils/case_file/sample.txt-disable_hashes.ttl index 24add3e..904ad82 100644 --- a/tests/case_utils/case_file/sample.txt-disable_hashes.ttl +++ b/tests/case_utils/case_file/sample.txt-disable_hashes.ttl @@ -6,12 +6,12 @@ @prefix uco-observable: . @prefix xsd: . -kb:File-800784de-5c9e-5eb2-b843-0ac51a1bd4b9 +kb:File-23016945-cf7c-5c9c-9fe0-8cefc221ccb2 a uco-observable:File ; - uco-core:hasFacet kb:FileFacet-ffa3e6bb-dffc-549d-a7c4-ffc5e90ac55d ; + uco-core:hasFacet kb:FileFacet-d086baca-c585-5b5e-b34d-4592bcddb659 ; . 
-kb:FileFacet-ffa3e6bb-dffc-549d-a7c4-ffc5e90ac55d +kb:FileFacet-d086baca-c585-5b5e-b34d-4592bcddb659 a uco-observable:FileFacet ; uco-observable:fileName "sample.txt" ; uco-observable:modifiedTime "2010-01-02T03:04:56+00:00"^^xsd:dateTime ; diff --git a/tests/case_utils/case_file/sample.txt-nocompact.json b/tests/case_utils/case_file/sample.txt-nocompact.json index 83e2ff3..d863fcb 100644 --- a/tests/case_utils/case_file/sample.txt-nocompact.json +++ b/tests/case_utils/case_file/sample.txt-nocompact.json @@ -37,19 +37,19 @@ }, "@graph": [ { - "@id": "http://example.org/kb/hash-dd2161f0-4943-55c0-b08e-a2ad8a85dce8", + "@id": "http://example.org/kb/Hash-7395c349-e7ab-59b4-acdd-9d857c32413a", "@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash", "https://ontology.unifiedcyberontology.org/uco/types/hashMethod": { "@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab", - "@value": "SHA3-256" + "@value": "SHA3-512" }, "https://ontology.unifiedcyberontology.org/uco/types/hashValue": { "@type": "http://www.w3.org/2001/XMLSchema#hexBinary", - "@value": "36f028580bb02cc8272a9a020f4200e346e276ae664e45ee80745574e2f5ab80" + "@value": "9ece086e9bac491fac5c1d1046ca11d737b92a2b2ebd93f005d7b710110c0a678288166e7fbe796883a4f2e9b3ca9f484f521d0ce464345cc1aec96779149c14" } }, { - "@id": "http://example.org/kb/hash-c77cb4f1-ac2a-52c7-b67e-016e209515cb", + "@id": "http://example.org/kb/Hash-fe2030ac-2380-520f-8499-5f2a1e5d69af", "@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash", "https://ontology.unifiedcyberontology.org/uco/types/hashMethod": { "@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab", @@ -61,19 +61,7 @@ } }, { - "@id": "http://example.org/kb/File-23f45d80-7b16-5e7f-ba34-40392fa4f8fc", - "@type": "https://ontology.unifiedcyberontology.org/uco/observable/File", - "https://ontology.unifiedcyberontology.org/uco/core/hasFacet": [ - { - "@id": 
"http://example.org/kb/FileFacet-a5d9606e-a5cf-5531-9462-5bed0ac4219c" - }, - { - "@id": "http://example.org/kb/content-data-facet-bda9b72d-2753-54ab-9292-e1e260be4f6d" - } - ] - }, - { - "@id": "http://example.org/kb/hash-bf689e82-1cc4-507f-a6fb-7fc01b9289c6", + "@id": "http://example.org/kb/Hash-acdba760-7dc3-5601-8d5f-480c1def8492", "@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash", "https://ontology.unifiedcyberontology.org/uco/types/hashMethod": { "@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab", @@ -85,7 +73,7 @@ } }, { - "@id": "http://example.org/kb/hash-bdb2ba7d-8cb2-5591-a051-0c20d134e894", + "@id": "http://example.org/kb/Hash-9527b473-6268-5385-9a74-4eb1a98fef93", "@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash", "https://ontology.unifiedcyberontology.org/uco/types/hashMethod": { "@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab", @@ -97,44 +85,44 @@ } }, { - "@id": "http://example.org/kb/hash-8ebb651a-314b-554a-b63e-78b9e69111d8", + "@id": "http://example.org/kb/Hash-ae02b896-5a0b-57c0-9441-c90ce6382a72", "@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash", "https://ontology.unifiedcyberontology.org/uco/types/hashMethod": { "@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab", - "@value": "MD5" + "@value": "SHA3-256" }, "https://ontology.unifiedcyberontology.org/uco/types/hashValue": { "@type": "http://www.w3.org/2001/XMLSchema#hexBinary", - "@value": "098f6bcd4621d373cade4e832627b4f6" + "@value": "36f028580bb02cc8272a9a020f4200e346e276ae664e45ee80745574e2f5ab80" } }, { - "@id": "http://example.org/kb/content-data-facet-bda9b72d-2753-54ab-9292-e1e260be4f6d", + "@id": "http://example.org/kb/ContentDataFacet-3258a70d-6307-5a70-b796-b6ffd530700a", "@type": "https://ontology.unifiedcyberontology.org/uco/observable/ContentDataFacet", "https://ontology.unifiedcyberontology.org/uco/observable/hash": [ { - "@id": 
"http://example.org/kb/hash-8ebb651a-314b-554a-b63e-78b9e69111d8" + "@id": "http://example.org/kb/Hash-244595b7-3a16-50e3-ab5d-1fc767d4f337" }, { - "@id": "http://example.org/kb/hash-c77cb4f1-ac2a-52c7-b67e-016e209515cb" + "@id": "http://example.org/kb/Hash-fe2030ac-2380-520f-8499-5f2a1e5d69af" }, { - "@id": "http://example.org/kb/hash-bdb2ba7d-8cb2-5591-a051-0c20d134e894" + "@id": "http://example.org/kb/Hash-9527b473-6268-5385-9a74-4eb1a98fef93" }, { - "@id": "http://example.org/kb/hash-bf689e82-1cc4-507f-a6fb-7fc01b9289c6" + "@id": "http://example.org/kb/Hash-acdba760-7dc3-5601-8d5f-480c1def8492" }, { - "@id": "http://example.org/kb/hash-dd2161f0-4943-55c0-b08e-a2ad8a85dce8" + "@id": "http://example.org/kb/Hash-ae02b896-5a0b-57c0-9441-c90ce6382a72" }, { - "@id": "http://example.org/kb/hash-28dd6731-4eda-5ae7-9810-efedc7593912" + "@id": "http://example.org/kb/Hash-7395c349-e7ab-59b4-acdd-9d857c32413a" } ], "https://ontology.unifiedcyberontology.org/uco/observable/sizeInBytes": 4 }, { - "@id": "http://example.org/kb/FileFacet-a5d9606e-a5cf-5531-9462-5bed0ac4219c", + "@id": "http://example.org/kb/FileFacet-3e49a502-dc39-5ff2-9148-2601e1929e2b", "@type": "https://ontology.unifiedcyberontology.org/uco/observable/FileFacet", "https://ontology.unifiedcyberontology.org/uco/observable/fileName": "sample.txt", "https://ontology.unifiedcyberontology.org/uco/observable/modifiedTime": { @@ -144,15 +132,27 @@ "https://ontology.unifiedcyberontology.org/uco/observable/sizeInBytes": 4 }, { - "@id": "http://example.org/kb/hash-28dd6731-4eda-5ae7-9810-efedc7593912", + "@id": "http://example.org/kb/File-a28858e3-5087-5c84-a2b6-710b00b15605", + "@type": "https://ontology.unifiedcyberontology.org/uco/observable/File", + "https://ontology.unifiedcyberontology.org/uco/core/hasFacet": [ + { + "@id": "http://example.org/kb/FileFacet-3e49a502-dc39-5ff2-9148-2601e1929e2b" + }, + { + "@id": "http://example.org/kb/ContentDataFacet-3258a70d-6307-5a70-b796-b6ffd530700a" + } + ] + }, + { + 
"@id": "http://example.org/kb/Hash-244595b7-3a16-50e3-ab5d-1fc767d4f337", "@type": "https://ontology.unifiedcyberontology.org/uco/types/Hash", "https://ontology.unifiedcyberontology.org/uco/types/hashMethod": { "@type": "https://ontology.unifiedcyberontology.org/uco/vocabulary/HashNameVocab", - "@value": "SHA3-512" + "@value": "MD5" }, "https://ontology.unifiedcyberontology.org/uco/types/hashValue": { "@type": "http://www.w3.org/2001/XMLSchema#hexBinary", - "@value": "9ece086e9bac491fac5c1d1046ca11d737b92a2b2ebd93f005d7b710110c0a678288166e7fbe796883a4f2e9b3ca9f484f521d0ce464345cc1aec96779149c14" + "@value": "098f6bcd4621d373cade4e832627b4f6" } } ] diff --git a/tests/case_utils/case_file/sample.txt.json b/tests/case_utils/case_file/sample.txt.json index e393a69..ddd6f39 100644 --- a/tests/case_utils/case_file/sample.txt.json +++ b/tests/case_utils/case_file/sample.txt.json @@ -37,31 +37,19 @@ }, "@graph": [ { - "@id": "kb:hash-b656d6d4-b99b-58c8-a487-9889713a8efe", + "@id": "kb:Hash-9527b473-6268-5385-9a74-4eb1a98fef93", "@type": "uco-types:Hash", "uco-types:hashMethod": { "@type": "uco-vocabulary:HashNameVocab", - "@value": "SHA3-256" - }, - "uco-types:hashValue": { - "@type": "xsd:hexBinary", - "@value": "36f028580bb02cc8272a9a020f4200e346e276ae664e45ee80745574e2f5ab80" - } - }, - { - "@id": "kb:hash-9652135a-b58d-592d-b0f4-ac684ecdf6ed", - "@type": "uco-types:Hash", - "uco-types:hashMethod": { - "@type": "uco-vocabulary:HashNameVocab", - "@value": "SHA512" + "@value": "SHA256" }, "uco-types:hashValue": { "@type": "xsd:hexBinary", - "@value": "ee26b0dd4af7e749aa1a8ee3c10ae9923f618980772e473f8819a5d4940e0db27ac185f8a0e1d5f84f88bc887fd67b143732c304cc5fa9ad8e6f57f50028a8ff" + "@value": "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08" } }, { - "@id": "kb:FileFacet-e6bb7192-3a63-561b-87c2-9aea090b49e4", + "@id": "kb:FileFacet-0f05ce89-c779-5bc7-a7f1-3521c895946b", "@type": "uco-observable:FileFacet", "uco-observable:fileName": "sample.txt", 
"uco-observable:modifiedTime": { @@ -71,7 +59,7 @@ "uco-observable:sizeInBytes": 4 }, { - "@id": "kb:hash-2cae4ae2-d773-5ea2-ba3e-2c4092574959", + "@id": "kb:Hash-fe2030ac-2380-520f-8499-5f2a1e5d69af", "@type": "uco-types:Hash", "uco-types:hashMethod": { "@type": "uco-vocabulary:HashNameVocab", @@ -83,56 +71,56 @@ } }, { - "@id": "kb:hash-3228c9ed-792d-5603-afc2-8b9a9752606d", + "@id": "kb:Hash-ae02b896-5a0b-57c0-9441-c90ce6382a72", "@type": "uco-types:Hash", "uco-types:hashMethod": { "@type": "uco-vocabulary:HashNameVocab", - "@value": "SHA3-512" + "@value": "SHA3-256" }, "uco-types:hashValue": { "@type": "xsd:hexBinary", - "@value": "9ece086e9bac491fac5c1d1046ca11d737b92a2b2ebd93f005d7b710110c0a678288166e7fbe796883a4f2e9b3ca9f484f521d0ce464345cc1aec96779149c14" + "@value": "36f028580bb02cc8272a9a020f4200e346e276ae664e45ee80745574e2f5ab80" } }, { - "@id": "kb:content-data-facet-f48466e4-5394-584d-84ad-c46a7b9680bf", + "@id": "kb:ContentDataFacet-bb169f87-b3b7-55b5-96a5-dec7738c441b", "@type": "uco-observable:ContentDataFacet", "uco-observable:hash": [ { - "@id": "kb:hash-f067246c-a31a-597d-a84b-7b70ce4c8795" + "@id": "kb:Hash-244595b7-3a16-50e3-ab5d-1fc767d4f337" }, { - "@id": "kb:hash-2cae4ae2-d773-5ea2-ba3e-2c4092574959" + "@id": "kb:Hash-fe2030ac-2380-520f-8499-5f2a1e5d69af" }, { - "@id": "kb:hash-2b723e8c-6125-5867-83c3-a46753e41a07" + "@id": "kb:Hash-9527b473-6268-5385-9a74-4eb1a98fef93" }, { - "@id": "kb:hash-9652135a-b58d-592d-b0f4-ac684ecdf6ed" + "@id": "kb:Hash-acdba760-7dc3-5601-8d5f-480c1def8492" }, { - "@id": "kb:hash-b656d6d4-b99b-58c8-a487-9889713a8efe" + "@id": "kb:Hash-ae02b896-5a0b-57c0-9441-c90ce6382a72" }, { - "@id": "kb:hash-3228c9ed-792d-5603-afc2-8b9a9752606d" + "@id": "kb:Hash-7395c349-e7ab-59b4-acdd-9d857c32413a" } ], "uco-observable:sizeInBytes": 4 }, { - "@id": "kb:hash-2b723e8c-6125-5867-83c3-a46753e41a07", + "@id": "kb:Hash-acdba760-7dc3-5601-8d5f-480c1def8492", "@type": "uco-types:Hash", "uco-types:hashMethod": { "@type": 
"uco-vocabulary:HashNameVocab", - "@value": "SHA256" + "@value": "SHA512" }, "uco-types:hashValue": { "@type": "xsd:hexBinary", - "@value": "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08" + "@value": "ee26b0dd4af7e749aa1a8ee3c10ae9923f618980772e473f8819a5d4940e0db27ac185f8a0e1d5f84f88bc887fd67b143732c304cc5fa9ad8e6f57f50028a8ff" } }, { - "@id": "kb:hash-f067246c-a31a-597d-a84b-7b70ce4c8795", + "@id": "kb:Hash-244595b7-3a16-50e3-ab5d-1fc767d4f337", "@type": "uco-types:Hash", "uco-types:hashMethod": { "@type": "uco-vocabulary:HashNameVocab", @@ -144,14 +132,26 @@ } }, { - "@id": "kb:File-789a91ef-6446-548c-9911-dcc5168f25ea", + "@id": "kb:Hash-7395c349-e7ab-59b4-acdd-9d857c32413a", + "@type": "uco-types:Hash", + "uco-types:hashMethod": { + "@type": "uco-vocabulary:HashNameVocab", + "@value": "SHA3-512" + }, + "uco-types:hashValue": { + "@type": "xsd:hexBinary", + "@value": "9ece086e9bac491fac5c1d1046ca11d737b92a2b2ebd93f005d7b710110c0a678288166e7fbe796883a4f2e9b3ca9f484f521d0ce464345cc1aec96779149c14" + } + }, + { + "@id": "kb:File-d15507da-29e5-5575-bce6-f59dc788dcc9", "@type": "uco-observable:File", "uco-core:hasFacet": [ { - "@id": "kb:FileFacet-e6bb7192-3a63-561b-87c2-9aea090b49e4" + "@id": "kb:FileFacet-0f05ce89-c779-5bc7-a7f1-3521c895946b" }, { - "@id": "kb:content-data-facet-f48466e4-5394-584d-84ad-c46a7b9680bf" + "@id": "kb:ContentDataFacet-bb169f87-b3b7-55b5-96a5-dec7738c441b" } ] } diff --git a/tests/case_utils/case_file/sample.txt.ttl b/tests/case_utils/case_file/sample.txt.ttl index 8ca8973..d84275d 100644 --- a/tests/case_utils/case_file/sample.txt.ttl +++ b/tests/case_utils/case_file/sample.txt.ttl @@ -8,67 +8,67 @@ @prefix uco-vocabulary: . @prefix xsd: . 
-kb:File-ace6460a-92a9-58b9-83ea-a18ae87f6e04 +kb:ContentDataFacet-e154c2c4-bbf7-54f5-b7a0-837a3bbbeac8 + a uco-observable:ContentDataFacet ; + uco-observable:hash + kb:Hash-244595b7-3a16-50e3-ab5d-1fc767d4f337 , + kb:Hash-7395c349-e7ab-59b4-acdd-9d857c32413a , + kb:Hash-9527b473-6268-5385-9a74-4eb1a98fef93 , + kb:Hash-acdba760-7dc3-5601-8d5f-480c1def8492 , + kb:Hash-ae02b896-5a0b-57c0-9441-c90ce6382a72 , + kb:Hash-fe2030ac-2380-520f-8499-5f2a1e5d69af + ; + uco-observable:sizeInBytes "4"^^xsd:integer ; + . + +kb:File-fd45a350-f052-5558-b8f1-b4d7d82482a9 a uco-observable:File ; uco-core:hasFacet - kb:FileFacet-1297a4bd-563b-54c3-ad8a-f67f01ba9b10 , - kb:content-data-facet-1833f979-1f19-5543-9d82-6cefd144b169 + kb:ContentDataFacet-e154c2c4-bbf7-54f5-b7a0-837a3bbbeac8 , + kb:FileFacet-c36436e1-734c-53e5-9e0f-adecdb5e3ab5 ; . -kb:FileFacet-1297a4bd-563b-54c3-ad8a-f67f01ba9b10 +kb:FileFacet-c36436e1-734c-53e5-9e0f-adecdb5e3ab5 a uco-observable:FileFacet ; uco-observable:fileName "sample.txt" ; uco-observable:modifiedTime "2010-01-02T03:04:56+00:00"^^xsd:dateTime ; uco-observable:sizeInBytes "4"^^xsd:integer ; . -kb:content-data-facet-1833f979-1f19-5543-9d82-6cefd144b169 - a uco-observable:ContentDataFacet ; - uco-observable:hash - kb:hash-24644904-83ea-5911-aea8-be687a9f3caf , - kb:hash-295bdeb5-7f23-5a3f-8b7f-4bb1191b7c21 , - kb:hash-39127f5c-598b-51d4-a720-2e949f18f85f , - kb:hash-49e81fee-c6b3-5f5f-af8b-0746d32e4932 , - kb:hash-720759b8-9544-5dab-ab12-003372b17a4e , - kb:hash-b02ebdb3-edf7-5fbf-8088-3d064e316b93 - ; - uco-observable:sizeInBytes "4"^^xsd:integer ; - . - -kb:hash-24644904-83ea-5911-aea8-be687a9f3caf +kb:Hash-244595b7-3a16-50e3-ab5d-1fc767d4f337 a uco-types:Hash ; uco-types:hashMethod "MD5"^^uco-vocabulary:HashNameVocab ; uco-types:hashValue "098f6bcd4621d373cade4e832627b4f6"^^xsd:hexBinary ; . 
-kb:hash-295bdeb5-7f23-5a3f-8b7f-4bb1191b7c21 +kb:Hash-7395c349-e7ab-59b4-acdd-9d857c32413a a uco-types:Hash ; - uco-types:hashMethod "SHA1"^^uco-vocabulary:HashNameVocab ; - uco-types:hashValue "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3"^^xsd:hexBinary ; + uco-types:hashMethod "SHA3-512"^^uco-vocabulary:HashNameVocab ; + uco-types:hashValue "9ece086e9bac491fac5c1d1046ca11d737b92a2b2ebd93f005d7b710110c0a678288166e7fbe796883a4f2e9b3ca9f484f521d0ce464345cc1aec96779149c14"^^xsd:hexBinary ; . -kb:hash-39127f5c-598b-51d4-a720-2e949f18f85f +kb:Hash-9527b473-6268-5385-9a74-4eb1a98fef93 a uco-types:Hash ; uco-types:hashMethod "SHA256"^^uco-vocabulary:HashNameVocab ; uco-types:hashValue "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08"^^xsd:hexBinary ; . -kb:hash-49e81fee-c6b3-5f5f-af8b-0746d32e4932 +kb:Hash-acdba760-7dc3-5601-8d5f-480c1def8492 a uco-types:Hash ; uco-types:hashMethod "SHA512"^^uco-vocabulary:HashNameVocab ; uco-types:hashValue "ee26b0dd4af7e749aa1a8ee3c10ae9923f618980772e473f8819a5d4940e0db27ac185f8a0e1d5f84f88bc887fd67b143732c304cc5fa9ad8e6f57f50028a8ff"^^xsd:hexBinary ; . -kb:hash-720759b8-9544-5dab-ab12-003372b17a4e +kb:Hash-ae02b896-5a0b-57c0-9441-c90ce6382a72 a uco-types:Hash ; - uco-types:hashMethod "SHA3-512"^^uco-vocabulary:HashNameVocab ; - uco-types:hashValue "9ece086e9bac491fac5c1d1046ca11d737b92a2b2ebd93f005d7b710110c0a678288166e7fbe796883a4f2e9b3ca9f484f521d0ce464345cc1aec96779149c14"^^xsd:hexBinary ; + uco-types:hashMethod "SHA3-256"^^uco-vocabulary:HashNameVocab ; + uco-types:hashValue "36f028580bb02cc8272a9a020f4200e346e276ae664e45ee80745574e2f5ab80"^^xsd:hexBinary ; . 
-kb:hash-b02ebdb3-edf7-5fbf-8088-3d064e316b93 +kb:Hash-fe2030ac-2380-520f-8499-5f2a1e5d69af a uco-types:Hash ; - uco-types:hashMethod "SHA3-256"^^uco-vocabulary:HashNameVocab ; - uco-types:hashValue "36f028580bb02cc8272a9a020f4200e346e276ae664e45ee80745574e2f5ab80"^^xsd:hexBinary ; + uco-types:hashMethod "SHA1"^^uco-vocabulary:HashNameVocab ; + uco-types:hashValue "a94a8fe5ccb19ba61c4c0873d391e987982fbbd3"^^xsd:hexBinary ; . From 712ea3a6682f34ce5fde8ba76badedf897d39b14 Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Thu, 4 May 2023 15:22:44 -0400 Subject: [PATCH 10/12] Require namespace argument No effects were observed on Make-managed files. Signed-off-by: Alex Nelson --- case_utils/case_file/__init__.py | 4 +- case_utils/inherent_uuid.py | 73 ++++++-------------------------- 2 files changed, 15 insertions(+), 62 deletions(-) diff --git a/case_utils/case_file/__init__.py b/case_utils/case_file/__init__.py index 6ab4ab9..536e7fb 100644 --- a/case_utils/case_file/__init__.py +++ b/case_utils/case_file/__init__.py @@ -104,7 +104,7 @@ def create_file_node( n_file_facet: rdflib.URIRef if use_deterministic_uuids: n_file_facet = case_utils.inherent_uuid.get_facet_uriref( - n_file, NS_UCO_OBSERVABLE.FileFacet + n_file, NS_UCO_OBSERVABLE.FileFacet, namespace=node_namespace ) else: n_file_facet = node_namespace["FileFacet-" + case_utils.local_uuid.local_uuid()] @@ -138,7 +138,7 @@ def create_file_node( n_contentdata_facet: rdflib.URIRef if use_deterministic_uuids: n_contentdata_facet = case_utils.inherent_uuid.get_facet_uriref( - n_file, NS_UCO_OBSERVABLE.ContentDataFacet + n_file, NS_UCO_OBSERVABLE.ContentDataFacet, namespace=node_namespace ) else: n_contentdata_facet = node_namespace[ diff --git a/case_utils/inherent_uuid.py b/case_utils/inherent_uuid.py index 832b448..058de59 100644 --- a/case_utils/inherent_uuid.py +++ b/case_utils/inherent_uuid.py @@ -34,17 +34,18 @@ A knowledge base ontology currently uses a prefix 'kb:', expanding to 'http://example.org/kb/'. 
This knowledge base has a node kb:File-ac6b44cf-dc6b-4f2c-a09d-c9beb0a345a9. What is the IRI of its FileFacet? >>> from case_utils.namespace import NS_UCO_OBSERVABLE ->>> file_iri: str = "http://example.org/kb/File-ac6b44cf-dc6b-4f2c-a09d-c9beb0a345a9" ->>> n_file = URIRef(file_iri) ->>> n_file_facet = get_facet_uriref(n_file, NS_UCO_OBSERVABLE.FileFacet) +>>> ns_kb = Namespace("http://example.org/kb/") +>>> n_file = ns_kb["File-ac6b44cf-dc6b-4f2c-a09d-c9beb0a345a9"] +>>> n_file_facet = get_facet_uriref(n_file, NS_UCO_OBSERVABLE.FileFacet, namespace=ns_kb) >>> n_file_facet rdflib.term.URIRef('http://example.org/kb/FileFacet-01d292e3-0f38-5974-868d-006ef07f5186') A documentation policy change has been enacted, and now all knowledge base individuals need to use the URN example form. What is the FileFacet IRI now? +>>> ns_kb_2 = Namespace("urn:example:kb:") >>> file_iri_2: str = "urn:example:kb:File-ac6b44cf-dc6b-4f2c-a09d-c9beb0a345a9" >>> n_file_2 = URIRef(file_iri_2) ->>> n_file_facet_2 = get_facet_uriref(n_file_2, NS_UCO_OBSERVABLE.FileFacet) +>>> n_file_facet_2 = get_facet_uriref(n_file_2, NS_UCO_OBSERVABLE.FileFacet, namespace=ns_kb_2) >>> n_file_facet_2 rdflib.term.URIRef('urn:example:kb:FileFacet-01d292e3-0f38-5974-868d-006ef07f5186') @@ -53,7 +54,7 @@ >>> assert str(n_file_facet)[-36:] == str(n_file_facet_2)[-36:] """ -__version__ = "0.0.2" +__version__ = "0.0.3" import binascii import re @@ -148,85 +149,37 @@ def facet_inherence_uuid( return uuid.uuid5(uco_object_inherence_uuid, str(n_facet_class)) -def guess_namespace(n_node: URIRef, *args: Any, **kwargs: Any) -> Namespace: - """ - This function attempts simple heuristics to extract from a URIRef its namespace IRI. The heuristics, being simple, are not necessarily going to provide desirable answers, and might be semantically incorrect. 
- - :rtype rdflib.Namespace: - - Examples - ======== - - >>> guess_namespace(URIRef("http://example.org/kb/Foo")) - Namespace('http://example.org/kb/') - >>> guess_namespace(URIRef("http://example.org/kb#Foo")) - Namespace('http://example.org/kb#') - >>> guess_namespace(URIRef("urn:example:kb#Foo")) - Namespace('urn:example:kb#') - - Note this function might not always give desirable answers. - - >>> guess_namespace(URIRef("urn:example:kb#Foo/Bar")) - Namespace('urn:example:kb#') - >>> guess_namespace(URIRef("urn:example:kb/Foo#Bar")) - Namespace('urn:example:kb/Foo#') - - Some patterns, such as Simple Storage Service (S3) URLs being treated as RDF IRIs, should avoid using this function. This object that houses a PCAP blob can function as an IRI, but the guessed namespace value does not serve as an RDF namespace. - - >>> guess_namespace(URIRef("s3://digitalcorpora/corpora/scenarios/2008-nitroba/nitroba.pcap")) - Namespace('s3://digitalcorpora/corpora/scenarios/2008-nitroba/') - """ - node_iri = str(n_node) - if "#" in node_iri: - namespace_iri = node_iri[: 1 + node_iri.rindex("#")] - elif "/" in node_iri: - namespace_iri = node_iri[: 1 + node_iri.rindex("/")] - else: - namespace_iri = node_iri[: 1 + node_iri.rindex(":")] - return Namespace(namespace_iri) - - def get_facet_uriref( n_uco_object: URIRef, n_facet_class: URIRef, *args: Any, - namespace: Optional[Namespace] = None, + namespace: Namespace, **kwargs: Any ) -> URIRef: """ - :param namespace: An optional RDFLib Namespace object to use for prefixing the Facet IRI with a knowledge base prefix IRI. If not provided, will be guessed from a right-truncation of n_uco_object under some namespace forms (hash-, then slash-, then colon-terminated). This is a potentially fragile guessing mechanism, so users should feel encouraged to provide this optional parameter. 
- :type namespace rdflib.Namespace or None: + :param namespace: An RDFLib Namespace object to use for prefixing the Facet IRI with a knowledge base prefix IRI. + :type namespace: rdflib.Namespace Examples ======== - What is the URLFacet pertaining to the Nitroba University Scenario's PCAP file, when being interpreted as a Simple Storage Service (S3) object? Note that this example will show that in some cases a (RDFLib) Namespace will be desired. + What is the URLFacet pertaining to the Nitroba University Scenario's PCAP file, when being interpreted as a Simple Storage Service (S3) object? >>> from case_utils.namespace import NS_UCO_OBSERVABLE >>> pcap_url: str = "s3://digitalcorpora/corpora/scenarios/2008-nitroba/nitroba.pcap" >>> n_pcap = URIRef(pcap_url) - >>> n_pcap_url_facet_try1 = get_facet_uriref(n_pcap, NS_UCO_OBSERVABLE.URLFacet) - >>> n_pcap_url_facet_try1 - rdflib.term.URIRef('s3://digitalcorpora/corpora/scenarios/2008-nitroba/URLFacet-4b6023da-dbc4-5e1e-9a2f-aca2a6f6405c') - >>> # Looks like a (RDFLib) Namespace object should be provided. >>> ns_kb = Namespace("http://example.org/kb/") - >>> n_pcap_url_facet_try2 = get_facet_uriref(n_pcap, NS_UCO_OBSERVABLE.URLFacet, namespace=ns_kb) - >>> n_pcap_url_facet_try2 + >>> n_pcap_url_facet = get_facet_uriref(n_pcap, NS_UCO_OBSERVABLE.URLFacet, namespace=ns_kb) + >>> n_pcap_url_facet rdflib.term.URIRef('http://example.org/kb/URLFacet-4b6023da-dbc4-5e1e-9a2f-aca2a6f6405c') """ uco_object_uuid_namespace: uuid.UUID = inherence_uuid(n_uco_object) facet_uuid = facet_inherence_uuid(uco_object_uuid_namespace, n_facet_class) - _namespace: Namespace - if namespace is None: - _namespace = guess_namespace(n_uco_object) - else: - _namespace = namespace - # NOTE: This encodes an assumption that Facets (including extension Facets) use the "Slash" IRI style. 
facet_class_local_name = str(n_facet_class).rsplit("/")[-1] - return _namespace[facet_class_local_name + "-" + str(facet_uuid)] + return namespace[facet_class_local_name + "-" + str(facet_uuid)] def hash_method_value_uuid(l_hash_method: Literal, l_hash_value: Literal) -> uuid.UUID: From 607381a5d412c0a5144f231b100b9240f08f91da Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Tue, 9 May 2023 11:07:31 -0400 Subject: [PATCH 11/12] Refine inherence documentation Signed-off-by: Alex Nelson --- case_utils/inherent_uuid.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/case_utils/inherent_uuid.py b/case_utils/inherent_uuid.py index 058de59..2a3bcb9 100644 --- a/case_utils/inherent_uuid.py +++ b/case_utils/inherent_uuid.py @@ -93,11 +93,11 @@ ) -def inherence_uuid(n_uco_thing: URIRef, *args: Any, **kwargs: Any) -> uuid.UUID: +def inherence_uuid(n_thing: URIRef, *args: Any, **kwargs: Any) -> uuid.UUID: """ - This function returns a UUIDv5 for any UcoThing, that can be used as a UUID Namespace in further `uuid.uuidv5` calls. + This function returns a UUIDv5 for any OWL Thing that can be used as a UUID Namespace in further `uuid.uuid5` calls. - In the case that the UcoThing ends with a UUID, that UUID string will be returned wrapped in a UUID object. In all other cases, a UUID version 5 object will be returned for the node as a name under the URL namespace [#rfc4122ac]_. + In the case that the Thing is a UcoThing that ends with a UUID, that UUID string will be returned wrapped in a UUID object. In all other cases, a UUID version 5 object will be returned for the Thing as a name under the URL namespace [#rfc4122ac]_. 
References ========== @@ -123,7 +123,7 @@ def inherence_uuid(n_uco_thing: URIRef, *args: Any, **kwargs: Any) -> uuid.UUID: >>> case_homepage_uuid_namespace UUID('2c6406b7-3396-5fdd-b9bf-c6e21273e40a') """ - node_iri = str(n_uco_thing) + node_iri = str(n_thing) if len(node_iri) < 40 or RX_UUID.search(node_iri) is None: # <40 -> Too short to have a UUID and scheme. return uuid.uuid5(uuid.NAMESPACE_URL, node_iri) @@ -138,7 +138,7 @@ def facet_inherence_uuid( **kwargs: Any ) -> uuid.UUID: """ - :param n_facet_class: This node is expected to be the `rdflib.term.URIRef` for an OWL Class that is either in UCO or extends a class in UCO, such as `case_utils.namespace.NS_UCO_OBSERVABLE.FileFacet`. The Facet class SHOULD be a 'leaf' class - that is, it should have no OWL subclasses. (This 'SHOULD' might become a more stringent requirement in the future. uco-core:Facet must not be used. There is some question on how this rule should apply for uco-observable:WifiAddressFacet and its parent class uco-observable:MACAddressFacet.) + :param n_facet_class: This node is expected to be the `rdflib.term.URIRef` for an OWL Class that is either in UCO or extends a class in UCO, such as `case_utils.namespace.NS_UCO_OBSERVABLE.FileFacet`. The Facet class SHOULD be a 'leaf' class - that is, it should have no OWL subclasses. (This 'SHOULD' might become a more stringent requirement in the future. uco-core:Facet MUST NOT be used. There is some question on how this rule should apply for uco-observable:WifiAddressFacet and its parent class uco-observable:MACAddressFacet.) 
:type n_facet_class: rdflib.term.URIRef """ From 6da50dbe19794c9b5d293b8bd83fc364f341a4d3 Mon Sep 17 00:00:00 2001 From: Alex Nelson Date: Tue, 9 May 2023 11:28:47 -0400 Subject: [PATCH 12/12] Bump versions Signed-off-by: Alex Nelson --- case_utils/__init__.py | 2 +- case_utils/case_file/__init__.py | 2 +- case_utils/inherent_uuid.py | 2 +- case_utils/ontology/src/ontology_and_version_iris.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/case_utils/__init__.py b/case_utils/__init__.py index 9d281a0..a9456fb 100644 --- a/case_utils/__init__.py +++ b/case_utils/__init__.py @@ -11,6 +11,6 @@ # # We would appreciate acknowledgement if the software is used. -__version__ = "0.10.0" +__version__ = "0.11.0" from . import local_uuid # noqa: F401 diff --git a/case_utils/case_file/__init__.py b/case_utils/case_file/__init__.py index 536e7fb..051e07d 100644 --- a/case_utils/case_file/__init__.py +++ b/case_utils/case_file/__init__.py @@ -15,7 +15,7 @@ This module creates a graph object that provides a basic UCO characterization of a single file. The gathered metadata is among the more "durable" file characteristics, i.e. characteristics that would remain consistent when transferring a file between locations. 
""" -__version__ = "0.4.0" +__version__ = "0.5.0" import argparse import datetime diff --git a/case_utils/inherent_uuid.py b/case_utils/inherent_uuid.py index 2a3bcb9..667a8d1 100644 --- a/case_utils/inherent_uuid.py +++ b/case_utils/inherent_uuid.py @@ -54,7 +54,7 @@ >>> assert str(n_file_facet)[-36:] == str(n_file_facet_2)[-36:] """ -__version__ = "0.0.3" +__version__ = "0.1.0" import binascii import re diff --git a/case_utils/ontology/src/ontology_and_version_iris.py b/case_utils/ontology/src/ontology_and_version_iris.py index e85f0ff..f0139ed 100644 --- a/case_utils/ontology/src/ontology_and_version_iris.py +++ b/case_utils/ontology/src/ontology_and_version_iris.py @@ -15,7 +15,7 @@ This script creates a list of all ontology and version IRIs that have ever existed in a CDO ontology to describe a CDO ontology. I.e. the subject of triples with owl:Ontology as predicate are included, as are the objects of version-referencing triples (owl:versionIRI, owl:incompatibleWith, etc.). """ -__version__ = "0.1.0" +__version__ = "0.1.1" import argparse import typing