Skip to content

Commit e05dc28

Browse files
committed
Use inherent IDs to infer qualified Influence and InstantaneousEvent nodes
This patch uses the inherence UUID functions from `case-utils` PR 112 to replace the blank nodes generated with SPARQL Construct queries. As side effects of this migration, some bugs were fixed with generating some associations, and inherence modeling assumptions are now specified in code comments. This patch also adds `prov:Start` and `prov:End` nodes to reify `prov:Activity` (and `case-investigation:InvestigativeAction`) time boundaries. This will be a significant assistance in OWL-Time-based visualization under development for `case-prov` PR 54. Creating the `prov:Start` and `prov:End` nodes as IRI-identified is also necessary because of a bug observed in `rdf-toolkit`; see their Issue 52. Since `case_prov_rdf` will now be able to generate non-blank nodes, it has picked up two behaviors used in other projects importing `case-utils`: * The `--use-deterministic-uuids` flag has been added. * The `CASE_DEMO_NONRANDOM_UUID_BASE` environment variable can now be used to make non-inherent deterministic UUIDs. A follow-on patch will regenerate Make-managed files. References: * #54 * casework/CASE-Utilities-Python#112 * edmcouncil/rdf-toolkit#52 Signed-off-by: Alex Nelson <[email protected]>
1 parent 3f38d16 commit e05dc28

File tree

10 files changed

+331
-356
lines changed

10 files changed

+331
-356
lines changed

case_prov/case_prov_rdf.py

Lines changed: 315 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,15 @@
2121
import importlib.resources
2222
import logging
2323
import os
24+
import typing
25+
import uuid
2426

27+
import case_utils.inherent_uuid
28+
import case_utils.local_uuid
2529
import rdflib.plugins.sparql
2630
from case_utils.namespace import (
2731
NS_CASE_INVESTIGATION,
32+
NS_RDF,
2833
NS_UCO_ACTION,
2934
NS_UCO_CORE,
3035
NS_UCO_IDENTITY,
@@ -41,12 +46,29 @@ def main() -> None:
4146
parser = argparse.ArgumentParser()
4247
parser.add_argument("-d", "--debug", action="store_true")
4348
parser.add_argument("--allow-empty-results", action="store_true")
49+
parser.add_argument(
50+
"--kb-iri",
51+
default="http://example.org/kb/",
52+
help="Fallback IRI to use for the knowledge base namespace.",
53+
)
54+
parser.add_argument(
55+
"--kb-prefix",
56+
default="kb",
57+
help="Knowledge base prefix for compacted IRI form. If this prefix is already in the input graph, --kb-iri will be ignored.",
58+
)
59+
parser.add_argument(
60+
"--use-deterministic-uuids",
61+
action="store_true",
62+
help="Use UUIDs computed using the case_utils.inherent_uuid module.",
63+
)
4464
parser.add_argument("out_file")
4565
parser.add_argument("in_graph", nargs="+")
4666
args = parser.parse_args()
4767

4868
logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
4969

70+
case_utils.local_uuid.configure()
71+
5072
in_graph = rdflib.Graph()
5173
out_graph = rdflib.Graph()
5274

@@ -65,6 +87,15 @@ def main() -> None:
6587
for prefix in nsdict:
6688
out_graph.namespace_manager.bind(prefix, nsdict[prefix])
6789

90+
# Determine knowledge base prefix for new inherent nodes.
91+
if args.kb_prefix in nsdict:
92+
NS_KB = rdflib.Namespace(nsdict[args.kb_prefix])
93+
elif args.kb_iri in nsdict.values():
94+
NS_KB = rdflib.Namespace(args.kb_iri)
95+
else:
96+
NS_KB = rdflib.Namespace(args.kb_iri)
97+
out_graph.bind(args.kb_prefix, NS_KB)
98+
6899
# Resource file loading c/o https://stackoverflow.com/a/20885799
69100
query_filenames = []
70101
for resource_filename in importlib.resources.contents(queries):
@@ -75,6 +106,103 @@ def main() -> None:
75106
query_filenames.append(resource_filename)
76107
assert len(query_filenames) > 0, "Failed to load list of query files."
77108

109+
n_activity: rdflib.URIRef
110+
n_agent: rdflib.URIRef
111+
n_entity: rdflib.URIRef
112+
113+
# Generate inherent nodes.
114+
n_actions: typing.Set[rdflib.URIRef] = set()
115+
for n_action in in_graph.subjects(
116+
NS_RDF.type, NS_CASE_INVESTIGATION.InvestigativeAction
117+
):
118+
assert isinstance(n_action, rdflib.URIRef)
119+
n_actions.add(n_action)
120+
for n_action in sorted(n_actions):
121+
assert isinstance(n_action, rdflib.URIRef)
122+
action_inherence_uuid = case_utils.inherent_uuid.inherence_uuid(n_action)
123+
124+
# Generate Ends.
125+
n_end: typing.Optional[rdflib.IdentifiedNode] = None
126+
for n_value in in_graph.objects(n_action, NS_PROV.qualifiedEnd):
127+
assert isinstance(n_value, rdflib.term.IdentifiedNode)
128+
n_end = n_value
129+
if n_end is None:
130+
if args.use_deterministic_uuids:
131+
end_uuid = str(
132+
uuid.uuid5(action_inherence_uuid, str(NS_PROV.qualifiedEnd))
133+
)
134+
else:
135+
end_uuid = case_utils.local_uuid.local_uuid()
136+
n_end = NS_KB["End-" + end_uuid]
137+
out_graph.add((n_action, NS_PROV.qualifiedEnd, n_end))
138+
out_graph.add((n_end, NS_RDF.type, NS_PROV.End))
139+
for l_object in in_graph.objects(n_action, NS_UCO_ACTION.endTime):
140+
out_graph.add((n_end, NS_PROV.atTime, l_object))
141+
142+
# Generate Starts.
143+
n_start: typing.Optional[rdflib.IdentifiedNode] = None
144+
for n_value in in_graph.objects(n_action, NS_PROV.qualifiedStart):
145+
assert isinstance(n_value, rdflib.term.IdentifiedNode)
146+
n_start = n_value
147+
if n_start is None:
148+
if args.use_deterministic_uuids:
149+
start_uuid = str(
150+
uuid.uuid5(action_inherence_uuid, str(NS_PROV.qualifiedStart))
151+
)
152+
else:
153+
start_uuid = case_utils.local_uuid.local_uuid()
154+
n_start = NS_KB["Start-" + start_uuid]
155+
out_graph.add((n_action, NS_PROV.qualifiedStart, n_start))
156+
out_graph.add((n_start, NS_RDF.type, NS_PROV.Start))
157+
for l_object in in_graph.objects(n_action, NS_UCO_ACTION.startTime):
158+
out_graph.add((n_start, NS_PROV.atTime, l_object))
159+
160+
qualified_association_uuid_namespace = uuid.uuid5(
161+
action_inherence_uuid, str(NS_PROV.qualifiedAssociation)
162+
)
163+
for n_agency_predicate in [
164+
NS_UCO_ACTION.instrument,
165+
NS_UCO_ACTION.performer,
166+
]:
167+
_n_agents: typing.Set[rdflib.URIRef] = set()
168+
for _n_agent in in_graph.objects(n_action, n_agency_predicate):
169+
assert isinstance(_n_agent, rdflib.URIRef)
170+
_n_agents.add(_n_agent)
171+
for n_agent in sorted(_n_agents):
172+
if args.use_deterministic_uuids:
173+
association_uuid = str(
174+
uuid.uuid5(qualified_association_uuid_namespace, str(n_agent))
175+
)
176+
else:
177+
association_uuid = case_utils.local_uuid.local_uuid()
178+
n_association = NS_KB["Association-" + association_uuid]
179+
out_graph.add((n_action, NS_PROV.qualifiedAssociation, n_association))
180+
out_graph.add((n_association, NS_RDF.type, NS_PROV.Association))
181+
out_graph.add((n_association, NS_PROV.agent, n_agent))
182+
183+
# A uco-action:Action may have at most one performer, and any number of instruments.
184+
qualified_delegation_uuid_namespace = uuid.uuid5(
185+
action_inherence_uuid, str(NS_PROV.qualifiedDelegation)
186+
)
187+
for n_performer in in_graph.objects(n_action, NS_UCO_ACTION.performer):
188+
delegation_for_performer_uuid_namespace = uuid.uuid5(
189+
qualified_delegation_uuid_namespace, str(n_performer)
190+
)
191+
for n_instrument in in_graph.objects(n_action, NS_UCO_ACTION.instrument):
192+
if args.use_deterministic_uuids:
193+
delegation_uuid = str(
194+
uuid.uuid5(
195+
delegation_for_performer_uuid_namespace, str(n_instrument)
196+
)
197+
)
198+
else:
199+
delegation_uuid = case_utils.local_uuid.local_uuid()
200+
n_delegation = NS_KB["Delegation-" + delegation_uuid]
201+
out_graph.add((n_instrument, NS_PROV.qualifiedDelegation, n_delegation))
202+
out_graph.add((n_delegation, NS_RDF.type, NS_PROV.Delegation))
203+
out_graph.add((n_delegation, NS_PROV.agent, n_performer))
204+
out_graph.add((n_delegation, NS_PROV.hadActivity, n_action))
205+
78206
# Run all supplementing CONSTRUCT queries.
79207
tally = 0
80208
for query_filename in query_filenames:
@@ -97,6 +225,193 @@ def main() -> None:
97225
if not args.allow_empty_results:
98226
raise ValueError("Failed to construct any results.")
99227

228+
# Run inherent qualification steps that are dependent on PROV-O properties being present.
229+
# Store in tmp_triples, to avoid modifying graph while iterating over graph.
230+
tmp_triples: typing.Set[
231+
typing.Tuple[rdflib.term.Node, rdflib.term.Node, rdflib.term.Node]
232+
] = set()
233+
234+
# Build Attributions.
235+
# Modeling assumption over PROV-O: An Attribution inheres in both the Entity and Agent.
236+
for triple in sorted(out_graph.triples((None, NS_PROV.wasAttributedTo, None))):
237+
assert isinstance(triple[0], rdflib.URIRef)
238+
assert isinstance(triple[2], rdflib.URIRef)
239+
n_entity = triple[0]
240+
n_agent = triple[2]
241+
242+
n_attribution: typing.Optional[rdflib.term.IdentifiedNode] = None
243+
for n_object in in_graph.objects(n_entity, NS_PROV.qualifiedAttribution):
244+
if (n_object, NS_PROV.agent, n_agent) in in_graph:
245+
assert isinstance(n_object, rdflib.term.IdentifiedNode)
246+
n_attribution = n_object
247+
if n_attribution is not None:
248+
# No creation necessary.
249+
continue
250+
251+
entity_uuid_namespace = case_utils.inherent_uuid.inherence_uuid(n_entity)
252+
qualifed_attribution_uuid_namespace = uuid.uuid5(
253+
entity_uuid_namespace, str(NS_PROV.qualifiedAttribution)
254+
)
255+
256+
if args.use_deterministic_uuids:
257+
attribution_uuid = str(
258+
uuid.uuid5(qualifed_attribution_uuid_namespace, str(n_agent))
259+
)
260+
else:
261+
attribution_uuid = case_utils.local_uuid.local_uuid()
262+
263+
n_attribution = NS_KB["Attribution-" + attribution_uuid]
264+
tmp_triples.add((n_entity, NS_PROV.qualifiedAttribution, n_attribution))
265+
tmp_triples.add((n_attribution, NS_RDF.type, NS_PROV.Attribution))
266+
tmp_triples.add((n_attribution, NS_PROV.agent, n_agent))
267+
268+
# Build Communications.
269+
# Modeling assumption over PROV-O: A Communication inheres in both the informed Activity and informant Activity.
270+
for triple in sorted(out_graph.triples((None, NS_PROV.wasInformedBy, None))):
271+
assert isinstance(triple[0], rdflib.URIRef)
272+
assert isinstance(triple[2], rdflib.URIRef)
273+
n_informed_activity = triple[0]
274+
n_informant_activity = triple[2]
275+
276+
n_communication: typing.Optional[rdflib.term.IdentifiedNode] = None
277+
for n_object in in_graph.objects(
278+
n_informed_activity, NS_PROV.qualifiedCommunication
279+
):
280+
if (n_object, NS_PROV.activity, n_informant_activity) in in_graph:
281+
assert isinstance(n_object, rdflib.term.IdentifiedNode)
282+
n_communication = n_object
283+
if n_communication is not None:
284+
# No creation necessary.
285+
continue
286+
287+
informed_activity_uuid_namespace = case_utils.inherent_uuid.inherence_uuid(
288+
n_informed_activity
289+
)
290+
qualifed_communication_uuid_namespace = uuid.uuid5(
291+
informed_activity_uuid_namespace, str(NS_PROV.qualifiedCommunication)
292+
)
293+
294+
if args.use_deterministic_uuids:
295+
communication_uuid = str(
296+
uuid.uuid5(
297+
qualifed_communication_uuid_namespace, str(n_informant_activity)
298+
)
299+
)
300+
else:
301+
communication_uuid = case_utils.local_uuid.local_uuid()
302+
303+
n_communication = NS_KB["Communication-" + communication_uuid]
304+
tmp_triples.add(
305+
(n_informed_activity, NS_PROV.qualifiedCommunication, n_communication)
306+
)
307+
tmp_triples.add((n_communication, NS_RDF.type, NS_PROV.Communication))
308+
tmp_triples.add((n_communication, NS_PROV.activity, n_informant_activity))
309+
310+
# Build Derivations.
311+
# Modeling assumption over PROV-O: A Derivation inheres in both the input Entity and output Entity.
312+
for triple in sorted(out_graph.triples((None, NS_PROV.wasDerivedFrom, None))):
313+
assert isinstance(triple[0], rdflib.URIRef)
314+
assert isinstance(triple[2], rdflib.URIRef)
315+
n_action_result = triple[0]
316+
n_action_object = triple[2]
317+
318+
n_derivation: typing.Optional[rdflib.term.IdentifiedNode] = None
319+
for n_object in in_graph.objects(n_action_result, NS_PROV.qualifiedDerivation):
320+
if (n_object, NS_PROV.entity, n_action_object) in in_graph:
321+
assert isinstance(n_object, rdflib.term.IdentifiedNode)
322+
n_derivation = n_object
323+
if n_derivation is not None:
324+
# No creation necessary.
325+
continue
326+
327+
action_result_uuid_namespace = case_utils.inherent_uuid.inherence_uuid(
328+
n_action_result
329+
)
330+
qualifed_derivation_uuid_namespace = uuid.uuid5(
331+
action_result_uuid_namespace, str(NS_PROV.qualifiedDerivation)
332+
)
333+
334+
if args.use_deterministic_uuids:
335+
derivation_uuid = str(
336+
uuid.uuid5(qualifed_derivation_uuid_namespace, str(n_action_object))
337+
)
338+
else:
339+
derivation_uuid = case_utils.local_uuid.local_uuid()
340+
341+
n_derivation = NS_KB["Derivation-" + derivation_uuid]
342+
tmp_triples.add((n_action_result, NS_PROV.qualifiedDerivation, n_derivation))
343+
tmp_triples.add((n_derivation, NS_RDF.type, NS_PROV.Derivation))
344+
tmp_triples.add((n_derivation, NS_PROV.entity, n_action_object))
345+
for n_object in out_graph.objects(n_action_result, NS_PROV.wasGeneratedBy):
346+
tmp_triples.add((n_derivation, NS_PROV.hadActivity, n_object))
347+
348+
# Build Generations.
349+
# Modeling assumption over PROV-O: A Generation inheres solely in the Entity.
350+
for triple in sorted(out_graph.triples((None, NS_PROV.wasGeneratedBy, None))):
351+
assert isinstance(triple[0], rdflib.URIRef)
352+
assert isinstance(triple[2], rdflib.URIRef)
353+
n_entity = triple[0]
354+
n_activity = triple[2]
355+
356+
n_generation: typing.Optional[rdflib.term.IdentifiedNode] = None
357+
for n_object in in_graph.objects(n_entity, NS_PROV.qualifiedGeneration):
358+
assert isinstance(n_object, rdflib.term.IdentifiedNode)
359+
n_generation = n_object
360+
if n_generation is not None:
361+
# No creation necessary.
362+
continue
363+
364+
entity_uuid_namespace = case_utils.inherent_uuid.inherence_uuid(n_entity)
365+
qualifed_generation_uuid_namespace = uuid.uuid5(
366+
entity_uuid_namespace, str(NS_PROV.qualifiedGeneration)
367+
)
368+
369+
if args.use_deterministic_uuids:
370+
generation_uuid = str(
371+
uuid.uuid5(qualifed_generation_uuid_namespace, str(n_activity))
372+
)
373+
else:
374+
generation_uuid = case_utils.local_uuid.local_uuid()
375+
376+
n_generation = NS_KB["Generation-" + generation_uuid]
377+
tmp_triples.add((n_entity, NS_PROV.qualifiedGeneration, n_generation))
378+
tmp_triples.add((n_generation, NS_RDF.type, NS_PROV.Generation))
379+
tmp_triples.add((n_generation, NS_PROV.activity, n_activity))
380+
381+
# Build Usages.
382+
# Modeling assumption over PROV-O: A Usage inheres in both the Activity and Entity.
383+
for triple in sorted(out_graph.triples((None, NS_PROV.used, None))):
384+
assert isinstance(triple[0], rdflib.URIRef)
385+
assert isinstance(triple[2], rdflib.URIRef)
386+
n_activity = triple[0]
387+
n_entity = triple[2]
388+
389+
n_usage: typing.Optional[rdflib.term.IdentifiedNode] = None
390+
for n_object in in_graph.objects(n_entity, NS_PROV.qualifiedUsage):
391+
assert isinstance(n_object, rdflib.term.IdentifiedNode)
392+
n_usage = n_object
393+
if n_usage is not None:
394+
# No creation necessary.
395+
continue
396+
397+
activity_uuid_namespace = case_utils.inherent_uuid.inherence_uuid(n_activity)
398+
qualifed_usage_uuid_namespace = uuid.uuid5(
399+
activity_uuid_namespace, str(NS_PROV.qualifiedUsage)
400+
)
401+
402+
if args.use_deterministic_uuids:
403+
usage_uuid = str(uuid.uuid5(qualifed_usage_uuid_namespace, str(n_entity)))
404+
else:
405+
usage_uuid = case_utils.local_uuid.local_uuid()
406+
407+
n_usage = NS_KB["Usage-" + usage_uuid]
408+
tmp_triples.add((n_activity, NS_PROV.qualifiedUsage, n_usage))
409+
tmp_triples.add((n_usage, NS_RDF.type, NS_PROV.Usage))
410+
tmp_triples.add((n_usage, NS_PROV.entity, n_entity))
411+
412+
for tmp_triple in tmp_triples:
413+
out_graph.add(tmp_triple)
414+
100415
out_graph.serialize(args.out_file)
101416

102417

0 commit comments

Comments
 (0)