21
21
import importlib .resources
22
22
import logging
23
23
import os
24
+ import typing
25
+ import uuid
24
26
27
+ import case_utils .inherent_uuid
28
+ import case_utils .local_uuid
25
29
import rdflib .plugins .sparql
26
30
from case_utils .namespace import (
27
31
NS_CASE_INVESTIGATION ,
32
+ NS_RDF ,
28
33
NS_UCO_ACTION ,
29
34
NS_UCO_CORE ,
30
35
NS_UCO_IDENTITY ,
@@ -41,12 +46,29 @@ def main() -> None:
41
46
parser = argparse .ArgumentParser ()
42
47
parser .add_argument ("-d" , "--debug" , action = "store_true" )
43
48
parser .add_argument ("--allow-empty-results" , action = "store_true" )
49
+ parser .add_argument (
50
+ "--kb-iri" ,
51
+ default = "http://example.org/kb/" ,
52
+ help = "Fallback IRI to use for the knowledge base namespace." ,
53
+ )
54
+ parser .add_argument (
55
+ "--kb-prefix" ,
56
+ default = "kb" ,
57
+ help = "Knowledge base prefix for compacted IRI form. If this prefix is already in the input graph, --kb-iri will be ignored." ,
58
+ )
59
+ parser .add_argument (
60
+ "--use-deterministic-uuids" ,
61
+ action = "store_true" ,
62
+ help = "Use UUIDs computed using the case_utils.inherent_uuid module." ,
63
+ )
44
64
parser .add_argument ("out_file" )
45
65
parser .add_argument ("in_graph" , nargs = "+" )
46
66
args = parser .parse_args ()
47
67
48
68
logging .basicConfig (level = logging .DEBUG if args .debug else logging .INFO )
49
69
70
+ case_utils .local_uuid .configure ()
71
+
50
72
in_graph = rdflib .Graph ()
51
73
out_graph = rdflib .Graph ()
52
74
@@ -65,6 +87,15 @@ def main() -> None:
65
87
for prefix in nsdict :
66
88
out_graph .namespace_manager .bind (prefix , nsdict [prefix ])
67
89
90
+ # Determine knowledge base prefix for new inherent nodes.
91
+ if args .kb_prefix in nsdict :
92
+ NS_KB = rdflib .Namespace (nsdict [args .kb_prefix ])
93
+ elif args .kb_iri in nsdict .values ():
94
+ NS_KB = rdflib .Namespace (args .kb_iri )
95
+ else :
96
+ NS_KB = rdflib .Namespace (args .kb_iri )
97
+ out_graph .bind (args .kb_prefix , NS_KB )
98
+
68
99
# Resource file loading c/o https://stackoverflow.com/a/20885799
69
100
query_filenames = []
70
101
for resource_filename in importlib .resources .contents (queries ):
@@ -75,6 +106,103 @@ def main() -> None:
75
106
query_filenames .append (resource_filename )
76
107
assert len (query_filenames ) > 0 , "Failed to load list of query files."
77
108
109
+ n_activity : rdflib .URIRef
110
+ n_agent : rdflib .URIRef
111
+ n_entity : rdflib .URIRef
112
+
113
+ # Generate inherent nodes.
114
+ n_actions : typing .Set [rdflib .URIRef ] = set ()
115
+ for n_action in in_graph .subjects (
116
+ NS_RDF .type , NS_CASE_INVESTIGATION .InvestigativeAction
117
+ ):
118
+ assert isinstance (n_action , rdflib .URIRef )
119
+ n_actions .add (n_action )
120
+ for n_action in sorted (n_actions ):
121
+ assert isinstance (n_action , rdflib .URIRef )
122
+ action_inherence_uuid = case_utils .inherent_uuid .inherence_uuid (n_action )
123
+
124
+ # Generate Ends.
125
+ n_end : typing .Optional [rdflib .IdentifiedNode ] = None
126
+ for n_value in in_graph .objects (n_action , NS_PROV .qualifiedEnd ):
127
+ assert isinstance (n_value , rdflib .term .IdentifiedNode )
128
+ n_end = n_value
129
+ if n_end is None :
130
+ if args .use_deterministic_uuids :
131
+ end_uuid = str (
132
+ uuid .uuid5 (action_inherence_uuid , str (NS_PROV .qualifiedEnd ))
133
+ )
134
+ else :
135
+ end_uuid = case_utils .local_uuid .local_uuid ()
136
+ n_end = NS_KB ["End-" + end_uuid ]
137
+ out_graph .add ((n_action , NS_PROV .qualifiedEnd , n_end ))
138
+ out_graph .add ((n_end , NS_RDF .type , NS_PROV .End ))
139
+ for l_object in in_graph .objects (n_action , NS_UCO_ACTION .endTime ):
140
+ out_graph .add ((n_end , NS_PROV .atTime , l_object ))
141
+
142
+ # Generate Starts.
143
+ n_start : typing .Optional [rdflib .IdentifiedNode ] = None
144
+ for n_value in in_graph .objects (n_action , NS_PROV .qualifiedStart ):
145
+ assert isinstance (n_value , rdflib .term .IdentifiedNode )
146
+ n_start = n_value
147
+ if n_start is None :
148
+ if args .use_deterministic_uuids :
149
+ start_uuid = str (
150
+ uuid .uuid5 (action_inherence_uuid , str (NS_PROV .qualifiedStart ))
151
+ )
152
+ else :
153
+ start_uuid = case_utils .local_uuid .local_uuid ()
154
+ n_start = NS_KB ["Start-" + start_uuid ]
155
+ out_graph .add ((n_action , NS_PROV .qualifiedStart , n_start ))
156
+ out_graph .add ((n_start , NS_RDF .type , NS_PROV .Start ))
157
+ for l_object in in_graph .objects (n_action , NS_UCO_ACTION .startTime ):
158
+ out_graph .add ((n_start , NS_PROV .atTime , l_object ))
159
+
160
+ qualified_association_uuid_namespace = uuid .uuid5 (
161
+ action_inherence_uuid , str (NS_PROV .qualifiedAssociation )
162
+ )
163
+ for n_agency_predicate in [
164
+ NS_UCO_ACTION .instrument ,
165
+ NS_UCO_ACTION .performer ,
166
+ ]:
167
+ _n_agents : typing .Set [rdflib .URIRef ] = set ()
168
+ for _n_agent in in_graph .objects (n_action , n_agency_predicate ):
169
+ assert isinstance (_n_agent , rdflib .URIRef )
170
+ _n_agents .add (_n_agent )
171
+ for n_agent in sorted (_n_agents ):
172
+ if args .use_deterministic_uuids :
173
+ association_uuid = str (
174
+ uuid .uuid5 (qualified_association_uuid_namespace , str (n_agent ))
175
+ )
176
+ else :
177
+ association_uuid = case_utils .local_uuid .local_uuid ()
178
+ n_association = NS_KB ["Association-" + association_uuid ]
179
+ out_graph .add ((n_action , NS_PROV .qualifiedAssociation , n_association ))
180
+ out_graph .add ((n_association , NS_RDF .type , NS_PROV .Association ))
181
+ out_graph .add ((n_association , NS_PROV .agent , n_agent ))
182
+
183
+ # A uco-action:Action may have at most one performer, and any number of instruments.
184
+ qualified_delegation_uuid_namespace = uuid .uuid5 (
185
+ action_inherence_uuid , str (NS_PROV .qualifiedDelegation )
186
+ )
187
+ for n_performer in in_graph .objects (n_action , NS_UCO_ACTION .performer ):
188
+ delegation_for_performer_uuid_namespace = uuid .uuid5 (
189
+ qualified_delegation_uuid_namespace , str (n_performer )
190
+ )
191
+ for n_instrument in in_graph .objects (n_action , NS_UCO_ACTION .instrument ):
192
+ if args .use_deterministic_uuids :
193
+ delegation_uuid = str (
194
+ uuid .uuid5 (
195
+ delegation_for_performer_uuid_namespace , str (n_instrument )
196
+ )
197
+ )
198
+ else :
199
+ delegation_uuid = case_utils .local_uuid .local_uuid ()
200
+ n_delegation = NS_KB ["Delegation-" + delegation_uuid ]
201
+ out_graph .add ((n_instrument , NS_PROV .qualifiedDelegation , n_delegation ))
202
+ out_graph .add ((n_delegation , NS_RDF .type , NS_PROV .Delegation ))
203
+ out_graph .add ((n_delegation , NS_PROV .agent , n_performer ))
204
+ out_graph .add ((n_delegation , NS_PROV .hadActivity , n_action ))
205
+
78
206
# Run all supplementing CONSTRUCT queries.
79
207
tally = 0
80
208
for query_filename in query_filenames :
@@ -97,6 +225,193 @@ def main() -> None:
97
225
if not args .allow_empty_results :
98
226
raise ValueError ("Failed to construct any results." )
99
227
228
+ # Run inherent qualification steps that are dependent on PROV-O properties being present.
229
+ # Store in tmp_triples, to avoid modifying graph while iterating over graph.
230
+ tmp_triples : typing .Set [
231
+ typing .Tuple [rdflib .term .Node , rdflib .term .Node , rdflib .term .Node ]
232
+ ] = set ()
233
+
234
+ # Build Attributions.
235
+ # Modeling assumption over PROV-O: An Attribution inheres in both the Entity and Agent.
236
+ for triple in sorted (out_graph .triples ((None , NS_PROV .wasAttributedTo , None ))):
237
+ assert isinstance (triple [0 ], rdflib .URIRef )
238
+ assert isinstance (triple [2 ], rdflib .URIRef )
239
+ n_entity = triple [0 ]
240
+ n_agent = triple [2 ]
241
+
242
+ n_attribution : typing .Optional [rdflib .term .IdentifiedNode ] = None
243
+ for n_object in in_graph .objects (n_entity , NS_PROV .qualifiedAttribution ):
244
+ if (n_object , NS_PROV .agent , n_agent ) in in_graph :
245
+ assert isinstance (n_object , rdflib .term .IdentifiedNode )
246
+ n_attribution = n_object
247
+ if n_attribution is not None :
248
+ # No creation necessary.
249
+ continue
250
+
251
+ entity_uuid_namespace = case_utils .inherent_uuid .inherence_uuid (n_entity )
252
+ qualifed_attribution_uuid_namespace = uuid .uuid5 (
253
+ entity_uuid_namespace , str (NS_PROV .qualifiedAttribution )
254
+ )
255
+
256
+ if args .use_deterministic_uuids :
257
+ attribution_uuid = str (
258
+ uuid .uuid5 (qualifed_attribution_uuid_namespace , str (n_agent ))
259
+ )
260
+ else :
261
+ attribution_uuid = case_utils .local_uuid .local_uuid ()
262
+
263
+ n_attribution = NS_KB ["Attribution-" + attribution_uuid ]
264
+ tmp_triples .add ((n_entity , NS_PROV .qualifiedAttribution , n_attribution ))
265
+ tmp_triples .add ((n_attribution , NS_RDF .type , NS_PROV .Attribution ))
266
+ tmp_triples .add ((n_attribution , NS_PROV .agent , n_agent ))
267
+
268
+ # Build Communications.
269
+ # Modeling assumption over PROV-O: A Communication inheres in both the informed Activity and informant Activity.
270
+ for triple in sorted (out_graph .triples ((None , NS_PROV .wasInformedBy , None ))):
271
+ assert isinstance (triple [0 ], rdflib .URIRef )
272
+ assert isinstance (triple [2 ], rdflib .URIRef )
273
+ n_informed_activity = triple [0 ]
274
+ n_informant_activity = triple [2 ]
275
+
276
+ n_communication : typing .Optional [rdflib .term .IdentifiedNode ] = None
277
+ for n_object in in_graph .objects (
278
+ n_informed_activity , NS_PROV .qualifiedCommunication
279
+ ):
280
+ if (n_object , NS_PROV .activity , n_informant_activity ) in in_graph :
281
+ assert isinstance (n_object , rdflib .term .IdentifiedNode )
282
+ n_communication = n_object
283
+ if n_communication is not None :
284
+ # No creation necessary.
285
+ continue
286
+
287
+ informed_activity_uuid_namespace = case_utils .inherent_uuid .inherence_uuid (
288
+ n_informed_activity
289
+ )
290
+ qualifed_communication_uuid_namespace = uuid .uuid5 (
291
+ informed_activity_uuid_namespace , str (NS_PROV .qualifiedCommunication )
292
+ )
293
+
294
+ if args .use_deterministic_uuids :
295
+ communication_uuid = str (
296
+ uuid .uuid5 (
297
+ qualifed_communication_uuid_namespace , str (n_informant_activity )
298
+ )
299
+ )
300
+ else :
301
+ communication_uuid = case_utils .local_uuid .local_uuid ()
302
+
303
+ n_communication = NS_KB ["Communication-" + communication_uuid ]
304
+ tmp_triples .add (
305
+ (n_informed_activity , NS_PROV .qualifiedCommunication , n_communication )
306
+ )
307
+ tmp_triples .add ((n_communication , NS_RDF .type , NS_PROV .Communication ))
308
+ tmp_triples .add ((n_communication , NS_PROV .activity , n_informant_activity ))
309
+
310
+ # Build Derivations.
311
+ # Modeling assumption over PROV-O: A Derivation inheres in both the input Entity and output Entity.
312
+ for triple in sorted (out_graph .triples ((None , NS_PROV .wasDerivedFrom , None ))):
313
+ assert isinstance (triple [0 ], rdflib .URIRef )
314
+ assert isinstance (triple [2 ], rdflib .URIRef )
315
+ n_action_result = triple [0 ]
316
+ n_action_object = triple [2 ]
317
+
318
+ n_derivation : typing .Optional [rdflib .term .IdentifiedNode ] = None
319
+ for n_object in in_graph .objects (n_action_result , NS_PROV .qualifiedDerivation ):
320
+ if (n_object , NS_PROV .entity , n_action_object ) in in_graph :
321
+ assert isinstance (n_object , rdflib .term .IdentifiedNode )
322
+ n_derivation = n_object
323
+ if n_derivation is not None :
324
+ # No creation necessary.
325
+ continue
326
+
327
+ action_result_uuid_namespace = case_utils .inherent_uuid .inherence_uuid (
328
+ n_action_result
329
+ )
330
+ qualifed_derivation_uuid_namespace = uuid .uuid5 (
331
+ action_result_uuid_namespace , str (NS_PROV .qualifiedDerivation )
332
+ )
333
+
334
+ if args .use_deterministic_uuids :
335
+ derivation_uuid = str (
336
+ uuid .uuid5 (qualifed_derivation_uuid_namespace , str (n_action_object ))
337
+ )
338
+ else :
339
+ derivation_uuid = case_utils .local_uuid .local_uuid ()
340
+
341
+ n_derivation = NS_KB ["Derivation-" + derivation_uuid ]
342
+ tmp_triples .add ((n_action_result , NS_PROV .qualifiedDerivation , n_derivation ))
343
+ tmp_triples .add ((n_derivation , NS_RDF .type , NS_PROV .Derivation ))
344
+ tmp_triples .add ((n_derivation , NS_PROV .entity , n_action_object ))
345
+ for n_object in out_graph .objects (n_action_result , NS_PROV .wasGeneratedBy ):
346
+ tmp_triples .add ((n_derivation , NS_PROV .hadActivity , n_object ))
347
+
348
+ # Build Generations.
349
+ # Modeling assumption over PROV-O: A Generation inheres solely in the Entity.
350
+ for triple in sorted (out_graph .triples ((None , NS_PROV .wasGeneratedBy , None ))):
351
+ assert isinstance (triple [0 ], rdflib .URIRef )
352
+ assert isinstance (triple [2 ], rdflib .URIRef )
353
+ n_entity = triple [0 ]
354
+ n_activity = triple [2 ]
355
+
356
+ n_generation : typing .Optional [rdflib .term .IdentifiedNode ] = None
357
+ for n_object in in_graph .objects (n_entity , NS_PROV .qualifiedGeneration ):
358
+ assert isinstance (n_object , rdflib .term .IdentifiedNode )
359
+ n_generation = n_object
360
+ if n_generation is not None :
361
+ # No creation necessary.
362
+ continue
363
+
364
+ entity_uuid_namespace = case_utils .inherent_uuid .inherence_uuid (n_entity )
365
+ qualifed_generation_uuid_namespace = uuid .uuid5 (
366
+ entity_uuid_namespace , str (NS_PROV .qualifiedGeneration )
367
+ )
368
+
369
+ if args .use_deterministic_uuids :
370
+ generation_uuid = str (
371
+ uuid .uuid5 (qualifed_generation_uuid_namespace , str (n_activity ))
372
+ )
373
+ else :
374
+ generation_uuid = case_utils .local_uuid .local_uuid ()
375
+
376
+ n_generation = NS_KB ["Generation-" + generation_uuid ]
377
+ tmp_triples .add ((n_entity , NS_PROV .qualifiedGeneration , n_generation ))
378
+ tmp_triples .add ((n_generation , NS_RDF .type , NS_PROV .Generation ))
379
+ tmp_triples .add ((n_generation , NS_PROV .activity , n_activity ))
380
+
381
+ # Build Usages.
382
+ # Modeling assumption over PROV-O: A Usage inheres in both the Activity and Entity.
383
+ for triple in sorted (out_graph .triples ((None , NS_PROV .used , None ))):
384
+ assert isinstance (triple [0 ], rdflib .URIRef )
385
+ assert isinstance (triple [2 ], rdflib .URIRef )
386
+ n_activity = triple [0 ]
387
+ n_entity = triple [2 ]
388
+
389
+ n_usage : typing .Optional [rdflib .term .IdentifiedNode ] = None
390
+ for n_object in in_graph .objects (n_entity , NS_PROV .qualifiedUsage ):
391
+ assert isinstance (n_object , rdflib .term .IdentifiedNode )
392
+ n_usage = n_object
393
+ if n_usage is not None :
394
+ # No creation necessary.
395
+ continue
396
+
397
+ activity_uuid_namespace = case_utils .inherent_uuid .inherence_uuid (n_activity )
398
+ qualifed_usage_uuid_namespace = uuid .uuid5 (
399
+ activity_uuid_namespace , str (NS_PROV .qualifiedUsage )
400
+ )
401
+
402
+ if args .use_deterministic_uuids :
403
+ usage_uuid = str (uuid .uuid5 (qualifed_usage_uuid_namespace , str (n_entity )))
404
+ else :
405
+ usage_uuid = case_utils .local_uuid .local_uuid ()
406
+
407
+ n_usage = NS_KB ["Usage-" + usage_uuid ]
408
+ tmp_triples .add ((n_activity , NS_PROV .qualifiedUsage , n_usage ))
409
+ tmp_triples .add ((n_usage , NS_RDF .type , NS_PROV .Usage ))
410
+ tmp_triples .add ((n_usage , NS_PROV .entity , n_entity ))
411
+
412
+ for tmp_triple in tmp_triples :
413
+ out_graph .add (tmp_triple )
414
+
100
415
out_graph .serialize (args .out_file )
101
416
102
417
0 commit comments