Update to rdf-canonize@4.

davidlehn · davidlehn · commit 20fa0cf0e0e7 · 2023-12-08T22:54:04.000-05:00
- **BREAKING**: See the `rdf-canonize` 4.0.0 changelog for **important**
  changes and upgrade notes.
- Update to handle different RDF/JS dataset `BlankNode` format.
- Enable pass through of numerous possible `rdf-canonize` options in a
  `canonize()` `canonizeOptions` parameter.
- Update to use `rdf-canonize` options.
- The default algorithm has been changed from `URDNA2015` to `RDFC-1.0`, the
  algorithm defined by `rdf-canon`.
- Complexity control defaults `maxWorkFactor` or `maxDeepIterations` may
  need to be adjusted to process graphs with certain blank node
  constructs.
- A `signal` option is available to use an `AbortSignal` to limit
  resource usage.
- The internal digest algorithm can be changed.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,16 @@
 
 ### Changed
 - **BREAKING**: Drop support for Node.js < 18.
+- **BREAKING**: Upgrade dependencies.
+  - `rdf-canonize@4`: See the [rdf-canonize][] 4.0.0 changelog for
+    **important** changes and upgrade notes. Of note:
+    - The default algorithm has been changed from `URDNA2015` to `RDFC-1.0`,
+      the algorithm defined by [rdf-canon][].
+    - Complexity control defaults `maxWorkFactor` or `maxDeepIterations` may
+      need to be adjusted to process graphs with certain blank node constructs.
+    - A `signal` option is available to use an `AbortSignal` to limit resource
+      usage.
+    - The internal digest algorithm can be changed.
 
 ## 8.3.2 - 2023-12-06
 
diff --git a/lib/fromRdf.js b/lib/fromRdf.js
@@ -89,7 +89,7 @@ api.fromRDF = async (
     const nodeMap = graphMap[name];
 
     // get subject, predicate, object
-    const s = quad.subject.value;
+    const s = _nodeId(quad.subject);
     const p = quad.predicate.value;
     const o = quad.object;
 
@@ -98,13 +98,14 @@ api.fromRDF = async (
     }
     const node = nodeMap[s];
 
-    const objectIsNode = o.termType.endsWith('Node');
-    if(objectIsNode && !(o.value in nodeMap)) {
-      nodeMap[o.value] = {'@id': o.value};
+    const objectNodeId = _nodeId(o);
+    const objectIsNode = !!objectNodeId;
+    if(objectIsNode && !(objectNodeId in nodeMap)) {
+      nodeMap[objectNodeId] = {'@id': objectNodeId};
     }
 
     if(p === RDF_TYPE && !useRdfType && objectIsNode) {
-      _addValue(node, '@type', o.value, {propertyIsArray: true});
+      _addValue(node, '@type', objectNodeId, {propertyIsArray: true});
       continue;
     }
 
@@ -114,9 +115,9 @@ api.fromRDF = async (
     // object may be an RDF list/partial list node but we can't know easily
     // until all triples are read
     if(objectIsNode) {
-      if(o.value === RDF_NIL) {
+      if(objectNodeId === RDF_NIL) {
         // track rdf:nil uniquely per graph
-        const object = nodeMap[o.value];
+        const object = nodeMap[objectNodeId];
         if(!('usages' in object)) {
           object.usages = [];
         }
@@ -125,12 +126,12 @@ api.fromRDF = async (
           property: p,
           value
         });
-      } else if(o.value in referencedOnce) {
+      } else if(objectNodeId in referencedOnce) {
         // object referenced more than once
-        referencedOnce[o.value] = false;
+        referencedOnce[objectNodeId] = false;
       } else {
         // keep track of single reference
-        referencedOnce[o.value] = {
+        referencedOnce[objectNodeId] = {
           node,
           property: p,
           value
@@ -303,8 +304,9 @@ api.fromRDF = async (
  */
 function _RDFToObject(o, useNativeTypes, rdfDirection, options) {
   // convert NamedNode/BlankNode object to JSON-LD
-  if(o.termType.endsWith('Node')) {
-    return {'@id': o.value};
+  const nodeId = _nodeId(o);
+  if(nodeId) {
+    return {'@id': nodeId};
   }
 
   // convert literal to JSON-LD
@@ -397,3 +399,20 @@ function _RDFToObject(o, useNativeTypes, rdfDirection, options) {
 
   return rval;
 }
+
+/**
+ * Return id for a term. Handles BlankNodes and NamedNodes. Adds a '_:' prefix
+ * for BlankNodes.
+ *
+ * @param term a term object.
+ *
+ * @return the Node term id or null.
+ */
+function _nodeId(term) {
+  if(term.termType === 'NamedNode') {
+    return term.value;
+  } else if(term.termType === 'BlankNode') {
+    return '_:' + term.value;
+  }
+  return null;
+}
diff --git a/lib/jsonld.js b/lib/jsonld.js
@@ -523,33 +523,39 @@ jsonld.link = async function(input, ctx, options) {
 /**
  * Performs RDF dataset normalization on the given input. The input is JSON-LD
  * unless the 'inputFormat' option is used. The output is an RDF dataset
- * unless the 'format' option is used.
+ * unless a non-null 'format' option is used.
  *
  * Note: Canonicalization sets `safe` to `true` and `base` to `null` by
  * default in order to produce safe outputs and "fail closed" by default. This
  * is different from the other API transformations in this version which
  * allow unsafe defaults (for cryptographic usage) in order to comply with the
  * JSON-LD 1.1 specification.
  *
- * @param input the input to normalize as JSON-LD or as a format specified by
- *          the 'inputFormat' option.
+ * @param input the input to normalize as JSON-LD given as an RDF dataset or as
+ *          a format specified by the 'inputFormat' option.
  * @param [options] the options to use:
- *          [algorithm] the normalization algorithm to use, `URDNA2015` or
- *            `URGNA2012` (default: `URDNA2015`).
  *          [base] the base IRI to use (default: `null`).
  *          [expandContext] a context to expand with.
  *          [skipExpansion] true to assume the input is expanded and skip
  *            expansion, false not to, defaults to false. Some well-formed
  *            and safe-mode checks may be omitted.
- *          [inputFormat] the format if input is not JSON-LD:
- *            'application/n-quads' for N-Quads.
- *          [format] the format if output is a string:
- *            'application/n-quads' for N-Quads.
+ *          [inputFormat] the input format. null for a JSON-LD object,
+ *            'application/n-quads' for N-Quads. (default: null)
+ *          [format] the output format. null for an RDF dataset,
+ *            'application/n-quads' for an N-Quads string. (default: N-Quads)
  *          [documentLoader(url, options)] the document loader.
- *          [useNative] true to use a native canonize algorithm
  *          [rdfDirection] null or 'i18n-datatype' to support RDF
  *             transformation of @direction (default: null).
  *          [safe] true to use safe mode. (default: true).
+ *          [canonizeOptions] options to pass to rdf-canonize canonize(). See
+ *            rdf-canonize for more details. Commonly used options, and their
+ *            defaults, are:
+ *            algorithm="RDFC-1.0",
+ *            messageDigestAlgorithm="sha256",
+ *            canonicalIdMap,
+ *            maxWorkFactor=1,
+ *            maxDeepIterations=-1,
+ *            and signal=null.
  *          [contextResolver] internal use only.
  *
  * @return a Promise that resolves to the normalized output.
@@ -559,15 +565,19 @@ jsonld.normalize = jsonld.canonize = async function(input, options) {
     throw new TypeError('Could not canonize, too few arguments.');
   }
 
-  // set default options
+  // set toRDF options
   options = _setDefaults(options, {
-    base: _isString(input) ? input : null,
-    algorithm: 'URDNA2015',
     skipExpansion: false,
     safe: true,
     contextResolver: new ContextResolver(
       {sharedCache: _resolvedContextCache})
   });
+
+  // set canonize options
+  const canonizeOptions = Object.assign({}, {
+    algorithm: 'RDFC-1.0'
+  }, options.canonizeOptions || null);
+
   if('inputFormat' in options) {
     if(options.inputFormat !== 'application/n-quads' &&
       options.inputFormat !== 'application/nquads') {
@@ -579,17 +589,18 @@ jsonld.normalize = jsonld.canonize = async function(input, options) {
     const parsedInput = NQuads.parse(input);
 
     // do canonicalization
-    return canonize.canonize(parsedInput, options);
+    return canonize.canonize(parsedInput, canonizeOptions);
   }
 
   // convert to RDF dataset then do normalization
   const opts = {...options};
   delete opts.format;
+  delete opts.canonizeOptions;
   opts.produceGeneralizedRdf = false;
   const dataset = await jsonld.toRDF(input, opts);
 
   // do canonicalization
-  return canonize.canonize(dataset, options);
+  return canonize.canonize(dataset, canonizeOptions);
 };
 
 /**
@@ -653,8 +664,8 @@ jsonld.fromRDF = async function(dataset, options) {
  *          [skipExpansion] true to assume the input is expanded and skip
  *            expansion, false not to, defaults to false. Some well-formed
  *            and safe-mode checks may be omitted.
- *          [format] the format to use to output a string:
- *            'application/n-quads' for N-Quads.
+ *          [format] the output format. null for an RDF dataset,
+ *            'application/n-quads' for an N-Quads string. (default: null)
  *          [produceGeneralizedRdf] true to output generalized RDF, false
  *            to produce only standard RDF (default: false).
  *          [documentLoader(url, options)] the document loader.
@@ -672,7 +683,6 @@ jsonld.toRDF = async function(input, options) {
 
   // set default options
   options = _setDefaults(options, {
-    base: _isString(input) ? input : '',
     skipExpansion: false,
     contextResolver: new ContextResolver(
       {sharedCache: _resolvedContextCache})
diff --git a/lib/toRdf.js b/lib/toRdf.js
@@ -63,12 +63,7 @@ api.toRDF = (input, options) => {
     if(graphName === '@default') {
       graphTerm = {termType: 'DefaultGraph', value: ''};
     } else if(_isAbsoluteIri(graphName)) {
-      if(graphName.startsWith('_:')) {
-        graphTerm = {termType: 'BlankNode'};
-      } else {
-        graphTerm = {termType: 'NamedNode'};
-      }
-      graphTerm.value = graphName;
+      graphTerm = _makeTerm(graphName);
     } else {
       // skip relative IRIs (not valid RDF)
       if(options.eventHandler) {
@@ -119,10 +114,7 @@ function _graphToRDF(dataset, graph, graphTerm, issuer, options) {
 
       for(const item of items) {
         // RDF subject
-        const subject = {
-          termType: id.startsWith('_:') ? 'BlankNode' : 'NamedNode',
-          value: id
-        };
+        const subject = _makeTerm(id);
 
         // skip relative IRI subjects (not valid RDF)
         if(!_isAbsoluteIri(id)) {
@@ -144,10 +136,7 @@ function _graphToRDF(dataset, graph, graphTerm, issuer, options) {
         }
 
         // RDF predicate
-        const predicate = {
-          termType: property.startsWith('_:') ? 'BlankNode' : 'NamedNode',
-          value: property
-        };
+        const predicate = _makeTerm(property);
 
         // skip relative IRI predicates (not valid RDF)
         if(!_isAbsoluteIri(property)) {
@@ -226,13 +215,16 @@ function _listToRDF(list, issuer, dataset, graphTerm, rdfDirection, options) {
 
   const last = list.pop();
   // Result is the head of the list
-  const result = last ? {termType: 'BlankNode', value: issuer.getId()} : nil;
+  const result = last ? {
+    termType: 'BlankNode',
+    value: issuer.getId().slice(2)
+  } : nil;
   let subject = result;
 
   for(const item of list) {
     const object = _objectToRDF(
       item, issuer, dataset, graphTerm, rdfDirection, options);
-    const next = {termType: 'BlankNode', value: issuer.getId()};
+    const next = {termType: 'BlankNode', value: issuer.getId().slice(2)};
     dataset.push({
       subject,
       predicate: first,
@@ -284,14 +276,16 @@ function _listToRDF(list, issuer, dataset, graphTerm, rdfDirection, options) {
 function _objectToRDF(
   item, issuer, dataset, graphTerm, rdfDirection, options
 ) {
-  const object = {};
+  let object;
 
   // convert value object to RDF
   if(graphTypes.isValue(item)) {
-    object.termType = 'Literal';
-    object.value = undefined;
-    object.datatype = {
-      termType: 'NamedNode'
+    object = {
+      termType: 'Literal',
+      value: undefined,
+      datatype: {
+        termType: 'NamedNode'
+      }
     };
     let value = item['@value'];
     const datatype = item['@type'] || null;
@@ -374,13 +368,14 @@ function _objectToRDF(
   } else if(graphTypes.isList(item)) {
     const _list = _listToRDF(
       item['@list'], issuer, dataset, graphTerm, rdfDirection, options);
-    object.termType = _list.termType;
-    object.value = _list.value;
+    object = {
+      termType: _list.termType,
+      value: _list.value
+    };
   } else {
     // convert string/node object to RDF
     const id = types.isObject(item) ? item['@id'] : item;
-    object.termType = id.startsWith('_:') ? 'BlankNode' : 'NamedNode';
-    object.value = id;
+    object = _makeTerm(id);
   }
 
   // skip relative IRIs, not valid RDF
@@ -404,3 +399,24 @@ function _objectToRDF(
 
   return object;
 }
+
+/**
+ * Make a term from an id. Handles BlankNodes and NamedNodes based on a
+ * possible '_:' id prefix. The prefix is removed for BlankNodes.
+ *
+ * @param id a term id.
+ *
+ * @return a term object.
+ */
+function _makeTerm(id) {
+  if(id.startsWith('_:')) {
+    return {
+      termType: 'BlankNode',
+      value: id.slice(2)
+    };
+  }
+  return {
+    termType: 'NamedNode',
+    value: id
+  };
+}
diff --git a/package.json b/package.json
@@ -32,7 +32,7 @@
     "@digitalbazaar/http-client": "^3.4.1",
     "canonicalize": "^1.0.1",
     "lru-cache": "^6.0.0",
-    "rdf-canonize": "^3.4.0"
+    "rdf-canonize": "^4.0.1"
   },
   "devDependencies": {
     "@babel/core": "^7.21.8",
diff --git a/tests/misc.js b/tests/misc.js
@@ -4030,7 +4030,7 @@ _:b0 <ex:p> "v" .
 ]
 ;
       const nq = `\
-_:b0 <_:b1> "v" .
+_:b0 _:b1 "v" .
 `;
 
       await _test({
diff --git a/tests/test.js b/tests/test.js

@@ -4030,7 +4030,7 @@ _:b0 <ex:p> "v" .
 ]
 ;
       const nq = `\
-_:b0 <_:b1> "v" .
+_:b0 _:b1 "v" .
 `;
 
       await _test({