Skip to content

Commit ea38dc1

Browse files
Author: James (ODSC)
Merge pull request #139 from OpenDataServices/2023-11-23
Descend into nullable objects and arrays
2 parents (e7b4fa4 + 5309f7b), commit ea38dc1

File tree

4 files changed: 103 additions and 28 deletions

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
2020
### Fixed
2121

2222
- Calculate additional codelist values for schema using `anyOf` or `oneOf`, like OCDS record packages https://github.com/open-contracting/lib-cove-ocds/issues/106
23+
- Descend into nullable objects and arrays. (For example, OCDS `parties/details` is nullable, and additional codes for `parties/details/scale` were unreported.) https://github.com/OpenDataServices/lib-cove/pull/131
2324

2425
## [0.31.0] - 2023-07-06
2526

libcove/lib/common.py

Lines changed: 22 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -430,6 +430,13 @@ def schema_dict_fields_generator(schema_dict):
430430
yield field
431431

432432

433+
def _get_types(value: dict):
434+
types = value.get("type", [])
435+
if not isinstance(types, list):
436+
return [types]
437+
return types
438+
439+
433440
def get_schema_codelist_paths(
434441
schema_obj, obj=None, current_path=(), codelist_paths=None, use_extensions=False
435442
):
@@ -465,10 +472,11 @@ def get_schema_codelist_paths(
465472
descendants = [value]
466473

467474
for value in descendants:
468-
if value.get("type") == "object":
475+
types = _get_types(value)
476+
if "object" in types:
469477
get_schema_codelist_paths(None, value, path, codelist_paths)
470-
elif value.get("type") == "array" and isinstance(value.get("items"), dict):
471-
if value.get("items").get("type") == "string":
478+
elif "array" in types and isinstance(value.get("items"), dict):
479+
if "string" in _get_types(value["items"]):
472480
if "codelist" in value["items"] and path not in codelist_paths:
473481
codelist_paths[path] = (
474482
value["items"]["codelist"],
@@ -1258,10 +1266,11 @@ def _get_schema_deprecated_paths(
12581266
)
12591267
)
12601268

1261-
if value.get("type") == "object":
1269+
types = _get_types(value)
1270+
if "object" in types:
12621271
_get_schema_deprecated_paths(None, value, path, deprecated_paths)
12631272
elif (
1264-
value.get("type") == "array"
1273+
"array" in types
12651274
and isinstance(value.get("items"), dict)
12661275
and value.get("items").get("properties")
12671276
):
@@ -1303,10 +1312,11 @@ def _get_schema_non_required_ids(
13031312
if prop == "id" and no_required_id and array_parent and not list_merge:
13041313
id_paths.append(path)
13051314

1306-
if value.get("type") == "object":
1315+
types = _get_types(value)
1316+
if "object" in types:
13071317
_get_schema_non_required_ids(None, value, path, id_paths)
13081318
elif (
1309-
value.get("type") == "array"
1319+
"array" in types
13101320
and isinstance(value.get("items"), dict)
13111321
and value.get("items").get("properties")
13121322
):
@@ -1350,16 +1360,18 @@ def add_is_codelist(obj):
13501360
)
13511361
continue
13521362

1363+
types = _get_types(value)
1364+
13531365
if "codelist" in value:
1354-
if "array" in value.get("type", ""):
1366+
if "array" in types:
13551367
value["items"]["isCodelist"] = True
13561368
else:
13571369
value["isCodelist"] = True
13581370

1359-
if value.get("type") == "object":
1371+
if "object" in types:
13601372
add_is_codelist(value)
13611373
elif (
1362-
value.get("type") == "array"
1374+
"array" in types
13631375
and isinstance(value.get("items"), dict)
13641376
and value.get("items").get("properties")
13651377
):

tests/lib/fixtures/common/schema_nullable_object_and_array.json

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,39 @@
1+
{
2+
"properties": {
3+
"array": {
4+
"type": ["array", "null"],
5+
"items": {
6+
"$ref": "#/definitions/Object"
7+
}
8+
},
9+
"object": {
10+
"$ref": "#/definitions/Object"
11+
}
12+
},
13+
"definitions": {
14+
"Object": {
15+
"type": ["object", "null"],
16+
"properties": {
17+
"id": {
18+
"type": "string"
19+
},
20+
"scale": {
21+
"type": ["array", "null"],
22+
"items": {
23+
"type": "string",
24+
"enum": [
25+
"small",
26+
"large"
27+
]
28+
},
29+
"codelist": "partyScale.csv",
30+
"openCodelist": false,
31+
"deprecated": {
32+
"deprecatedVersion": "1.1",
33+
"description": ""
34+
}
35+
}
36+
}
37+
}
38+
}
39+
}

tests/lib/test_common.py

Lines changed: 41 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
_get_schema_deprecated_paths,
1414
add_field_coverage,
1515
add_field_coverage_percentages,
16+
common_checks_context,
1617
fields_present_generator,
1718
get_additional_codelist_values,
1819
get_additional_fields_info,
@@ -21,13 +22,26 @@
2122
get_json_data_deprecated_fields,
2223
get_json_data_generic_paths,
2324
get_orgids_prefixes,
25+
get_schema_codelist_paths,
2426
get_schema_validation_errors,
2527
org_id_file_fresh,
2628
schema_dict_fields_generator,
2729
unique_ids,
2830
)
2931

3032

33+
def get_schema_obj(fixture):
34+
schema_obj = SchemaJsonMixin()
35+
schema_obj.schema_host = os.path.join(
36+
os.path.dirname(os.path.realpath(__file__)), "fixtures", "common/"
37+
)
38+
schema_obj.release_pkg_schema_name = f"{fixture}.json"
39+
schema_obj.pkg_schema_url = os.path.join(
40+
schema_obj.schema_host, schema_obj.release_pkg_schema_name
41+
)
42+
return schema_obj
43+
44+
3145
def test_unique_ids_False():
3246
ui = False
3347
schema = {"uniqueItems": ui}
@@ -187,15 +201,8 @@ def test_get_json_data_deprecated_fields():
187201
) as fp:
188202
json_data_w_deprecations = json.load(fp)
189203

190-
schema_obj = SchemaJsonMixin()
191-
schema_obj.schema_host = os.path.join(
192-
os.path.dirname(os.path.realpath(__file__)), "fixtures", "common/"
193-
)
194-
schema_obj.release_pkg_schema_name = (
195-
"release_package_schema_ref_release_schema_deprecated_fields.json"
196-
)
197-
schema_obj.pkg_schema_url = os.path.join(
198-
schema_obj.schema_host, schema_obj.release_pkg_schema_name
204+
schema_obj = get_schema_obj(
205+
"release_package_schema_ref_release_schema_deprecated_fields"
199206
)
200207
json_data_paths = get_json_data_generic_paths(
201208
json_data_w_deprecations, generic_paths={}
@@ -293,15 +300,8 @@ def test_fields_present_10():
293300

294301

295302
def test_get_schema_deprecated_paths():
296-
schema_obj = SchemaJsonMixin()
297-
schema_obj.schema_host = os.path.join(
298-
os.path.dirname(os.path.realpath(__file__)), "fixtures", "common/"
299-
)
300-
schema_obj.release_pkg_schema_name = (
301-
"release_package_schema_ref_release_schema_deprecated_fields.json"
302-
)
303-
schema_obj.pkg_schema_url = os.path.join(
304-
schema_obj.schema_host, schema_obj.release_pkg_schema_name
303+
schema_obj = get_schema_obj(
304+
"release_package_schema_ref_release_schema_deprecated_fields"
305305
)
306306
deprecated_paths = _get_schema_deprecated_paths(schema_obj)
307307
expected_results = [
@@ -1379,3 +1379,26 @@ def test_get_additional_codelist_values_oneOf():
13791379
"extension_codelist": False,
13801380
}
13811381
}
1382+
1383+
1384+
def test_nullable_objects_and_arrays(tmpdir):
1385+
json_data = {
1386+
"array": [{"scale": ["a"]}],
1387+
"object": {"scale": ["b"]},
1388+
}
1389+
schema_obj = get_schema_obj("schema_nullable_object_and_array")
1390+
context = {"file_type": "json"}
1391+
common_checks_context(tmpdir, json_data, schema_obj, "", context)
1392+
1393+
# _get_schema_non_required_ids
1394+
assert context["structure_warnings"] == {"missing_ids": ["array/0/id"]}
1395+
1396+
# _get_schema_deprecated_paths
1397+
assert context["deprecated_fields"] == {
1398+
"scale": {"explanation": ("1.1", ""), "paths": ("object",)}
1399+
}
1400+
1401+
assert get_schema_codelist_paths(schema_obj, json_data) == {
1402+
("array", "scale"): ("partyScale.csv", False),
1403+
("object", "scale"): ("partyScale.csv", False),
1404+
}

0 commit comments

Comments
 (0)