Skip to content

Commit fb9fba1

Browse files
authored
Add automation to creating boto patches (#3742)
* Create more automation for boto data gathering
1 parent 0bb6ab4 commit fb9fba1

File tree

1,174 files changed

+57415
-7470
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,174 files changed

+57415
-7470
lines changed

.github/workflows/maintenance-v1.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ jobs:
2525
cfn-lint --update-documentation
2626
scripts/update_specs_from_pricing.py
2727
scripts/update_serverless_aws_policies.py
28-
scripts/update_schemas_from_boto.py
28+
scripts/boto/update_schemas_from_boto.py
2929
scripts/update_schemas_from_aws_api.py
3030
cfn-lint --update-specs
3131
echo "date=$(date +'%Y-%m-%d')" >> $GITHUB_OUTPUT

scripts/boto/_automated_patches.py

+210
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
"""
2+
Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
SPDX-License-Identifier: MIT-0
4+
"""
5+
6+
import json
7+
from pathlib import Path
8+
from typing import Any
9+
10+
from _types import AllPatches, Patch, ResourcePatches
11+
12+
# Service names (as returned by ``renamer``) that ``each_boto_service``
# skips without looking for resource schemas.
skip = [
    "account",
    "chime",
    "chimesdkidentity",
    "chimesdkmessaging",
    "chimesdkmeetings",
    "chimesdkvoice",
    "paymentcryptographydata",
    "rdsdata",
    "finspacedata",
    "appconfigdata",
    "iotjobsdata",
    "dataexchange",
    "bedrockruntime",
    "swf",
    "cloudhsm",
    "cloudhsmv2",
    "workdocs",
]

# Schema property names that ``_per_resource_patch`` never patches.
skip_property_names = ["State"]

# Keywords checked on both sides in ``_per_resource_patch``: a schema value
# that already has one is left alone, and a boto shape needs at least one
# of them to produce a patch.
_fields = ["pattern", "enum"]
35+
36+
37+
def renamer(name):
    """Translate a botocore service directory name into a lowercase key.

    Names present in the override table map to the lowercased override
    value. Every other name has its hyphens removed and is lowercased.
    """
    overrides = {
        "acm": "CertificateManager",
        "mq": "AmazonMQ",
        "kafka": "MSK",
        "firehose": "KinesisFirehose",
        "es": "ElasticSearch",
    }
    replacement = overrides.get(name)
    if replacement is not None:
        return replacement.lower()

    return name.replace("-", "").lower()
49+
50+
51+
def get_shapes(data: dict[str, Any], name: str):
    """Collect the enum values of every shape in a botocore service model.

    Args:
        data: Parsed botocore service model.
        name: Operation name; it must declare an input shape for anything
            to be returned.

    Returns:
        ``{shape_name: {"enum": [...]}}`` for each entry under ``shapes``
        that has an ``enum`` key, or an empty dict when the operation has
        no input shape.
    """
    operation = data.get("operations", {}).get(name, {})
    if not operation.get("input", {}).get("shape"):
        return {}

    return {
        shape_name: {"enum": shape.get("enum")}
        for shape_name, shape in data.get("shapes", {}).items()
        if "enum" in shape
    }
63+
64+
65+
def get_schema_create_operations(data: dict[str, Any]) -> list[str]:
    """List the create-like API actions named by a schema's create handler.

    Each ``handlers.create.permissions`` entry containing ``:`` contributes
    the text between its first and second ``:``. The action is kept when it
    starts with one of the known creation verbs.

    Args:
        data: Parsed resource schema.

    Returns:
        Matching action names, in the order they appear in the schema.
    """
    creation_verbs = ("Put", "Add", "Create", "Register", "Allocate", "Start", "Run")

    permissions = data.get("handlers", {}).get("create", {}).get("permissions", [])
    actions = (entry.split(":")[1] for entry in permissions if ":" in entry)

    # None of the verbs is a prefix of another, so a single tuple test keeps
    # each action at most once.
    return [action for action in actions if action.startswith(creation_verbs)]
79+
80+
81+
def get_last_date(service_dir: Path) -> str:
    """Return the highest-sorting sub-directory name of ``service_dir``.

    Names are compared as plain strings. ``"0000-00-00"`` is returned when
    no sub-directory name sorts above it; non-directory entries are ignored.
    """
    candidates = ["0000-00-00"]
    candidates.extend(
        entry.name for entry in service_dir.iterdir() if entry.is_dir()
    )
    return max(candidates)
91+
92+
93+
def _per_resource_patch(
    schema_data: dict[str, Any], boto_data: dict[str, Any], source: list[str]
) -> ResourcePatches:
    """Build enum/pattern patches for one resource schema from botocore data.

    For every create operation returned by ``get_schema_create_operations``,
    the members of that operation's botocore input shape are matched
    case-insensitively against the schema's top-level properties. A patch is
    recorded when the schema property (or the definition its ``$ref`` points
    to) has neither a ``pattern`` nor an ``enum`` while the botocore member
    shape has at least one of them.

    Args:
        schema_data: Parsed resource schema.
        boto_data: Parsed botocore service model.
        source: Value stored as ``source`` on every generated patch.

    Returns:
        Mapping of schema path to a ``Patch`` naming the botocore shape.
    """
    results: ResourcePatches = {}
    boto_shapes = boto_data.get("shapes", {})
    boto_operations = boto_data.get("operations", {})
    properties = schema_data.get("properties", {})

    for create_operation in get_schema_create_operations(schema_data):
        create_shape = (
            boto_operations.get(create_operation, {}).get("input", {}).get("shape")
        )
        members = boto_shapes.get(create_shape, {}).get("members", {})

        for member, member_data in members.items():
            member_shape_name = member_data.get("shape")
            member_shape = boto_shapes.get(member_shape_name, {})

            # A member whose shape has no enum/pattern can never produce a
            # patch, so decide that once instead of once per property.
            if not any(member_shape.get(field) for field in _fields):
                continue

            member_key = member.lower()
            for p_name, p_data in properties.items():
                if p_name in skip_property_names:
                    continue
                if p_name.lower() != member_key:
                    continue

                path = f"/properties/{p_name}"

                if "$ref" in p_data:
                    pointer = p_data["$ref"].split("/")
                    # Too short to name "<section>/<name>"; skip it rather
                    # than fail on the index lookups below.
                    if len(pointer) < 3:
                        continue
                    p_data = schema_data.get(pointer[1], {}).get(pointer[2], {})
                    if not p_data:
                        continue
                    path = f"/{'/'.join(pointer[1:])}"

                # skip if we already have an enum or pattern
                if any(p_data.get(field) for field in _fields):
                    continue

                results[path] = Patch(
                    source=source,
                    shape=member_shape_name,
                )

    return results
143+
144+
145+
def get_resource_patches(
    service_dir: Path, schema_path: Path, service_name: str, last_date: str
) -> AllPatches:
    """Compute patches for every resource schema of one service.

    Loads ``<service_dir>/<last_date>/service-2.json`` and every
    ``aws-<service_name>-*.json`` file found in ``schema_path``, then merges
    the result of ``_per_resource_patch`` under each schema's ``typeName``.

    Args:
        service_dir: botocore data directory of the service.
        schema_path: Directory holding the resource schema files.
        service_name: Service key used in the schema file-name pattern.
        last_date: Name of the API-version sub-directory to read.

    Returns:
        Mapping of resource type name to its patches. Empty when the service
        model file is missing or empty, or when no schema file matches.
    """
    results: AllPatches = {}

    services_file = service_dir / last_date / "service-2.json"
    if not services_file.exists():
        return results

    # Decode as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    with open(services_file, "r", encoding="utf-8") as f:
        boto_data = json.load(f)

    if not boto_data:
        return results

    resources = list(schema_path.glob(f"aws-{service_name}-*.json"))
    if not resources:
        print(f"No resource files found for {service_name}")

    for resource in resources:
        with open(resource, "r", encoding="utf-8") as f:
            schema_data = json.load(f)

        resource_type = schema_data.get("typeName", "")
        results.setdefault(resource_type, {}).update(
            _per_resource_patch(
                schema_data, boto_data, [service_dir.name, last_date]
            )
        )

    return results
181+
182+
183+
def each_boto_service(boto_path: Path, schema_path: Path) -> AllPatches:
    """Gather patches for every service directory in a botocore checkout.

    Walks ``<boto_path>/botocore-master/botocore/data``, ignoring plain
    files and services whose renamed key is listed in ``skip``. For each
    remaining service the newest date directory is used, and only resource
    types with a non-empty patch set are kept.

    Args:
        boto_path: Directory containing the ``botocore-master`` tree.
        schema_path: Directory holding the resource schema files.

    Returns:
        Mapping of resource type name to its non-empty patches.
    """
    data_root = boto_path / "botocore-master" / "botocore" / "data"
    collected: AllPatches = {}

    for service_dir in data_root.iterdir():
        if not service_dir.is_dir():
            continue

        service_name = renamer(service_dir.name)
        if service_name in skip:
            continue

        service_patches = get_resource_patches(
            service_dir, schema_path, service_name, get_last_date(service_dir)
        )
        collected.update(
            (type_name, patches)
            for type_name, patches in service_patches.items()
            if patches
        )

    return collected
207+
208+
209+
def build_automated_patches(boto_path: Path, schema_path: Path) -> AllPatches:
    """Return the automated patches by delegating to ``each_boto_service``.

    Args:
        boto_path: Passed through unchanged as ``boto_path``.
        schema_path: Passed through unchanged as ``schema_path``.
    """
    patches = each_boto_service(boto_path, schema_path)
    return patches

0 commit comments

Comments
 (0)