Skip to content

Commit ba177be

Browse files
committed
[skip changelog] Add missing athena script
1 parent 0eb2d25 commit ba177be

File tree

1 file changed

+131
-0
lines changed

1 file changed

+131
-0
lines changed

Diff for: .github/tools/fetch_athena_stats.py

+131
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
import boto3
2+
import semver
3+
import os
4+
import logging
5+
import uuid
6+
import time
7+
8+
9+
# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
10+
# Root logger, used by this script for its own progress messages.
log = logging.getLogger()

# Raise the threshold of the AWS SDK / HTTP client loggers to CRITICAL so
# their lower-severity records are filtered out.
for _noisy_logger_name in ("boto3", "botocore", "urllib3"):
    logging.getLogger(_noisy_logger_name).setLevel(logging.CRITICAL)
14+
15+
16+
def execute(client, statement, dest_s3_output_location):
    """Submit a statement to Athena and block until it has completed.

    Args:
        client: Object exposing ``start_query_execution`` and
            ``get_query_execution`` (the caller passes a boto3 Athena client).
        statement: Query text sent as ``QueryString``.
        dest_s3_output_location: Value sent as
            ``ResultConfiguration.OutputLocation``.

    Returns:
        The ``QueryExecutionId`` reported for the submitted query.

    Raises:
        Whatever ``wait_for_query_execution_completion`` raises when the
        query ends in a failed or cancelled state.
    """
    log.info("execute query: {} dumping in {}".format(statement, dest_s3_output_location))

    # A fresh random token is generated for every submission.
    request_token = str(uuid.uuid4())
    output_config = {"OutputLocation": dest_s3_output_location}
    response = client.start_query_execution(
        QueryString=statement,
        ClientRequestToken=request_token,
        ResultConfiguration=output_config,
    )

    query_id = response["QueryExecutionId"]
    log.info("wait for query {} completion".format(query_id))
    wait_for_query_execution_completion(client, query_id)
    log.info("operation successful")
    return query_id
30+
31+
32+
def wait_for_query_execution_completion(client, query_execution_id, poll_interval=1):
    """Poll a query execution until it reaches a terminal state.

    Args:
        client: Object exposing ``get_query_execution`` (the caller passes a
            boto3 Athena client).
        query_execution_id: Identifier of the execution to watch.
        poll_interval: Seconds to sleep between two polls while the query is
            in a non-terminal state. Defaults to 1.

    Returns:
        None, once the reported state is ``SUCCEEDED``.

    Raises:
        RuntimeError: If the reported state is ``FAILED`` or ``CANCELLED``.
            The message carries ``StateChangeReason`` when the response
            includes one.
    """
    while True:
        response = client.get_query_execution(QueryExecutionId=query_execution_id)
        status = response["QueryExecution"]["Status"]
        state = status["State"]

        if state == "SUCCEEDED":
            return

        if state in ("FAILED", "CANCELLED"):
            # The reason is read defensively so a response without it still
            # produces the intended error instead of a KeyError.
            reason = status.get("StateChangeReason", "no reason reported")
            # RuntimeError (not bare BaseException) so that ordinary
            # ``except Exception`` handlers can see the failure.
            raise RuntimeError("query failed or canceled: {}".format(reason))

        # Any other state is treated as "still in progress".
        time.sleep(poll_interval)
45+
46+
47+
def valid(key):
    """Tell whether the text before the first underscore of *key* is a semver.

    Args:
        key: String such as ``0.18.0_macOS_64bit.tar.gz``.

    Returns:
        True when ``semver.parse`` accepts the leading segment, False when it
        raises ``ValueError``.
    """
    # str.split with an explicit separator always yields at least one
    # element, so the leading segment can be taken without a length check.
    version_text = key.split("_")[0]
    # NOTE(review): newer python-semver releases mark the module-level
    # ``semver.parse`` as deprecated in favour of ``Version.parse`` -- confirm
    # against the version this workflow installs before switching.
    try:
        semver.parse(version_text)
    except ValueError:
        return False
    return True
56+
57+
58+
def get_results(client, execution_id):
    """Collect the rows of a finished query into a ``{key: value}`` dict.

    The first column of every row is used as the key and the second column
    as the value. Rows whose key is NULL or is rejected by ``valid`` are
    skipped.

    Args:
        client: Object exposing ``get_paginator`` (the caller passes a boto3
            Athena client).
        execution_id: Identifier of the query execution to read.

    Returns:
        Dict mapping the first-column string to the second-column string.
    """
    paginator = client.get_paginator("get_query_results")
    pages = paginator.paginate(QueryExecutionId=execution_id, PaginationConfig={"PageSize": 1000})

    res = {}
    for page_number, page in enumerate(pages):
        rows = page["ResultSet"]["Rows"]
        if page_number == 0:
            # Per the Athena GetQueryResults documentation the column-header
            # row is only the first row of the first page; slicing every page
            # would silently drop one data row per additional page.
            rows = rows[1:]
        for row in rows:
            cells = row["Data"]
            # A NULL cell carries no "VarCharValue" entry.
            key = cells[0].get("VarCharValue")
            if key is None or not valid(key):
                continue
            res[key] = cells[1]["VarCharValue"]

    return res
70+
71+
72+
def convert_data(data):
    """Turn ``{flavor: count}`` pairs into a list of gauge metric dicts.

    A flavor key is expected to look like ``0.18.0_macOS_64bit.tar.gz``:
    exactly three underscore-separated fields (version, OS, architecture plus
    file extension). Keys with a different number of fields, or whose
    architecture field (text before the first dot) is longer than 10
    characters, are skipped.

    Args:
        data: Mapping of flavor key to download count.

    Returns:
        A list with one dict per accepted key, holding ``type``, ``name``,
        ``value``, ``host`` and ``tags`` entries. Empty when no key is
        accepted.

    Raises:
        KeyError: If at least one key is accepted and the
            ``GITHUB_REPOSITORY`` environment variable is not set.
        IndexError: If at least one key is accepted and
            ``GITHUB_REPOSITORY`` contains no ``/``.
    """
    accepted = []
    for key, value in data.items():
        fields = key.split("_")
        if len(fields) != 3:
            continue
        version, os_version, arch_with_ext = fields
        # Keep only what precedes the first dot, dropping the file extension.
        arch = arch_with_ext.split(".")[0]
        if len(arch) > 10:
            # Too long to plausibly be an architecture name; crude but
            # sufficient filter for stray keys.
            continue
        accepted.append((version, os_version, arch, value))

    if not accepted:
        # Nothing to emit, so the environment is deliberately not consulted.
        return []

    # Read the repository once instead of once per row.
    repository = os.environ["GITHUB_REPOSITORY"]
    repo = repository.split("/")[1]

    return [
        {
            "type": "gauge",
            "name": "arduino.downloads.total",
            "value": value,
            "host": repository,
            "tags": [
                f"version:{version}",
                f"os:{os_version}",
                f"arch:{arch}",
                "cdn:downloads.arduino.cc",
                f"project:{repo}",
            ],
        }
        for version, os_version, arch, value in accepted
    ]
106+
107+
108+
if __name__ == "__main__":
    # Script entry point: refresh the table partitions, count downloads per
    # flavor through Athena, and publish the converted metrics as the
    # ``result`` step output.
    DEST_S3_OUTPUT = os.environ["AWS_ATHENA_OUTPUT_LOCATION"]
    AWS_ATHENA_SOURCE_TABLE = os.environ["AWS_ATHENA_SOURCE_TABLE"]

    session = boto3.session.Session(region_name="us-east-1")
    athena_client = session.client("athena")

    # Load all partitions before querying downloads
    execute(athena_client, f"MSCK REPAIR TABLE {AWS_ATHENA_SOURCE_TABLE};", DEST_S3_OUTPUT)

    # The flavor is the download URL with its fixed prefix stripped; URLs
    # containing "latest" are excluded.
    query = f"""SELECT replace(json_extract_scalar(url_decode(url_decode(querystring)),
'$.data.url'), 'https://downloads.arduino.cc/arduino-ide/arduino-ide_', '')
AS flavor, count(json_extract(url_decode(url_decode(querystring)),'$')) AS gauge
FROM {AWS_ATHENA_SOURCE_TABLE}
WHERE json_extract_scalar(url_decode(url_decode(querystring)),'$.data.url')
LIKE 'https://downloads.arduino.cc/arduino-ide/arduino-ide_%'
AND json_extract_scalar(url_decode(url_decode(querystring)),'$.data.url')
NOT LIKE '%latest%' -- exclude latest redirect
group by 1 ;"""
    exec_id = execute(athena_client, query, DEST_S3_OUTPUT)
    results = get_results(athena_client, exec_id)
    result_json = convert_data(results)

    # NOTE(review): ``result_json`` is a Python list, so the interpolation
    # below emits its repr (single-quoted strings), not strict JSON -- confirm
    # the consumer of the ``result`` output accepts that form.
    github_output_path = os.environ.get("GITHUB_OUTPUT")
    if github_output_path:
        # Preferred mechanism: append ``name=value`` to the environment file
        # the runner provides; the ``set-output`` command is deprecated.
        with open(github_output_path, "a", encoding="utf-8") as output_file:
            output_file.write(f"result={result_json}\n")
    else:
        # Fallback for runners that do not provide GITHUB_OUTPUT.
        print(f"::set-output name=result::{result_json}")

0 commit comments

Comments
 (0)