Skip to content

Commit 0c1f41b

Browse files
author
Roberto Sora
authored
[skip changelog] Add stats fetching from Arduino CDN using AWS Athena (arduino#666)
* [skip changelog] Add stats fetching from Arduino CDN using AWS Athena * Fix path typo for athena stats * Fix path typo for athena stats again * Add sh * Add checkout step * Add STATS_ prefix to stats secret and env vars * Use latest version of jq * Use latest version of jq inside fetch action * Use PATH override to use latest version of jq inside fetch action * Fix path typo * Remove push event
1 parent 932f0ed commit 0c1f41b

File tree

2 files changed

+143
-0
lines changed

2 files changed

+143
-0
lines changed

.github/tools/fetch_athena_stats.sh

+93
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
#!/usr/bin/env bash
2+
3+
# This script performs the following:
4+
# 1. Run the query, use jq to extract the QueryExecutionId and store it in a bash variable.
5+
# 2. Wait for the query to finish running (polling every 2 seconds, for up to 240 seconds).
6+
# 3. Get the results.
7+
# 4. Build the JSON data points structure and emit it as the `result` step output.
8+
9+
# Expected env variables are:
10+
# AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY for accessing AWS resources
11+
# AWS_ATHENA_SOURCE_TABLE, the table the query selects from
12+
# AWS_ATHENA_OUTPUT_LOCATION, passed to Athena as the OutputLocation of the query results
13+
# GITHUB_REPOSITORY, reported as the `host` of every data point
14+
15+
set -euo pipefail
16+
17+
# Athena SQL statement: counts the downloads ("gauge") of every arduino-cli
# release artifact ("flavor"), where the flavor is the URL path with the
# /arduino-cli/arduino-cli_ prefix removed.
# The heredoc delimiter is unquoted on purpose so that
# ${AWS_ATHENA_SOURCE_TABLE} is expanded into the statement.
query=$(
  cat << EOM
select
replace(url_extract_path("d.url"), '/arduino-cli/arduino-cli_', '') as flavor,
count("id") as gauge
from ${AWS_ATHENA_SOURCE_TABLE}
where "d.url" like 'https://downloads.arduino.cc/arduino-cli/arduino-cli_%'
and "d.url" not like '%latest%' -- exclude latest redirect
and "d.url" not like '%alpha%' -- exclude early alpha releases
and "d.url" not like '%.tar.bz2%' -- exclude very old releases archive formats
group by 1
EOM
)
28+
29+
# Submit the statement to Athena and keep the id of the started execution;
# the id is what the polling and result-fetching steps below refer to.
startQueryArgs=(
  --query-string "${query}"
  --query-execution-context "Database=demo_books"
  --result-configuration "OutputLocation=${AWS_ATHENA_OUTPUT_LOCATION}"
  --region us-east-1
)
queryExecutionId=$(aws athena start-query-execution "${startQueryArgs[@]}" | jq -r ".QueryExecutionId")
36+
37+
echo "QueryExecutionId is ${queryExecutionId}"

# Poll Athena every 2 seconds, at most 120 times (240 seconds overall), until
# the query reaches a terminal state.
queryState=""
for ((attempt = 1; attempt <= 120; attempt++)); do
  queryState=$(
    aws athena get-query-execution \
      --query-execution-id "${queryExecutionId}" \
      --region us-east-1 | jq -r ".QueryExecution.Status.State"
  )

  if [[ "${queryState}" == "SUCCEEDED" ]]; then
    break
  fi

  echo "QueryExecutionId ${queryExecutionId} - state is ${queryState}"

  # FAILED and CANCELLED are terminal states: polling further cannot succeed.
  if [[ "${queryState}" == "FAILED" || "${queryState}" == "CANCELLED" ]]; then
    exit 1
  fi

  sleep 2
done

# The loop also ends when the polling budget is exhausted; in that case the
# query is still queued or running and there are no results to process.
if [[ "${queryState}" != "SUCCEEDED" ]]; then
  echo "QueryExecutionId ${queryExecutionId} did not succeed within 240 seconds (last state: ${queryState})" >&2
  exit 1
fi
56+
57+
echo "Query succeeded. Processing data"

# Download the result set of the finished query and collapse it to single-line
# JSON. The identity filter '.' is passed explicitly: jq releases before 1.6
# refuse to run without a filter argument.
queryResult=$(
  aws athena get-query-results \
    --query-execution-id "${queryExecutionId}" \
    --region us-east-1 | jq --compact-output '.'
)
62+
63+
# printf format for a single data point. The four %s placeholders are, in
# order: the gauge value, the version tag, the os tag and the arch tag.
# ${GITHUB_REPOSITORY} is expanded here, when the template is defined.
# The trailing comma separates the data point from the one rendered after it.
jsonTemplate=$(
  cat << EOM
{
"type": "gauge",
"name": "arduino.downloads.total",
"value": "%s",
"host": "${GITHUB_REPOSITORY}",
"tags": [
"version:%s",
"os:%s",
"arch:%s",
"cdn:downloads.arduino.cc",
"project:arduino-cli"
]
},
EOM
)
78+
79+
# Render one data point per Athena result row using ${jsonTemplate}. The first
# row of ${queryResult} is skipped because it holds the column headers.
datapoints="["
# Rows are read line by line (jq prints one compact JSON array per row) instead
# of through an unquoted $(...) word list, so values containing whitespace or
# glob characters are neither split nor expanded.
while IFS= read -r row; do
  value=$(jq -r ".[1].VarCharValue" <<< "${row}")
  tag=$(jq -r ".[0].VarCharValue" <<< "${row}")
  # Split e.g. 0.6.0_Windows_32bit.zip into 0.6.0, Windows and 32bit.zip
  IFS='_' read -r -a split <<< "${tag}"
  if [[ ${#split[@]} -ne 3 ]]; then
    # Not a <version>_<os>_<arch> artifact name: ignore the row.
    continue
  fi
  # Drop the archive extension: 32bit.zip -> 32bit
  arch="${split[2]%%.*}"
  # shellcheck disable=SC2059 # jsonTemplate is a printf format by design
  datapoints+=$(printf "${jsonTemplate}" "${value}" "${split[0]}" "${split[1]}" "${arch}")
done < <(jq --compact-output '.ResultSet.Rows[1:][] | .Data' <<< "${queryResult}")

# Every rendered data point ends with a comma: strip the last one before
# closing the array. With no data points there is no comma to strip and the
# result is the valid empty array [].
datapoints="${datapoints%,}]"

# Validate and compact the JSON in a separate assignment so that, with the
# script's `set -e`, malformed JSON aborts the run instead of silently
# publishing an empty output.
result=$(jq --compact-output '.' <<< "${datapoints}")

# Publish the `result` step output. Newer runners provide the GITHUB_OUTPUT
# file; the ::set-output workflow command is kept as a fallback for
# environments that do not define it.
if [[ -n "${GITHUB_OUTPUT:-}" ]]; then
  printf 'result=%s\n' "${result}" >> "${GITHUB_OUTPUT}"
else
  echo "::set-output name=result::${result}"
fi

.github/workflows/arduino-stats.yaml

+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Collects the arduino-cli download counts from the Arduino CDN logs (queried
# through AWS Athena by .github/tools/fetch_athena_stats.sh) and pushes them to
# Datadog as metrics. A Datadog error event is sent when a step fails.
name: download-stats

on:
  schedule:
    # run once a day at 12:00 UTC (minute 0 of hour 12; a `*` in the minute
    # field would trigger a run for every minute of that hour)
    - cron: '0 12 * * *'

jobs:
  push-stats:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v1

      - name: Fetch downloads count from Arduino CDN using AWS Athena
        id: fetch
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.STATS_AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.STATS_AWS_SECRET_ACCESS_KEY }}
          AWS_ATHENA_SOURCE_TABLE: ${{ secrets.STATS_AWS_ATHENA_SOURCE_TABLE }}
          AWS_ATHENA_OUTPUT_LOCATION: ${{ secrets.STATS_AWS_ATHENA_OUTPUT_LOCATION }}
          GITHUB_REPOSITORY: ${{ github.repository }}
        run: |
          # Fetch jq 1.6 as VM has only 1.5 ATM
          wget -q https://github.com/stedolan/jq/releases/download/jq-1.6/jq-linux64 -O jq
          chmod +x jq
          # Put the workspace first so the downloaded jq shadows the system one
          PATH=${{ github.workspace }}:$PATH
          .github/tools/fetch_athena_stats.sh

      - name: Send metrics
        uses: masci/datadog@v1
        with:
          api-key: ${{ secrets.DD_API_KEY }}
          # Metrics input expects YAML but JSON will work just right.
          metrics: ${{steps.fetch.outputs.result}}

      - name: Report failure
        if: failure()
        uses: masci/datadog@v1
        with:
          api-key: ${{ secrets.DD_API_KEY }}
          events: |
            - title: "Arduino CLI stats failing"
              text: "Stats collection failed"
              alert_type: "error"
              host: ${{ github.repository }}
              tags:
                - "project:arduino-cli"
                - "cdn:downloads.arduino.cc"

0 commit comments

Comments
 (0)