Skip to content

Commit 87b2ae7

Browse files
committed
Add workflow to compare engine sync performance
The tool must work through an ever-growing list of thousands of libraries and tens of thousands of library releases every hour. For this reason, it's important to consider the performance impact of any changes. The workflow runs the engine through a fixed subset of the registry, comparing the performance of the engine built from the tip of the branch against that of the engine built from the base ref. The workflow has three trigger events, each with its own base ref: - push: parent commit - pull request: PR base ref - manual trigger: arbitrary ref selected by the user Notes: The earliest commit compatible with the workflow is c36c3d2, which added the `go:build` task used to build the engine. In order to build engine versions from commits before 7dd8f69, a repository secret named `REPO_SCOPE_TOKEN` must be defined with a GitHub access token that has repo scope in order to install the `github.com/arduino/arduino-modules/git` dependency the engine had at that time, which is hosted in a private repository. All versions from then on can be built by anyone without any secrets, so it is possible to use the workflow to evaluate the immediate effect of pull requests from forks, which do not have secrets access.
1 parent e971426 commit 87b2ae7

File tree

1 file changed

+283
-0
lines changed

1 file changed

+283
-0
lines changed
Lines changed: 283 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,283 @@
# Compares engine sync performance between the ref under test and a comparison ref.
name: Compare Performance

env:
  # Name of the workflow artifact that collects the report files of all benchmark jobs.
  REPORTS_ARTIFACT_NAME: reports
  # Go version used for engine versions that have a go.mod file.
  # See: https://github.com/actions/setup-go/tree/v2#readme
  GO_VERSION: "1.16"
7+
# See: https://docs.github.com/en/free-pro-team@latest/actions/reference/events-that-trigger-workflows
on:
  # Only changes that can affect the engine build or this workflow trigger a comparison.
  push:
    paths:
      - ".github/workflows/compare-performance.ya?ml"
      - "**/go.mod"
      - "**/go.sum"
      - "Taskfile.ya?ml"
      - "**.go"
  pull_request:
    paths:
      - ".github/workflows/compare-performance.ya?ml"
      - "**/go.mod"
      - "**/go.sum"
      - "Taskfile.ya?ml"
      - "**.go"
  workflow_dispatch:
    inputs:
      comparison-ref:
        description: Comparison ref
        # A manual run has no implicit base to compare against, so the ref must always be supplied.
        required: true
28+
29+
jobs:
# Resolves the ref the tip is compared against and exposes it to the benchmark jobs as the `base-ref` output.
init:
  runs-on: ubuntu-latest

  outputs:
    base-ref: ${{ steps.base-ref.outputs.ref }}

  steps:
    - name: Determine comparison ref
      id: base-ref
      env:
        # Context values are handed over through the environment instead of being interpolated into the script
        # text, so a ref containing shell metacharacters can't change the commands that are run.
        EVENT_NAME: ${{ github.event_name }}
        DISPATCH_REF: ${{ github.event.inputs.comparison-ref }}
        PULL_REQUEST_BASE_REF: ${{ github.base_ref }}
        PUSH_BEFORE_SHA: ${{ github.event.before }}
      run: |
        if [[ "$EVENT_NAME" == "workflow_dispatch" ]]; then
          # Manual trigger: arbitrary ref selected by the user
          COMPARISON_REF="$DISPATCH_REF"
        elif [[ "$EVENT_NAME" == "pull_request" ]]; then
          # Pull request: PR base ref
          COMPARISON_REF="$PULL_REQUEST_BASE_REF"
        else
          # Push: the commit the branch pointed to before the push
          COMPARISON_REF="$PUSH_BEFORE_SHA"
        fi

        # Fail here with a clear message rather than in every benchmark job. An empty ref would not give a
        # meaningful comparison, and an all-zero SHA (reported by push events that have no previous commit, e.g. a
        # newly created branch) can't be checked out.
        if [[ -z "$COMPARISON_REF" || "$COMPARISON_REF" =~ ^0{40}$ ]]; then
          echo "::error::Unable to determine a comparison ref for the $EVENT_NAME event"
          exit 1
        fi

        # See: https://docs.github.com/en/actions/reference/workflow-commands-for-github-actions#setting-an-output-parameter
        echo "ref=$COMPARISON_REF" >> "$GITHUB_OUTPUT"
47+
# Benchmarks one engine build per matrix entry. `position` tells the results job which side of the comparison the
# entry belongs to: "after" is the ref under test, "before" is the comparison ref resolved by the init job.
run:
  name: Run at ${{ matrix.data.ref }} (${{ matrix.data.description }})
  needs: init
  runs-on: ubuntu-latest

  strategy:
    matrix:
      data:
        # Each ref is run twice so that an anomalous duration in a single job stands out.
        # For a manual trigger, github.ref is the tip of the branch selected in the workflow dispatch dialog's
        # "Use workflow from" menu.
        - description: tip run 1
          position: after
          ref: ${{ github.ref }}
        - description: tip run 2
          position: after
          ref: ${{ github.ref }}
        - description: comparison run 1
          position: before
          ref: ${{ needs.init.outputs.base-ref }}
        - description: comparison run 2
          position: before
          ref: ${{ needs.init.outputs.base-ref }}
69+
70+
steps:
- name: Set environment variables
  run: |
    # Environment file usage:
    # https://docs.github.com/en/actions/reference/workflow-commands-for-github-actions#setting-an-environment-variable
    data_root="${{ runner.temp }}/engine"
    mkdir --parents "$data_root"

    # Every path used by the later steps lives under the engine data folder.
    {
      echo "ENGINE_DATA_PATH=${data_root}"
      echo "GIT_CLONES_PATH=${data_root}/gitclones"
      echo "LIBRARY_ARCHIVES_PATH=${data_root}/libraries"
      echo "LOGS_PATH=${data_root}/logs"
      echo "CONFIG_PATH=${data_root}/config.json"
      echo "REGISTRY_PATH=${data_root}/registry.txt"
      echo "REPORTS_PATH=${data_root}/reports"
    } >> "$GITHUB_ENV"
83+
- name: Checkout repository
  uses: actions/checkout@v2
  with:
    # Each matrix entry builds the engine from its own ref.
    ref: ${{ matrix.data.ref }}

# Must run after the checkout: the choice depends on whether the checked out engine version has a go.mod file.
- name: Determine appropriate Go version
  id: go-version
  run: |
    if [[ -f "go.mod" ]]; then
      USE_GO_VERSION="${{ env.GO_VERSION }}"
    else
      # Dependency installation for old engine versions fails when not in GOPATH mode. Go <1.16 uses
      # GO111MODULE=auto by default, meaning it will use GOPATH mode. Old Go versions were used by the old engine
      # anyway.
      USE_GO_VERSION="1.14"
    fi
    # See: https://docs.github.com/en/actions/reference/workflow-commands-for-github-actions#setting-an-output-parameter
    echo "version=$USE_GO_VERSION" >> "$GITHUB_OUTPUT"

- name: Install Go
  uses: actions/setup-go@v2
  with:
    go-version: ${{ steps.go-version.outputs.version }}
106+
# Task provides the `go:build` target the engine is built with.
- name: Install Task
  uses: arduino/setup-task@v1
  with:
    version: 3.x
    repo-token: ${{ secrets.GITHUB_TOKEN }}
112+
- name: Install latest release of Arduino Lint
  run: |
    # The default `run` shell does not enable pipefail. Without it, a failed download would feed an empty script
    # to `sh`, which exits successfully, and the step would pass without installing anything.
    set -o pipefail

    ARDUINO_LINT_INSTALLATION_PATH="${{ runner.temp }}/arduino-lint"
    mkdir --parents "$ARDUINO_LINT_INSTALLATION_PATH"
    curl \
      -fsSL \
      https://raw.githubusercontent.com/arduino/arduino-lint/main/etc/install.sh \
    | \
    BINDIR="$ARDUINO_LINT_INSTALLATION_PATH" \
    sh

    # Add installation folder to path
    echo "$ARDUINO_LINT_INSTALLATION_PATH" >> "$GITHUB_PATH"
126+
- name: Configure Git for `go get` access to private repo
  run: |
    if ! [[ -f "go.mod" ]]; then
      # engine versions prior to 7dd8f69282232919955c82c143fefb14e50d0889 had a dependency that is hosted in a
      # private repo. The `go.mod` file was added at the same time the dependency was removed, so its presence can
      # be used as the indicator.
      # The token is supplied as the user name of the rewritten URL, with `x-oauth-basic` as placeholder password.
      git config \
        --global url."https://${{ secrets.REPO_SCOPE_TOKEN }}:x-oauth-basic@github.com/".insteadOf "https://github.com/"
    fi
136+
# Produces the `libraries-repository-engine` binary that the sync steps execute.
- name: Build engine
  run: task go:build
140+
- name: Generate configuration file
  run: |
    # The folders are taken from the same variables the sync steps measure, so the reported sizes always refer to
    # the locations the engine was configured to write to.
    cat > "${{ env.CONFIG_PATH }}" << EOF
    {
      "BaseDownloadUrl": "https://downloads.arduino.cc/libraries/",
      "LibrariesFolder": "${{ env.LIBRARY_ARCHIVES_PATH }}",
      "LibrariesIndex": "${{ env.ENGINE_DATA_PATH }}/library_index.json",
      "LogsFolder": "${{ env.LOGS_PATH }}",
      "LibrariesDB": "${{ env.ENGINE_DATA_PATH }}/db.json",
      "GitClonesFolder": "${{ env.GIT_CLONES_PATH }}",
      "DoNotRunClamav": true
    }
    EOF
154+
- name: Generate registry file
  run: |
    FULL_REGISTRY_PATH="${{ runner.temp }}/registry.txt"
    # The registry is pinned to a fixed commit so that every run works through the same list of libraries.
    # --fail makes an HTTP error abort the step instead of saving the error page as the registry.
    curl \
      --fail \
      --output "$FULL_REGISTRY_PATH" \
      https://raw.githubusercontent.com/arduino/library-registry/1c3f73b279d2845ff139883c78e733e2954437b8/registry.txt

    # Only use the first part of the file for the test
    head \
      -300 \
      "$FULL_REGISTRY_PATH" > \
      "${{ env.REGISTRY_PATH }}"
167+
- name: Run sync on empty environment
  id: fresh
  run: |
    # Bash increments SECONDS every second, so resetting it here times the sync that follows.
    SECONDS=0
    ./libraries-repository-engine "${{ env.CONFIG_PATH }}" "${{ env.REGISTRY_PATH }}"

    # Define step outputs with the performance data
    # See: https://docs.github.com/en/actions/reference/workflow-commands-for-github-actions#setting-an-output-parameter
    {
      echo "Type=fresh"
      echo "Duration=$SECONDS"
      echo "GitClonesSize=$(du --apparent-size --bytes --summarize "${{ env.GIT_CLONES_PATH }}" | cut --fields=1)"
      echo "LibraryArchivesSize=$(du --apparent-size --bytes --summarize "${{ env.LIBRARY_ARCHIVES_PATH }}" | cut --fields=1)"
      echo "LogsSize=$(du --apparent-size --bytes --summarize "${{ env.LOGS_PATH }}" | cut --fields=1)"
    } >> "$GITHUB_OUTPUT"
180+
# Second sync over the data left behind by the previous sync step.
- name: Run sync on populated database
  id: populated
  run: |
    # Bash increments SECONDS every second, so resetting it here times the sync that follows.
    SECONDS=0
    ./libraries-repository-engine "${{ env.CONFIG_PATH }}" "${{ env.REGISTRY_PATH }}"

    # Define step outputs with the performance data
    # See: https://docs.github.com/en/actions/reference/workflow-commands-for-github-actions#setting-an-output-parameter
    {
      echo "Type=populated"
      echo "Duration=$SECONDS"
      echo "GitClonesSize=$(du --apparent-size --bytes --summarize "${{ env.GIT_CLONES_PATH }}" | cut --fields=1)"
      echo "LibraryArchivesSize=$(du --apparent-size --bytes --summarize "${{ env.LIBRARY_ARCHIVES_PATH }}" | cut --fields=1)"
      echo "LogsSize=$(du --apparent-size --bytes --summarize "${{ env.LOGS_PATH }}" | cut --fields=1)"
    } >> "$GITHUB_OUTPUT"
193+
- name: Create report
  run: |
    mkdir --parents "${{ env.REPORTS_PATH }}"
    # All matrix jobs upload their report into the same artifact, so each file needs a distinct name. The matrix
    # job index is unique per job, unlike a random number, which could collide and overwrite another job's report.
    cat > "${{ env.REPORTS_PATH }}/report-${{ strategy.job-index }}.json" << EOF
    {
      "Ref": "${{ matrix.data.ref }}",
      "Description": "${{ matrix.data.description }}",
      "Position": "${{ matrix.data.position }}",
      "Results": [
        ${{ toJSON(steps.fresh.outputs) }},
        ${{ toJSON(steps.populated.outputs) }}
      ]
    }
    EOF
208+
# Every matrix job adds its report to the same artifact, which the results job downloads as a whole.
- name: Upload report to a workflow artifact
  uses: actions/upload-artifact@v2
  with:
    name: ${{ env.REPORTS_ARTIFACT_NAME }}
    path: ${{ env.REPORTS_PATH }}
    # A missing report means the benchmark produced no data, so treat it as a failure.
    if-no-files-found: error
215+
# Collects the reports of all benchmark jobs and prints the relative change between the comparison ref ("before")
# and the ref under test ("after").
results:
  needs: run
  runs-on: ubuntu-latest

  env:
    REPORTS_PATH: reports

  steps:
    - name: Download reports
      uses: actions/download-artifact@v2
      with:
        name: ${{ env.REPORTS_ARTIFACT_NAME }}
        path: ${{ env.REPORTS_PATH }}

    - name: Print results
      shell: python
      run: |
        import json
        import pathlib

        # Step outputs of the sync steps that are compared between the two refs.
        METRICS = ("Duration", "GitClonesSize", "LibraryArchivesSize", "LogsSize")

        reports_path = pathlib.Path("${{ env.REPORTS_PATH }}")
        reports = []
        # Sorted so that the printed output is in the same order on every run.
        for report_path in sorted(reports_path.iterdir()):
            with report_path.open() as report_file:
                reports.append(json.load(fp=report_file))

        # totals[metric][sync type] holds the sum of the metric over all "before" runs and over all "after" runs.
        # The sums are compared directly, which relies on the matrix running each position the same number of times.
        totals = {metric: {} for metric in METRICS}
        for report in reports:
            for result in report["Results"]:
                for metric in METRICS:
                    type_totals = totals[metric].setdefault(result["Type"], {"before": 0, "after": 0})
                    type_totals[report["Position"]] += int(result[metric])

        print("% change:")
        for metric, types in totals.items():
            for sync_type, type_totals in types.items():
                if type_totals["before"] == 0:
                    # A relative change from zero is undefined; report that instead of crashing.
                    change = "n/a (comparison value is 0)"
                else:
                    change = round(100 * (type_totals["after"] - type_totals["before"]) / type_totals["before"])
                print("{key} ({type}): {value}".format(key=metric, type=sync_type, value=change))

        print("::group::Full results")
        print(json.dumps(obj=reports, indent=2))
        print("::endgroup::")

0 commit comments

Comments
 (0)