-
Notifications
You must be signed in to change notification settings - Fork 45
/
Copy pathrun_integration_tests.sh
executable file
·295 lines (261 loc) · 13.9 KB
/
run_integration_tests.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
#!/bin/bash
# Usage - run commands from repo root:
# To check if new changes to the layer cause changes to any snapshots:
# BUILD_LAYERS=true DD_API_KEY=XXXX aws-vault exec sso-serverless-sandbox-account-admin -- ./scripts/run_integration_tests.sh
# To regenerate snapshots:
# UPDATE_SNAPSHOTS=true DD_API_KEY=XXXX aws-vault exec sso-serverless-sandbox-account-admin -- ./scripts/run_integration_tests.sh
set -e
# These values need to be in sync with serverless.yml, where there needs to be a function
# defined for every handler_runtime combination
LAMBDA_HANDLERS=("async-metrics" "sync-metrics")
LOGS_WAIT_SECONDS=20
script_path=${BASH_SOURCE[0]}
scripts_dir=$(dirname $script_path)
repo_dir=$(dirname $scripts_dir)
integration_tests_dir="$repo_dir/tests/integration"
script_utc_start_time=$(date -u +"%Y%m%dT%H%M%S")
mismatch_found=false
# Format :
# [0]: serverless runtime name
# [1]: python version
# [2]: random 8-character ID to avoid collisions with other runs
python38=("python3.8" "3.8" $(xxd -l 4 -c 4 -p < /dev/random))
python39=("python3.9" "3.9" $(xxd -l 4 -c 4 -p < /dev/random))
python310=("python3.10" "3.10" $(xxd -l 4 -c 4 -p < /dev/random))
python311=("python3.11" "3.11" $(xxd -l 4 -c 4 -p < /dev/random))
python312=("python3.12" "3.12" $(xxd -l 4 -c 4 -p < /dev/random))
python313=("python3.13" "3.13" $(xxd -l 4 -c 4 -p < /dev/random))
PARAMETERS_SETS=("python38" "python39" "python310" "python311" "python312" "python313")
if [ -z "$RUNTIME_PARAM" ]; then
echo "Python version not specified, running for all python versions."
else
RUNTIME_PARAM_NO_DOT=$(echo $RUNTIME_PARAM | sed 's/\.//')
echo "Python version is specified: $RUNTIME_PARAM"
PARAMETERS_SETS=(python${RUNTIME_PARAM_NO_DOT})
BUILD_LAYER_VERSION=python$RUNTIME_PARAM_NO_DOT[1]
fi
if [ -z "$AWS_SECRET_ACCESS_KEY" ]; then
echo "No AWS credentials were found in the environment."
echo "Note that only Datadog employees can run these integration tests."
exit 1
fi
if [ -z "$DD_API_KEY" ]; then
echo "No DD_API_KEY env var set, exiting"
exit 1
fi
if [ -z "$ARCH" ]; then
echo "No ARCH env var set, exiting"
exit 1
fi
if [ -n "$UPDATE_SNAPSHOTS" ]; then
echo "Overwriting snapshots in this execution"
fi
if [ -n "$BUILD_LAYERS" ]; then
echo "Building layers that will be deployed with our test functions"
if [ -n "$BUILD_LAYER_VERSION" ]; then
PYTHON_VERSION=${!BUILD_LAYER_VERSION} source $scripts_dir/build_layers.sh
else
source $scripts_dir/build_layers.sh
fi
else
echo "Not building layers, ensure they've already been built or re-run with 'BUILD_LAYERS=true DD_API_KEY=XXXX ./scripts/run_integration_tests.sh'"
fi
SERVERLESS_FRAMEWORK_ARCH=""
if [ "$ARCH" = "amd64" ]; then
SERVERLESS_FRAMEWORK_ARCH="x86_64"
else
SERVERLESS_FRAMEWORK_ARCH="arm64"
fi
cd $integration_tests_dir
input_event_files=$(ls ./input_events)
# Sort event files by name so that snapshots stay consistent
input_event_files=($(for file_name in ${input_event_files[@]}; do echo $file_name; done | sort))
# Always remove the stack(s) before exiting, no matter what
function remove_stack() {
for parameters_set in "${PARAMETERS_SETS[@]}"; do
serverless_runtime=$parameters_set[0]
python_version=$parameters_set[1]
run_id=$parameters_set[2]
echo "Removing stack for stage : ${!run_id}"
PYTHON_VERSION=${!python_version} RUNTIME=$parameters_set SERVERLESS_RUNTIME=${!serverless_runtime} SLS_ARCH=${SERVERLESS_FRAMEWORK_ARCH} \
serverless remove --stage ${!run_id}
done
}
trap remove_stack EXIT
for parameters_set in "${PARAMETERS_SETS[@]}"; do
serverless_runtime=$parameters_set[0]
python_version=$parameters_set[1]
run_id=$parameters_set[2]
echo "Deploying functions for runtime : $parameters_set, serverless runtime : ${!serverless_runtime}, \
python version : ${!python_version} and run id : ${!run_id}"
PYTHON_VERSION=${!python_version} RUNTIME=$parameters_set SERVERLESS_RUNTIME=${!serverless_runtime} ARCH=${ARCH} SLS_ARCH=${SERVERLESS_FRAMEWORK_ARCH} \
serverless deploy --stage ${!run_id}
echo "Invoking functions for runtime $parameters_set"
set +e # Don't exit this script if an invocation fails or there's a diff
for handler_name in "${LAMBDA_HANDLERS[@]}"; do
function_name="${handler_name}_python"
echo "$function_name"
# Invoke function once for each input event
for input_event_file in "${input_event_files[@]}"; do
# Get event name without trailing ".json" so we can build the snapshot file name
input_event_name=$(echo "$input_event_file" | sed "s/.json//")
snapshot_path="./snapshots/return_values/${handler_name}_${input_event_name}.json"
return_value=$(PYTHON_VERSION=${!python_version} RUNTIME=$parameters_set SERVERLESS_RUNTIME=${!serverless_runtime} SLS_ARCH=${SERVERLESS_FRAMEWORK_ARCH} \
serverless invoke --stage ${!run_id} -f "$function_name" --path "./input_events/$input_event_file")
if [ ! -f $snapshot_path ]; then
# If the snapshot file doesn't exist yet, we create it
echo "Writing return value to $snapshot_path because no snapshot exists yet"
echo "$return_value" >$snapshot_path
elif [ -n "$UPDATE_SNAPSHOTS" ]; then
# If $UPDATE_SNAPSHOTS is set to true, write the new logs over the current snapshot
echo "Overwriting return value snapshot for $snapshot_path"
echo "$return_value" >$snapshot_path
else
# Compare new return value to snapshot
diff_output=$(echo "$return_value" | diff - $snapshot_path)
if [ $? -eq 1 ]; then
echo "Failed: Return value for $function_name does not match snapshot:"
echo "$diff_output"
mismatch_found=true
else
echo "Ok: Return value for $function_name with $input_event_name event matches snapshot"
fi
fi
done
done
done
set -e
echo "Sleeping $LOGS_WAIT_SECONDS seconds to wait for logs to appear in CloudWatch..."
sleep $LOGS_WAIT_SECONDS
set +e # Don't exit this script if there is a diff or the logs endpoint fails
echo "Fetching logs for invocations and comparing to snapshots"
for handler_name in "${LAMBDA_HANDLERS[@]}"; do
for parameters_set in "${PARAMETERS_SETS[@]}"; do
function_name="${handler_name}_python"
function_snapshot_path="./snapshots/logs/${handler_name}_${parameters_set}.log"
serverless_runtime=$parameters_set[0]
python_version=$parameters_set[1]
run_id=$parameters_set[2]
# Fetch logs with serverless cli, retrying to avoid AWS account-wide rate limit error
retry_counter=0
while [ $retry_counter -lt 10 ]; do
raw_logs=$(PYTHON_VERSION=${!python_version} RUNTIME=$parameters_set SERVERLESS_RUNTIME=${!serverless_runtime} ARCH=${ARCH} SLS_ARCH=${SERVERLESS_FRAMEWORK_ARCH} \
serverless logs --stage ${!run_id} -f $function_name --startTime $script_utc_start_time)
fetch_logs_exit_code=$?
if [ $fetch_logs_exit_code -eq 1 ]; then
echo "Retrying fetch logs for $function_name..."
retry_counter=$(($retry_counter + 1))
sleep 10
continue
fi
break
done
if [ $retry_counter -eq 9 ]; then
echo "FAILURE: Could not retrieve logs for $function_name"
echo "Error from final attempt to retrieve logs:"
echo $raw_logs
exit 1
fi
# Replace invocation-specific data like timestamps and IDs with XXXX to normalize logs across executions
logs=$(
echo "$raw_logs" |
node parse-json.js |
# Filter serverless cli errors
sed '/Serverless: Recoverable error occurred/d' |
# Remove RequestsDependencyWarning from botocore/vendored/requests/__init__.py
sed '/RequestsDependencyWarning/d' |
# Remove blank lines
sed '/^$/d' |
# Normalize Lambda runtime REPORT logs
sed -E 's/(RequestId|TraceId|SegmentId|Duration|init|Memory Used|"e"): [a-z0-9\.\-]+/\1: XXXX/g' |
sed -E 's/(python:3.[0-9]+\.v)[0-9]+/\1X/g' |
# Normalize HTTP headers
sed -E "s/(x-datadog-parent-id:|x-datadog-trace-id:|Content-Length:)[0-9]+/\1XXXX/g" |
# Remove Account ID
sed -E "s/(account_id:)[0-9]+/\1XXXX/g" |
# Normalize timestamps in datapoints POSTed to DD
sed -E 's/"points": \[\[[0-9\.]+,/"points": \[\[XXXX,/g' |
# Strip API key from logged requests
sed -E "s/(api_key=|'api_key': '|DD-API-KEY:)[a-z0-9\.\-]+/\1XXXX/g" |
# Normalize package version so that these snapshots aren't broken on version bumps
sed -E "s/(dd_lambda_layer:datadog-python[0-9]+_)[0-9]+\.[0-9]+\.[0-9]+/\1X\.X\.X/g" |
sed -E "s/(datadog_lambda:v)([0-9]+\.[0-9]+\.[0-9]+)/\1XX/g" |
sed -E "s/(datadogpy\/)([0-9]+\.[0-9]+\.[0-9]+)/\1XX/g" |
sed -E "s/(python )([0-9]\.[0-9]+\.[0-9]+)/\1XX/g" |
# Strip out run ID (from function name, resource, etc.)
sed -E "s/${!run_id}/XXXX/g" |
# Normalize python-requests version
sed -E "s/(User-Agent:python-requests\/)[0-9]+\.[0-9]+\.[0-9]+/\1X\.X\.X/g" |
sed -E "s/(\"http.useragent\"\: \"python-requests\/)[0-9]+\.[0-9]+\.[0-9]+/\1X\.X\.X/g" |
# Strip out trace/span/parent/timestamps
sed -E "s/(\"trace_id\"\: \")[A-Z0-9\.\-]+/\1XXXX/g" |
sed -E "s/(\"span_id\"\: \")[A-Z0-9\.\-]+/\1XXXX/g" |
sed -E "s/(\"parent_id\"\: \")[A-Z0-9\.\-]+/\1XXXX/g" |
sed -E "s/(\"request_id\"\: \")[a-z0-9\.\-]+/\1XXXX/g" |
sed -E "s/(\"http.source_ip\"\: \")[a-z0-9\.\-]+/\1XXXX/g" |
sed -E "s/(\"http.user_agent\"\: \")[a-z0-9\.\-]+/\1XXXX/g" |
sed -E "s/(\"function_trigger.event_source_arn\"\: \")[A-Za-z0-9\/\.\:\-]+/\1XXXX/g" |
sed -E "s/(\"duration\"\: )[0-9\.\-]+/\1\"XXXX\"/g" |
sed -E "s/(\"start\"\: )[0-9\.\-]+/\1\"XXXX\"/g" |
sed -E "s/(\"system\.pid\"\: )[0-9\.\-]+/\1\"XXXX\"/g" |
sed -E "s/(\"process_id\"\: )[0-9\.\-]+/\1XXXX/g" |
sed -E "s/(\"runtime-id\"\: \")[a-z0-9\.\-]+/\1XXXX/g" |
sed -E "s/([a-zA-Z0-9]+)(\.execute-api\.[a-z0-9\-]+\.amazonaws\.com)/XXXX\2/g" |
sed -E "s/(\"apiid\"\: \")[a-z0-9\.\-]+/\1XXXX/g" |
sed -E "s/(\"apiname\"\: \")[a-z0-9\.\-]+/\1XXXX/g" |
sed -E "s/(\"function_trigger.event_source_arn\"\: \")[a-z0-9\.\-\:]+/\1XXXX/g" |
sed -E "s/(\"event_id\"\: \")[a-zA-Z0-9\:\-]+/\1XXXX/g" |
sed -E "s/(\"message_id\"\: \")[a-zA-Z0-9\:\-]+/\1XXXX/g" |
sed -E "s/(\"request_id\"\:\ \")[a-zA-Z0-9\-\=]+/\1XXXX/g" |
sed -E "s/(\"connection_id\"\:\ \")[a-zA-Z0-9\-]+/\1XXXX/g" |
sed -E "s/(\"shardId\-)([0-9]+)\:([a-zA-Z0-9]+)[a-zA-Z0-9]/\1XXXX:XXXX/g" |
sed -E "s/(\"shardId\-)[0-9a-zA-Z]+/\1XXXX/g" |
sed -E "s/(\"datadog_lambda\"\: \")([0-9]+\.[0-9]+\.[0-9]+)/\1X.X.X/g" |
sed -E "s/(\"partition_key\"\:\ \")[a-zA-Z0-9\-]+/\1XXXX/g" |
sed -E "s/(\"object_etag\"\:\ \")[a-zA-Z0-9\-]+/\1XXXX/g" |
sed -E "s/(\"dd_trace\"\: \")([0-9]+\.[0-9]+\.[0-9]+)/\1X.X.X/g" |
sed -E "s/(traceparent\:)([A-Za-z0-9\-]+)/\1XXX/g" |
sed -E "s/(tracestate\:)([A-Za-z0-9\-\=\:\;].+)/\1XXX/g" |
sed -E "s/(\"_dd.p.tid\"\: \")[a-z0-9\.\-]+/\1XXXX/g" |
sed -E "s/(_dd.p.tid=)[a-z0-9\.\-]+/\1XXXX/g" |
sed -E 's/arch (aarch64|x86_64)/arch XXXX/g' |
# Parse out account ID in ARN
sed -E "s/([a-zA-Z0-9]+):([a-zA-Z0-9]+):([a-zA-Z0-9]+):([a-zA-Z0-9\-]+):([a-zA-Z0-9\-\:]+)/\1:\2:\3:\4:XXXX:\4/g" |
sed -E "/init complete at epoch/d" |
sed -E "/main started at epoch/d"
)
if [ ! -f $function_snapshot_path ]; then
# If no snapshot file exists yet, we create one
echo "Writing logs to $function_snapshot_path because no snapshot exists yet"
echo "$logs" >$function_snapshot_path
else
# Compare new logs to snapshots
diff_output=$(echo "$logs" | sort | diff -w - <(sort $function_snapshot_path))
if [ $? -eq 1 ]; then
if [ -n "$UPDATE_SNAPSHOTS" ]; then
# If $UPDATE_SNAPSHOTS is set to true write the new logs over the current snapshot
echo "Overwriting log snapshot for $function_snapshot_path"
echo "$logs" >$function_snapshot_path
else
echo "Failed: Mismatch found between new $function_name logs (first) and snapshot (second):"
echo "$diff_output"
mismatch_found=true
fi
else
echo "Ok: New logs for $function_name match snapshot"
fi
fi
done
done
set -e
if [ "$mismatch_found" = true ]; then
echo "FAILURE: A mismatch between new data and a snapshot was found and printed above."
echo "If the change is expected, generate new snapshots by running 'UPDATE_SNAPSHOTS=true DD_API_KEY=XXXX ./scripts/run_integration_tests.sh'"
echo "Make sure https://httpstat.us/400/ is UP for 'http_error' test case"
exit 1
fi
if [ -n "$UPDATE_SNAPSHOTS" ]; then
echo "SUCCESS: Wrote new snapshots for all functions"
exit 0
fi
echo "SUCCESS: No difference found between snapshots and new return values or logs"