Skip to content

Commit ad71a54

Browse files
authored
Merge pull request #4896 from alexander-demicev/retry
🐛Retry fetching user data from secretmanager if first request fails
2 parents 4b3f416 + 481512b commit ad71a54

File tree

1 file changed

+51
-14
lines changed

1 file changed

+51
-14
lines changed

pkg/cloud/services/secretsmanager/secret_fetch_script.go

Lines changed: 51 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ SECRET_PREFIX="{{.SecretPrefix}}"
4848
CHUNKS="{{.Chunks}}"
4949
FILE="/etc/secret-userdata.txt"
5050
FINAL_INDEX=$((CHUNKS - 1))
51+
MAX_RETRIES=10
52+
RETRY_DELAY=10 # in seconds
5153
5254
# Log an error and exit.
5355
# Args:
@@ -115,6 +117,7 @@ check_aws_command() {
115117
;;
116118
esac
117119
}
120+
118121
delete_secret_value() {
119122
local id="${SECRET_PREFIX}-${1}"
120123
local out
@@ -126,19 +129,27 @@ delete_secret_value() {
126129
aws secretsmanager ${ENDPOINT} --region ${REGION} delete-secret --force-delete-without-recovery --secret-id "${id}" 2>&1
127130
)
128131
local delete_return=$?
129-
set -o errexit
130-
set -o nounset
131-
set -o pipefail
132132
check_aws_command "SecretsManager::DeleteSecret" "${delete_return}" "${out}"
133133
if [ ${delete_return} -ne 0 ]; then
134-
log::error_exit "Could not delete secret value" 2
134+
log::error "Could not delete secret value"
135+
return 1
135136
fi
136137
}
137138
138-
delete_secrets() {
139-
for i in $(seq 0 ${FINAL_INDEX}); do
140-
delete_secret_value "$i"
139+
# Delete one secret chunk, retrying when the delete fails.
# Globals:
#   MAX_RETRIES (read) - maximum number of attempts
#   RETRY_DELAY (read) - seconds to wait between two attempts
# Args:
#   $1: chunk index, passed through unchanged to delete_secret_value
# Returns:
#   0 as soon as one attempt succeeds, 1 once every attempt has failed
retry_delete_secret_value() {
  local attempt=0
  local return_code
  while [ "${attempt}" -lt "${MAX_RETRIES}" ]; do
    delete_secret_value "$1"
    return_code=$?
    if [ "${return_code}" -eq 0 ]; then
      return 0
    fi
    # Plain assignment rather than ((attempt++)): the arithmetic command
    # yields exit status 1 when the pre-increment value is 0, which would
    # abort the script if errexit were enabled at this point.
    attempt=$((attempt + 1))
    # Only announce and wait when another attempt will actually follow, so
    # the final failure is reported without a misleading message or delay.
    if [ "${attempt}" -lt "${MAX_RETRIES}" ]; then
      log::info "Retrying in ${RETRY_DELAY} seconds..."
      sleep "${RETRY_DELAY}"
    fi
  done
  return 1
}
143154
144155
get_secret_value() {
@@ -159,18 +170,33 @@ get_secret_value() {
159170
)
160171
local get_return=$?
161172
check_aws_command "SecretsManager::GetSecretValue" "${get_return}" "${data}"
173+
if [ ${get_return} -ne 0 ]; then
174+
log::error "could not get secret value"
175+
return 1
176+
fi
162177
set -o errexit
163178
set -o nounset
164179
set -o pipefail
165-
if [ ${get_return} -ne 0 ]; then
166-
log::error "could not get secret value, deleting secret"
167-
delete_secrets
168-
log::error_exit "could not get secret value, but secret was deleted" 1
169-
fi
170180
log::info "appending data to temporary file ${FILE}.gz"
171181
echo "${data}" | base64 -d >>${FILE}.gz
172182
}
173183
184+
# Fetch one secret chunk, retrying when the fetch fails.
# Globals:
#   MAX_RETRIES (read) - maximum number of attempts
#   RETRY_DELAY (read) - seconds to wait between two attempts
# Args:
#   $1: chunk index, passed through unchanged to get_secret_value
# Returns:
#   0 as soon as one attempt succeeds, 1 once every attempt has failed
retry_get_secret_value() {
  local attempt=0
  local return_code
  while [ "${attempt}" -lt "${MAX_RETRIES}" ]; do
    get_secret_value "$1"
    return_code=$?
    if [ "${return_code}" -eq 0 ]; then
      return 0
    fi
    # Plain assignment rather than ((attempt++)): the arithmetic command
    # yields exit status 1 when the pre-increment value is 0, which would
    # abort the script if errexit were enabled at this point.
    attempt=$((attempt + 1))
    # Only announce and wait when another attempt will actually follow, so
    # the final failure is reported without a misleading message or delay.
    if [ "${attempt}" -lt "${MAX_RETRIES}" ]; then
      log::info "Retrying in ${RETRY_DELAY} seconds..."
      sleep "${RETRY_DELAY}"
    fi
  done
  return 1
}
199+
174200
log::info "aws.cluster.x-k8s.io encrypted cloud-init script $0 started"
175201
log::info "secret prefix: ${SECRET_PREFIX}"
176202
log::info "secret count: ${CHUNKS}"
@@ -181,10 +207,21 @@ if test -f "${FILE}"; then
181207
fi
182208
183209
for i in $(seq 0 "${FINAL_INDEX}"); do
184-
get_secret_value "$i"
210+
retry_get_secret_value "$i"
211+
return_code=$?
212+
if [ ${return_code} -ne 0 ]; then
213+
log::error "Failed to get secret value after ${MAX_RETRIES} attempts"
214+
fi
185215
done
186216
187-
delete_secrets
217+
for i in $(seq 0 ${FINAL_INDEX}); do
218+
retry_delete_secret_value "$i"
219+
return_code=$?
220+
if [ ${return_code} -ne 0 ]; then
221+
log::error "Failed to delete secret value after ${MAX_RETRIES} attempts"
222+
log::error_exit "couldn't delete the secret value, exiting" 1
223+
fi
224+
done
188225
189226
log::info "decompressing userdata to ${FILE}"
190227
gunzip "${FILE}.gz"

0 commit comments

Comments
 (0)