Skip to content

Commit 5bfea39

Browse files
authored
Merge pull request #44 from aws-observability/edit-retry-mechanism
Edit retry mechanism
2 parents abe5925 + 997a46f commit 5bfea39

File tree

3 files changed

+43
-16
lines changed

3 files changed

+43
-16
lines changed

.github/workflows/actions/execute_and_retry/action.yml

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@ inputs:
66
pre-command:
77
required: false
88
type: string
9-
# (Optional) Number of retries to perform. Default is 2
9+
# (Optional) Maximum number of attempts to perform (the first try counts as one). Default is 3
1010
max_retry:
1111
required: false
1212
type: number
13-
default: 2
13+
default: 3
1414
# (Required) Command to execute with the retry mechanism
1515
command:
1616
required: true
@@ -19,6 +19,11 @@ inputs:
1919
cleanup:
2020
required: false
2121
type: string
22+
# (Optional) Time to wait between each attempt in seconds. Default is 10 seconds
23+
sleep_time:
24+
required: false
25+
type: number
26+
default: 10
2227
# (Optional) Follow-up command after the main command is finished.
2328
post-command:
2429
required: false
@@ -35,26 +40,35 @@ runs:
3540
COMMAND: ${{ inputs.command }}
3641
CLEANUP: ${{ inputs.cleanup }}
3742
POST_COMMAND: ${{ inputs.post-command }}
43+
SLEEP_TIME: ${{ inputs.sleep_time }}
3844
run: |
45+
echo "Starting the execute_and_retry action for command $COMMAND"
46+
echo "Executing pre-command for the execute_and_retry action"
3947
eval "$PRE_COMMAND"
4048
4149
retry_counter=0
4250
while [ $retry_counter -lt $MAX_RETRY ]; do
51+
echo "Attempt Number $retry_counter for execute_and_retry action"
52+
4353
attempt_failed=0
4454
eval "$COMMAND" || attempt_failed=$?
4555
4656
if [ $attempt_failed -ne 0 ]; then
57+
echo "Command failed for execute_and_retry action, executing cleanup command for another attempt"
58+
4759
eval "$CLEANUP"
4860
retry_counter=$(($retry_counter+1))
49-
sleep 5
61+
sleep "$SLEEP_TIME"
5062
else
63+
echo "Command executed successfully for execute_and_retry"
5164
break
5265
fi
53-
54-
if [ $retry_counter -eq $max_retry ]; then
55-
echo "Max retry reached, command failed to execute properly. Exiting code"
66+
if [[ $retry_counter -ge $MAX_RETRY ]]; then
67+
echo "Max retry reached, command failed to execute properly. Exiting action"
5668
exit 1
5769
fi
5870
done
5971

60-
eval "$POST_COMMAND"
72+
echo "Executing post-command for the execute_and_retry action"
73+
eval "$POST_COMMAND"
74+
echo "Exiting execute_and_retry action"

.github/workflows/appsignals-e2e-eks-test.yml

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,12 @@ jobs:
108108
--attach-policy-arn arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess \
109109
--region ${{ inputs.aws-region }} \
110110
--approve"
111+
cleanup: "eksctl delete iamserviceaccount \
112+
--name service-account-${{ env.TESTING_ID }} \
113+
--namespace ${{ env.SAMPLE_APP_NAMESPACE }} \
114+
--cluster ${{ inputs.test-cluster-name }} \
115+
--region ${{ inputs.aws-region }}"
116+
sleep_time: 60
111117

112118
- name: Initiate Terraform
113119
uses: ./.github/workflows/actions/execute_and_retry
@@ -148,7 +154,7 @@ jobs:
148154
# after installing App Signals. Attempts to connect will be made for up to 10 minutes
149155
if [ $deployment_failed -eq 0 ]; then
150156
. ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
151-
execute_and_retry 2 \
157+
execute_and_retry 3 \
152158
"${{ env.TEST_RESOURCES_FOLDER }}/enablement-script/enable-app-signals.sh \
153159
${{ inputs.test-cluster-name }} \
154160
${{ inputs.aws-region }} \
@@ -157,10 +163,11 @@ jobs:
157163
${{ inputs.test-cluster-name }} \
158164
${{ inputs.aws-region }} \
159165
${{ env.SAMPLE_APP_NAMESPACE }} && \
160-
aws eks update-kubeconfig --name ${{ inputs.test-cluster-name }} --region ${{ inputs.aws-region }}"
166+
aws eks update-kubeconfig --name ${{ inputs.test-cluster-name }} --region ${{ inputs.aws-region }}" \
167+
60
161168
162-
execute_and_retry 2 "kubectl delete pods --all -n ${{ env.SAMPLE_APP_NAMESPACE }}"
163-
execute_and_retry 2 "kubectl wait --for=condition=Ready --request-timeout '5m' pod --all -n ${{ env.SAMPLE_APP_NAMESPACE }}"
169+
execute_and_retry 2 "kubectl delete pods --all -n ${{ env.SAMPLE_APP_NAMESPACE }}" "" 60
170+
execute_and_retry 2 "kubectl wait --for=condition=Ready --request-timeout '5m' pod --all -n ${{ env.SAMPLE_APP_NAMESPACE }}" "" 10
164171
165172
echo "Attempting to connect to the main sample app endpoint"
166173
main_sample_app_endpoint=http://$(terraform output sample_app_endpoint)
@@ -224,7 +231,7 @@ jobs:
224231
break
225232
fi
226233
227-
if [ $retry_counter -eq $max_retry ]; then
234+
if [ $retry_counter -ge $max_retry ]; then
228235
echo "Max retry reached, failed to deploy terraform and connect to the endpoint. Exiting code"
229236
exit 1
230237
fi

.github/workflows/util/execute_and_retry.sh

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,28 +3,34 @@
33
# This function retries a command when it fails. It accepts four arguments:
44
# $1: Maximum number of attempts (the first try counts as one)
55
# $2: Command to execute
6-
# $3: (Optional) Command for cleaning up resources if $2 fails
6+
# $3: (Optional) Command for cleaning up resources if $2 fails.
7+
# $4: (Optional) Time to sleep between attempts, in seconds. Default value is 10 seconds
78
execute_and_retry () {
89
# Warning: The variables assigned in this function are not local and will be shared with the calling function.
910
# Make sure that the variable names do not conflict
1011
execute_retry_counter=0
1112
max_execute_retry=$1
1213
command=$2
1314
cleanup=$3
15+
sleep_time=$4
16+
echo "Initiating execute_and_retry.sh script for command $command"
1417
while [ $execute_retry_counter -lt $max_execute_retry ]; do
18+
echo "Attempt Number $execute_retry_counter for execute_and_retry.sh"
1519
attempt_failed=0
1620
eval "$command" || attempt_failed=$?
1721

1822
if [ $attempt_failed -ne 0 ]; then
23+
echo "Command failed for execute_and_retry.sh, executing cleanup command for another attempt"
1924
eval "$cleanup"
2025
execute_retry_counter=$(($execute_retry_counter+1))
21-
sleep 5
26+
sleep "${sleep_time:-10}"
2227
else
28+
echo "Command executed successfully for execute_and_retry.sh, exiting script"
2329
break
2430
fi
2531

26-
if [ $execute_retry_counter -eq $max_execute_retry ]; then
27-
echo "Max retry reached, command failed to execute properly. Exiting code"
32+
if [ "$execute_retry_counter" -ge "$max_execute_retry" ]; then
33+
echo "Max retry reached, command failed to execute properly. Exiting execute_and_retry.sh script"
2834
exit 1
2935
fi
3036
done

0 commit comments

Comments
 (0)