Skip to content

Commit 19dbc13

Browse files
committed
Update retry mechanism
1 parent fb18068 commit 19dbc13

File tree

4 files changed

+106
-16
lines changed

4 files changed

+106
-16
lines changed

.github/workflows/actions/execute_and_retry/action.yml

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@ inputs:
66
pre-command:
77
required: false
88
type: string
9-
# (Optional) Number of retries to perform. Default is 2
9+
# (Optional) Number of retries to perform. Default is 3
1010
max_retry:
1111
required: false
1212
type: number
13-
default: 2
13+
default: 3
1414
# (Required) Command to execute with the retry mechanism
1515
command:
1616
required: true
@@ -19,6 +19,11 @@ inputs:
1919
cleanup:
2020
required: false
2121
type: string
22+
# (Optional) Time to wait between each attempt in seconds. Default is 10 seconds
23+
sleep_time:
24+
required: false
25+
type: number
26+
default: 10
2227
# (Optional) Follow-up command after the main command is finished.
2328
post-command:
2429
required: false
@@ -35,26 +40,35 @@ runs:
3540
COMMAND: ${{ inputs.command }}
3641
CLEANUP: ${{ inputs.cleanup }}
3742
POST_COMMAND: ${{ inputs.post-command }}
43+
SLEEP_TIME: ${{ inputs.sleep_time }}
3844
run: |
45+
echo "Starting the execute_and_retry action for command $COMMAND"
46+
echo "Executing pre-command for the execute_and_retry action"
3947
eval "$PRE_COMMAND"
4048
4149
retry_counter=0
4250
while [ $retry_counter -lt $MAX_RETRY ]; do
51+
echo "Attempt Number $retry_counter for execute_and_retry action"
52+
4353
attempt_failed=0
4454
eval "$COMMAND" || attempt_failed=$?
4555
4656
if [ $attempt_failed -ne 0 ]; then
57+
echo "Command failed for execute_and_retry action, executing cleanup command for another attempt"
58+
4759
eval "$CLEANUP"
4860
retry_counter=$(($retry_counter+1))
49-
sleep 5
61+
sleep "$SLEEP_TIME"
5062
else
63+
echo "Command executed successfully for execute_and_retry"
5164
break
5265
fi
53-
54-
if [ $retry_counter -eq $max_retry ]; then
55-
echo "Max retry reached, command failed to execute properly. Exiting code"
66+
if [[ $retry_counter -eq $MAX_RETRY ]]; then
67+
echo "Max retry reached, command failed to execute properly. Exiting action"
5668
exit 1
5769
fi
5870
done
5971

60-
eval "$POST_COMMAND"
72+
echo "Executing post-command for the execute_and_retry action"
73+
eval "$POST_COMMAND"
74+
echo "Exiting execute_and_retry action"

.github/workflows/appsignals-e2e-eks-test.yml

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,12 @@ jobs:
108108
--attach-policy-arn arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess \
109109
--region ${{ inputs.aws-region }} \
110110
--approve"
111+
cleanup: "eksctl delete iamserviceaccount \
112+
--name service-account-${{ env.TESTING_ID }} \
113+
--namespace ${{ env.SAMPLE_APP_NAMESPACE }} \
114+
--cluster ${{ inputs.test-cluster-name }} \
115+
--region ${{ inputs.aws-region }}"
116+
sleep_time: 60
111117

112118
- name: Initiate Terraform
113119
uses: ./.github/workflows/actions/execute_and_retry
@@ -148,19 +154,20 @@ jobs:
148154
# after installing App Signals. Attempts to connect will be made for up to 10 minutes
149155
if [ $deployment_failed -eq 0 ]; then
150156
. ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
151-
execute_and_retry 2 \
157+
execute_and_retry 3 \
152158
"${{ env.TEST_RESOURCES_FOLDER }}/enablement-script/enable-app-signals.sh \
153159
${{ inputs.test-cluster-name }} \
154160
${{ inputs.aws-region }} \
155161
${{ env.SAMPLE_APP_NAMESPACE }}" \
162+
60 \
156163
"${{ env.TEST_RESOURCES_FOLDER }}/enablement-script/clean-app-signals.sh \
157164
${{ inputs.test-cluster-name }} \
158165
${{ inputs.aws-region }} \
159166
${{ env.SAMPLE_APP_NAMESPACE }} && \
160167
aws eks update-kubeconfig --name ${{ inputs.test-cluster-name }} --region ${{ inputs.aws-region }}"
161168
162-
execute_and_retry 2 "kubectl delete pods --all -n ${{ env.SAMPLE_APP_NAMESPACE }}"
163-
execute_and_retry 2 "kubectl wait --for=condition=Ready --request-timeout '5m' pod --all -n ${{ env.SAMPLE_APP_NAMESPACE }}"
169+
execute_and_retry 2 "kubectl delete pods --all -n ${{ env.SAMPLE_APP_NAMESPACE }}" 60
170+
execute_and_retry 2 "kubectl wait --for=condition=Ready --request-timeout '5m' pod --all -n ${{ env.SAMPLE_APP_NAMESPACE }}" 10
164171
165172
echo "Attempting to connect to the main sample app endpoint"
166173
main_sample_app_endpoint=http://$(terraform output sample_app_endpoint)
@@ -224,7 +231,7 @@ jobs:
224231
break
225232
fi
226233
227-
if [ $retry_counter -eq $max_retry ]; then
234+
if [ $retry_counter -ge $max_retry ]; then
228235
echo "Max retry reached, failed to deploy terraform and connect to the endpoint. Exiting code"
229236
exit 1
230237
fi

.github/workflows/test 4.yml

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
## Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
## SPDX-License-Identifier: Apache-2.0
3+
4+
## This workflow aims to run the Application Signals end-to-end tests as a canary to
5+
## test the artifacts for App Signals enablement. It will deploy a sample app and remote
6+
## service on two EC2 instances, call the APIs, and validate the generated telemetry,
7+
## including logs, metrics, and traces.
8+
name: Test
9+
on:
10+
push:
11+
12+
permissions:
13+
id-token: write
14+
contents: read
15+
16+
jobs:
17+
e2e-ec2-test-1:
18+
strategy:
19+
fail-fast: false
20+
matrix:
21+
aws-region: ['us-east-1']
22+
uses: ./.github/workflows/appsignals-e2e-ec2-test.yml
23+
secrets: inherit
24+
with:
25+
aws-region: ${{ matrix.aws-region }}
26+
caller-workflow-name: 'test'
27+
28+
29+
e2e-eks-test-1:
30+
strategy:
31+
fail-fast: false
32+
matrix:
33+
aws-region: ['us-east-1']
34+
uses: ./.github/workflows/appsignals-e2e-eks-test.yml
35+
secrets: inherit
36+
with:
37+
aws-region: ${{ matrix.aws-region }}
38+
test-cluster-name: 'e2e-playground'
39+
caller-workflow-name: "test"
40+
# e2e-eks-test-2:
41+
# needs: [e2e-eks-test-1]
42+
# strategy:
43+
# fail-fast: false
44+
# matrix:
45+
# aws-region: ['us-east-1']
46+
# uses: ./.github/workflows/appsignals-e2e-eks-test.yml
47+
# secrets: inherit
48+
# with:
49+
# aws-region: ${{ matrix.aws-region }}
50+
# test-cluster-name: 'e2e-playground'
51+
# caller-workflow-name: "test"
52+
# e2e-eks-test-3:
53+
# needs: [e2e-eks-test-2]
54+
# strategy:
55+
# fail-fast: false
56+
# matrix:
57+
# aws-region: ['us-east-1']
58+
# uses: ./.github/workflows/appsignals-e2e-eks-test.yml
59+
# secrets: inherit
60+
# with:
61+
# aws-region: ${{ matrix.aws-region }}
62+
# test-cluster-name: 'e2e-playground'
63+
# caller-workflow-name: "test"

.github/workflows/util/execute_and_retry.sh

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,28 +3,34 @@
33
# This function is for retrying commands in the case they fail. It accepts four arguments
44
# $1: Number of retries it will attempt
55
# $2: Command to execute
6-
# $3: (Optional) Command for cleaning up resources if $2 fails
6+
# $3: Sleep time between attempts, in seconds
7+
# $4: (Optional) Command for cleaning up resources if $2 fails
78
execute_and_retry () {
89
# Warning: The variables called in this function are not local and will be shared with the calling function.
910
# Make sure that the variable names do not conflict
1011
execute_retry_counter=0
1112
max_execute_retry=$1
1213
command=$2
13-
cleanup=$3
14+
sleep_time=$3
15+
cleanup=$4
16+
echo "Initiating execute_and_retry.sh script for command $command"
1417
while [ $execute_retry_counter -lt $max_execute_retry ]; do
18+
echo "Attempt Number $execute_retry_counter for execute_and_retry.sh"
1519
attempt_failed=0
1620
eval "$command" || attempt_failed=$?
1721

1822
if [ $attempt_failed -ne 0 ]; then
23+
echo "Command failed for execute_and_retry.sh, executing cleanup command for another attempt"
1924
eval "$cleanup"
2025
execute_retry_counter=$(($execute_retry_counter+1))
21-
sleep 5
26+
sleep $sleep_time
2227
else
28+
echo "Command executed successfully for execute_and_retry.sh, exiting script"
2329
break
2430
fi
2531

26-
if [ $execute_retry_counter -eq $max_execute_retry ]; then
27-
echo "Max retry reached, command failed to execute properly. Exiting code"
32+
if [ "$execute_retry_counter" -eq "$max_execute_retry" ]; then
33+
echo "Max retry reached, command failed to execute properly. Exiting execute_and_retry.sh script"
2834
exit 1
2935
fi
3036
done

0 commit comments

Comments
 (0)