Skip to content

Commit b7df778

Browse files
committed
Add Retry Mechanism for Gradlew, Terraform, and Eksctl
1 parent 99c326f commit b7df778

File tree

3 files changed

+71
-31
lines changed

3 files changed

+71
-31
lines changed

.github/workflows/appsignals-e2e-ec2-test.yml

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ env:
3131
LOG_GROUP_NAME: /aws/appsignals/generic
3232
TEST: ${{ inputs.test }}
3333
GET_ADOT_JAR_COMMAND: "wget -O adot.jar https://github.com/aws-observability/aws-otel-java-instrumentation/releases/latest/download/aws-opentelemetry-agent.jar"
34+
TEST_RESOURCES_FOLDER: /home/runner/work/aws-application-signals-test-framework/aws-application-signals-test-framework
35+
3436

3537
jobs:
3638
e2e-ec2-test:
@@ -65,16 +67,21 @@ jobs:
6567
aws-region: ${{ inputs.aws-region }}
6668

6769
- name: Set up terraform
68-
uses: hashicorp/setup-terraform@v3
69-
with:
70-
terraform_wrapper: false
70+
run: |
71+
source ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
72+
execute_and_retry 2 "wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg"
73+
echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list
74+
sudo apt update && sudo apt install terraform
75+
76+
- name: Initiate Terraform
77+
working-directory: terraform/ec2
78+
run: |
79+
source ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
80+
execute_and_retry 2 "terraform init && terraform validate" "rm -rf .terraform && rm -rf .terraform.lock.hcl"
7181
7282
- name: Deploy sample app via terraform and wait for endpoint to come online
7383
working-directory: terraform/ec2
7484
run: |
75-
terraform init
76-
terraform validate
77-
7885
# Attempt to deploy the sample app on an EC2 instance and wait for its endpoint to come online.
7986
# There may be occasional failures due to transient issues, so try up to 2 times.
8087
# deployment_failed of 0 indicates that both the terraform deployment and the endpoint are running, while 1 indicates
@@ -151,10 +158,15 @@ jobs:
151158
- name: Call all test APIs
152159
continue-on-error: true
153160
run: |
154-
curl -S -s -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/outgoing-http-call/
155-
curl -S -s -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/aws-sdk-call/
156-
curl -S -s -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_IP }}/
157-
curl -S -s -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/client-call/
161+
curl -S -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/outgoing-http-call/
162+
curl -S -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/aws-sdk-call/
163+
curl -S -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_IP }}/
164+
curl -S -o /dev/null http://${{ env.MAIN_SERVICE_ENDPOINT }}/client-call/
165+
166+
- name: Build Gradlew
167+
run: |
168+
source ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
169+
execute_and_retry 2 ./gradlew
158170
159171
# Validation for pulse telemetry data
160172
- name: Validate generated EMF logs

.github/workflows/appsignals-e2e-eks-test.yml

Lines changed: 24 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ env:
3535
# Remote sample-app image URI. ECR registry hosts have the form <account>.dkr.ecr.<region>.amazonaws.com.
SAMPLE_APP_REMOTE_SERVICE_IMAGE: ${{ secrets.APP_SIGNALS_E2E_TEST_ACC }}.dkr.ecr.${{ inputs.aws-region }}.amazonaws.com/${{ secrets.APP_SIGNALS_E2E_RE_SA_IMG }}
3636
METRIC_NAMESPACE: AppSignals
3737
LOG_GROUP_NAME: /aws/appsignals/eks
38+
TEST_RESOURCES_FOLDER: /home/runner/work/aws-application-signals-test-framework/aws-application-signals-test-framework
3839

3940
jobs:
4041
e2e-eks-test:
@@ -95,9 +96,10 @@ jobs:
9596

9697
- name: Install eksctl
9798
run: |
99+
source ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
98100
mkdir ${{ github.workspace }}/eksctl
99101
curl -sLO "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_Linux_amd64.tar.gz"
100-
tar -xzf eksctl_Linux_amd64.tar.gz -C ${{ github.workspace }}/eksctl && rm eksctl_Linux_amd64.tar.gz
102+
execute_and_retry 2 "tar -xzf eksctl_Linux_amd64.tar.gz -C ${{ github.workspace }}/eksctl && rm eksctl_Linux_amd64.tar.gz"
101103
echo "${{ github.workspace }}/eksctl" >> $GITHUB_PATH
102104
103105
- name: Create role for AWS access from the sample app
@@ -113,17 +115,22 @@ jobs:
113115
--approve
114116
115117
- name: Set up terraform
116-
uses: hashicorp/setup-terraform@v3
117-
with:
118-
terraform_wrapper: false
118+
run: |
119+
source ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
120+
execute_and_retry 2 "wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg"
121+
echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list
122+
sudo apt update && sudo apt install terraform
123+
124+
- name: Initiate Terraform
125+
working-directory: terraform/eks
126+
run: |
127+
source ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
128+
execute_and_retry 2 "terraform init && terraform validate" "rm -rf .terraform && rm -rf .terraform.lock.hcl"
119129
120130
- name: Deploy sample app via terraform and wait for the endpoint to come online
121131
id: deploy-sample-app
122132
working-directory: terraform/eks
123-
run: |
124-
terraform init
125-
terraform validate
126-
133+
run: |
127134
# Attempt to deploy the sample app on an EKS instance and wait for its endpoint to come online.
128135
# There may be occasional failures due to transient issues, so try up to 2 times.
129136
# deployment_failed of 0 indicates that both the terraform deployment and the endpoint are running, while 1 indicates
@@ -158,15 +165,6 @@ jobs:
158165
${{ inputs.aws-region }} \
159166
${{ env.SAMPLE_APP_NAMESPACE }}
160167
161-
# If the workflow provides a specific ADOT image to test, patch the deployment and restart CW agent related pods
162-
if [ ${{ inputs.appsignals-adot-image-name }} != "" ]; then
163-
kubectl patch deploy -namazon-cloudwatch amazon-cloudwatch-observability-controller-manager --type='json' \
164-
-p='[{"op": "replace", "path": "/spec/template/spec/containers/0/args/0", "value": "--auto-instrumentation-java-image=${{ inputs.appsignals-adot-image-name }}"}]'
165-
166-
kubectl delete pods --all -n amazon-cloudwatch
167-
kubectl wait --for=condition=Ready pod --all -n amazon-cloudwatch
168-
fi
169-
170168
kubectl delete pods --all -n ${{ env.SAMPLE_APP_NAMESPACE }}
171169
kubectl wait --for=condition=Ready pod --all -n ${{ env.SAMPLE_APP_NAMESPACE }}
172170
@@ -244,10 +242,15 @@ jobs:
244242
- name: Call all test APIs
245243
continue-on-error: true
246244
run: |
247-
curl -S -s -o /dev/null http://${{ env.APP_ENDPOINT }}/outgoing-http-call/
248-
curl -S -s -o /dev/null http://${{ env.APP_ENDPOINT }}/aws-sdk-call/
249-
curl -S -s -o /dev/null http://${{ env.APP_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_POD_IP }}/
250-
curl -S -s -o /dev/null http://${{ env.APP_ENDPOINT }}/client-call/
245+
curl -S -o /dev/null http://${{ env.APP_ENDPOINT }}/outgoing-http-call/
246+
curl -S -o /dev/null http://${{ env.APP_ENDPOINT }}/aws-sdk-call/
247+
curl -S -o /dev/null http://${{ env.APP_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_POD_IP }}/
248+
curl -S -o /dev/null http://${{ env.APP_ENDPOINT }}/client-call/
249+
250+
- name: Build Gradlew
251+
run: |
252+
source ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
253+
execute_and_retry 2 "./gradlew"
251254
252255
# Validation for app signals telemetry data
253256
- name: Call endpoint and validate generated EMF logs
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#!/bin/bash
2+
3+
# Run a shell command string, retrying it when it fails.
#
# Arguments:
#   $1 - maximum number of attempts
#   $2 - command string to run (evaluated with eval in the current shell)
#   $3 - optional cleanup command string, evaluated after every failed attempt
#
# Environment:
#   RETRY_SLEEP_SECONDS - delay between attempts in seconds (default: 5)
#
# Returns 0 as soon as one attempt succeeds. Once every attempt has failed,
# prints a message and terminates the calling shell with `exit 1`.
execute_and_retry () {
  local max_retry=$1
  local cmd=$2
  local cleanup=${3:-}
  local retry_counter=0
  local cmd_status

  while [ "$retry_counter" -lt "$max_retry" ]; do
    cmd_status=0
    eval "$cmd" || cmd_status=$?

    # Any non-zero status is a failure. Testing only for status 1 would let
    # failures such as 2 or 127 fall through and be reported as success.
    if [ "$cmd_status" -eq 0 ]; then
      break
    fi

    eval "$cleanup"
    retry_counter=$((retry_counter + 1))

    if [ "$retry_counter" -eq "$max_retry" ]; then
      echo "Max retry reached, command failed to execute properly. Exiting code"
      exit 1
    fi

    # Only wait when another attempt is actually going to happen.
    sleep "${RETRY_SLEEP_SECONDS:-5}"
  done
}
24+
25+
export -f execute_and_retry

0 commit comments

Comments
 (0)