Skip to content

Commit 9005f19

Browse files
committed
Check if remote application is deployed properly
Check if ec2 instance inline code fail
1 parent bdf1566 commit 9005f19

File tree

6 files changed

+196
-87
lines changed

6 files changed

+196
-87
lines changed
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# Reusable composite action that runs a shell command and retries it on failure.
#
# Flow:
#   1. `pre-command`  - run once before the first attempt (skipped when empty).
#   2. `command`      - attempted up to `max_retry` times; after every failed
#                       attempt `cleanup` is run, then the loop waits 5 seconds
#                       before trying again.
#   3. `post-command` - run once, only reached when `command` succeeded
#                       (skipped when empty).
#
# The step fails (exit 1) when every attempt of `command` has failed.
name: Command Retry Logic
description: Run a shell command and retry it a bounded number of times before failing.

# NOTE(review): `type` is not listed among the input keys in the GitHub action
# metadata syntax; it is kept here as documentation of the expected value.
inputs:
  pre-command:
    description: Shell command line run once before the first attempt.
    required: false
    type: string
  max_retry:
    description: Maximum number of attempts of the command before the step fails.
    required: false
    type: number
    default: "2"
  command:
    description: Shell command line to execute and retry on a non-zero exit status.
    required: true
    type: string
  cleanup:
    description: Shell command line run after each failed attempt of the command.
    required: false
    type: string
  post-command:
    description: Shell command line run once after the command has succeeded.
    required: false
    type: string

runs:
  using: "composite"
  steps:
    - name: Run pre-command
      shell: bash
      env:
        PRE_COMMAND: ${{ inputs.pre-command }}
      run: |
        # Use eval instead of a bare $PRE_COMMAND expansion so that pipes, `&&`
        # and quoting inside the input are interpreted by the shell, the same
        # way the main command is executed in the next step.
        if [ -n "$PRE_COMMAND" ]; then
          eval "$PRE_COMMAND"
        fi

    - name: Run command
      shell: bash
      env:
        MAX_RETRY: ${{ inputs.max_retry }}
        COMMAND: ${{ inputs.command }}
        CLEANUP: ${{ inputs.cleanup }}
      run: |
        # Reject a non-numeric max_retry up front; otherwise the loop test
        # below errors out and the command would silently never run.
        case "$MAX_RETRY" in
          ''|*[!0-9]*)
            echo "max_retry must be a non-negative integer, got '$MAX_RETRY'"
            exit 1
            ;;
        esac

        retry_counter=0
        while [ "$retry_counter" -lt "$MAX_RETRY" ]; do
          # `|| attempt_failed=$?` captures the exit status without tripping
          # the shell's errexit mode, so a failed attempt can be retried.
          attempt_failed=0
          eval "$COMMAND" || attempt_failed=$?

          if [ "$attempt_failed" -eq 0 ]; then
            break
          fi

          eval "$CLEANUP"
          retry_counter=$((retry_counter + 1))

          # Compare against MAX_RETRY (the variable that is actually defined);
          # the environment variable name is case-sensitive.
          if [ "$retry_counter" -ge "$MAX_RETRY" ]; then
            echo "Max retry reached, command failed to execute properly. Exiting code"
            exit 1
          fi

          sleep 5
        done

    - name: Run post command
      shell: bash
      env:
        POST_COMMAND: ${{ inputs.post-command }}
      run: |
        # eval so that a post-command containing pipes or `&&` is executed as
        # a shell command line rather than passed as literal arguments.
        if [ -n "$POST_COMMAND" ]; then
          eval "$POST_COMMAND"
        fi
63+
64+
65+
66+
67+
68+
69+
70+
71+

.github/workflows/appsignals-e2e-ec2-test.yml

Lines changed: 32 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ env:
3131
LOG_GROUP_NAME: /aws/appsignals/generic
3232
TEST: ${{ inputs.test }}
3333
GET_ADOT_JAR_COMMAND: "wget -O adot.jar https://github.com/aws-observability/aws-otel-java-instrumentation/releases/latest/download/aws-opentelemetry-agent.jar"
34+
GET_CW_AGENT_RPM_COMMAND: "wget -O cw-agent.rpm https://amazoncloudwatch-agent-${{ inputs.aws-region }}.s3.${{ inputs.aws-region }}.amazonaws.com/amazon_linux/amd64/1.300031.0b313/amazon-cloudwatch-agent.rpm"
3435
TEST_RESOURCES_FOLDER: /home/runner/work/aws-application-signals-test-framework/aws-application-signals-test-framework
3536

3637

@@ -42,9 +43,6 @@ jobs:
4243
with:
4344
fetch-depth: 0
4445

45-
- name: Set CW Agent RPM environment variable
46-
run: echo GET_CW_AGENT_RPM_COMMAND="wget -O cw-agent.rpm https://amazoncloudwatch-agent-${{ inputs.aws-region }}.s3.${{ inputs.aws-region }}.amazonaws.com/amazon_linux/amd64/1.300031.0b313/amazon-cloudwatch-agent.rpm" >> $GITHUB_ENV
47-
4846
- name: Generate testing id
4947
run: echo TESTING_ID="${{ github.run_id }}-${{ github.run_number }}" >> $GITHUB_ENV
5048

@@ -67,17 +65,17 @@ jobs:
6765
aws-region: ${{ inputs.aws-region }}
6866

6967
- name: Set up terraform
70-
run: |
71-
source ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
72-
execute_and_retry 2 "wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg"
73-
echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list
74-
sudo apt update && sudo apt install terraform
68+
uses: ./.github/workflows/actions/execute_and_retry
with:
  # Retried part: fetch the HashiCorp signing key and store it de-armored.
  command: "wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg"
  # Register the apt repository, then install terraform. The commands are
  # chained with `&&` inside a folded block scalar: a trailing `\` inside a
  # single-quoted YAML scalar is a literal backslash and the line break folds
  # to a space, which turned `sudo apt update` into extra arguments of `tee`
  # instead of a separate command.
  post-command: >-
    echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list &&
    sudo apt update && sudo apt install terraform
7573

7674
- name: Initiate Terraform
77-
working-directory: terraform/ec2
78-
run: |
79-
source ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
80-
execute_and_retry 2 "terraform init && terraform validate" "rm -rf .terraform && rm -rf .terraform.lock.hcl"
75+
uses: ./.github/workflows/actions/execute_and_retry
76+
with:
77+
command: "cd ${{ env.TEST_RESOURCES_FOLDER }}/terraform/ec2 && terraform init && terraform validate"
78+
cleanup: "rm -rf .terraform && rm -rf .terraform.lock.hcl"
8179

8280
- name: Deploy sample app via terraform and wait for endpoint to come online
8381
working-directory: terraform/ec2
@@ -108,10 +106,26 @@ jobs:
108106
# Attempts to connect will be made for up to 10 minutes
109107
if [ $deployment_failed -eq 0 ]; then
110108
echo "Attempting to connect to the endpoint"
111-
sample_app_endpoint=http://$(terraform output sample_app_main_service_public_dns):8080
109+
main_sample_app_endpoint=http://$(terraform output sample_app_main_service_public_dns):8080
110+
attempt_counter=0
111+
max_attempts=30
112+
until $(curl --output /dev/null --silent --head --fail $(echo "$main_sample_app_endpoint" | tr -d '"')); do
113+
if [ ${attempt_counter} -eq ${max_attempts} ];then
114+
echo "Failed to connect to endpoint. Will attempt to redeploy sample app."
115+
deployment_failed=1
116+
break
117+
fi
118+
119+
printf '.'
120+
attempt_counter=$(($attempt_counter+1))
121+
sleep 10
122+
done
123+
124+
echo "Attempting to connect to the remote sample app endpoint"
125+
remote_sample_app_endpoint=http://$(terraform output sample_app_remote_service_public_ip):8080/healthcheck
112126
attempt_counter=0
113-
max_attempts=60
114-
until $(curl --output /dev/null --silent --head --fail $(echo "$sample_app_endpoint" | tr -d '"')); do
127+
max_attempts=30
128+
until $(curl --output /dev/null --silent --head --fail $(echo "$remote_sample_app_endpoint" | tr -d '"')); do
115129
if [ ${attempt_counter} -eq ${max_attempts} ];then
116130
echo "Failed to connect to endpoint. Will attempt to redeploy sample app."
117131
deployment_failed=1
@@ -164,9 +178,9 @@ jobs:
164178
curl -S -s http://${{ env.MAIN_SERVICE_ENDPOINT }}/client-call/
165179
166180
- name: Build Gradlew
167-
run: |
168-
source ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
169-
execute_and_retry 2 ./gradlew
181+
uses: ./.github/workflows/actions/execute_and_retry
182+
with:
183+
command: "./gradlew"
170184

171185
# Validation for pulse telemetry data
172186
- name: Validate generated EMF logs

.github/workflows/appsignals-e2e-eks-test.yml

Lines changed: 50 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -94,38 +94,42 @@ jobs:
9494
- name: Set up kubeconfig
9595
run: aws eks update-kubeconfig --name ${{ inputs.test-cluster-name }} --region ${{ inputs.aws-region }}
9696

97+
- name: Download eksctl
98+
uses: ./.github/workflows/actions/execute_and_retry
99+
with:
100+
pre-command: 'mkdir ${{ github.workspace }}/eksctl'
101+
command: 'curl -sLO "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_Linux_amd64.tar.gz"'
102+
97103
- name: Install eksctl
98104
run: |
99-
source ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
100-
mkdir ${{ github.workspace }}/eksctl
101-
curl -sLO "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_Linux_amd64.tar.gz"
102-
execute_and_retry 2 "tar -xzf eksctl_Linux_amd64.tar.gz -C ${{ github.workspace }}/eksctl && rm eksctl_Linux_amd64.tar.gz"
105+
tar -xzf eksctl_Linux_amd64.tar.gz -C ${{ github.workspace }}/eksctl && rm eksctl_Linux_amd64.tar.gz
103106
echo "${{ github.workspace }}/eksctl" >> $GITHUB_PATH
104107
105108
- name: Create role for AWS access from the sample app
106109
id: create_service_account
107-
run: |
108-
eksctl create iamserviceaccount \
110+
uses: ./.github/workflows/actions/execute_and_retry
111+
with:
112+
command: "eksctl create iamserviceaccount \
109113
--name service-account-${{ env.TESTING_ID }} \
110114
--namespace ${{ env.SAMPLE_APP_NAMESPACE }} \
111115
--cluster ${{ inputs.test-cluster-name }} \
112116
--role-name eks-s3-access-${{ env.TESTING_ID }} \
113117
--attach-policy-arn arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess \
114118
--region ${{ inputs.aws-region }} \
115-
--approve
119+
--approve"
116120

117121
- name: Set up terraform
118-
run: |
119-
source ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
120-
execute_and_retry 2 "wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg"
121-
echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list
122-
sudo apt update && sudo apt install terraform
122+
uses: ./.github/workflows/actions/execute_and_retry
with:
  # Retried part: fetch the HashiCorp signing key and store it de-armored.
  command: "wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg"
  # Register the apt repository, then install terraform. The commands are
  # chained with `&&` inside a folded block scalar: a trailing `\` inside a
  # single-quoted YAML scalar is a literal backslash and the line break folds
  # to a space, which turned `sudo apt update` into extra arguments of `tee`
  # instead of a separate command.
  post-command: >-
    echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list &&
    sudo apt update && sudo apt install terraform
123127

124128
- name: Initiate Terraform
125-
working-directory: terraform/eks
126-
run: |
127-
source ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
128-
execute_and_retry 2 "terraform init && terraform validate" "rm -rf .terraform && rm -rf .terraform.lock.hcl"
129+
uses: ./.github/workflows/actions/execute_and_retry
130+
with:
131+
command: "cd ${{ env.TEST_RESOURCES_FOLDER }}/terraform/eks && terraform init && terraform validate"
132+
cleanup: "rm -rf .terraform && rm -rf .terraform.lock.hcl"
129133

130134
- name: Deploy sample app via terraform and wait for the endpoint to come online
131135
id: deploy-sample-app
@@ -159,10 +163,25 @@ jobs:
159163
# If the deployment_failed is still 0, then the terraform deployment succeeded and now try to connect to the endpoint
160164
# after installing App Signals. Attempts to connect will be made for up to 10 minutes
161165
if [ $deployment_failed -eq 0 ]; then
166+
source ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
167+
execute_and_retry 2 \
168+
"${{ env.TEST_RESOURCES_FOLDER }}/enablement-script/scripts/eks/appsignals/enable-app-signals.sh \
169+
${{ inputs.test-cluster-name }} \
170+
${{ inputs.aws-region }} \
171+
${{ env.SAMPLE_APP_NAMESPACE }}" \
172+
"${{ env.TEST_RESOURCES_FOLDER }}/enablement-script/scripts/eks/appsignals/clean-app-signals.sh \
173+
${{ inputs.test-cluster-name }} \
174+
${{ inputs.aws-region }} \
175+
${{ env.SAMPLE_APP_NAMESPACE }} && \
176+
aws eks update-kubeconfig --name ${{ inputs.test-cluster-name }} --region ${{ inputs.aws-region }}"
177+
178+
kubectl delete pods --all -n ${{ env.SAMPLE_APP_NAMESPACE }}
179+
kubectl wait --for=condition=Ready pod --all -n ${{ env.SAMPLE_APP_NAMESPACE }}
180+
162181
echo "Attempting to connect to the main sample app endpoint"
163182
main_sample_app_endpoint=http://$(terraform output sample_app_endpoint)
164183
attempt_counter=0
165-
max_attempts=60
184+
max_attempts=30
166185
until $(curl --output /dev/null --silent --head --fail $(echo "$main_sample_app_endpoint" | tr -d '"')); do
167186
if [ ${attempt_counter} -eq ${max_attempts} ];then
168187
echo "Failed to connect to endpoint. Will attempt to redeploy sample app."
@@ -176,12 +195,10 @@ jobs:
176195
done
177196
178197
echo "Attempting to connect to the remote sample app endpoint"
179-
remote_service_pod_ip=$(kubectl get pods -n ${{ env.SAMPLE_APP_NAMESPACE }} --selector=app=remote-app -o jsonpath='{.items[0].status.podIP}')
180-
remote_sample_app_endpoint=http://$remote_service_pod_ip/healthcheck
181-
echo $remote_service_pod_ip
198+
remote_sample_app_endpoint=http://$(terraform output sample_remote_app_endpoint)/healthcheck
182199
echo $remote_sample_app_endpoint
183200
attempt_counter=0
184-
max_attempts=60
201+
max_attempts=30
185202
until $(curl --output /dev/null --silent --head --fail $(echo "$remote_sample_app_endpoint" | tr -d '"')); do
186203
if [ ${attempt_counter} -eq ${max_attempts} ];then
187204
echo "Failed to connect to endpoint. Will attempt to redeploy sample app."
@@ -198,6 +215,15 @@ jobs:
198215
# If the deployment_failed is 1 then either the terraform deployment or the endpoint connection failed, so first destroy the
199216
# resources created from terraform and try again.
200217
if [ $deployment_failed -eq 1 ]; then
218+
echo "Cleaning up App Signal"
219+
./clean-app-signals.sh \
220+
${{ inputs.test-cluster-name }} \
221+
${{ inputs.aws-region }} \
222+
${{ env.SAMPLE_APP_NAMESPACE }}
223+
224+
# Running clean-app-signals.sh removes the current cluster from the kubeconfig. Update the kubeconfig again for subsequent runs.
225+
aws eks update-kubeconfig --name ${{ inputs.test-cluster-name }} --region ${{ inputs.aws-region }}
226+
201227
echo "Destroying terraform"
202228
terraform destroy -auto-approve \
203229
-var="test_id=${{ env.TESTING_ID }}" \
@@ -220,24 +246,6 @@ jobs:
220246
fi
221247
done
222248
223-
- name: Install AppSignals addon
224-
run: |
225-
source ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
226-
execute_and_retry 2 \
227-
"${GITHUB_WORKSPACE}/enablement-script/scripts/eks/appsignals/enable-app-signals.sh \
228-
${{ inputs.test-cluster-name }} \
229-
${{ inputs.aws-region }} \
230-
${{ env.SAMPLE_APP_NAMESPACE }}" \
231-
" echo "Cleaning up App Signal" \
232-
./clean-app-signals.sh \
233-
${{ inputs.test-cluster-name }} \
234-
${{ inputs.aws-region }} \
235-
${{ env.SAMPLE_APP_NAMESPACE }} && \
236-
aws eks update-kubeconfig --name ${{ inputs.test-cluster-name }} --region ${{ inputs.aws-region }}"
237-
238-
kubectl delete pods --all -n ${{ env.SAMPLE_APP_NAMESPACE }}
239-
kubectl wait --for=condition=Ready pod --all -n ${{ env.SAMPLE_APP_NAMESPACE }}
240-
241249
- name: Get remote service pod name and IP
242250
run: |
243251
echo "REMOTE_SERVICE_DEPLOYMENT_NAME=$(kubectl get deployments -n ${{ env.SAMPLE_APP_NAMESPACE }} --selector=app=remote-app -o jsonpath='{.items[0].metadata.name}')" >> $GITHUB_ENV
@@ -267,9 +275,9 @@ jobs:
267275
curl -S -s http://${{ env.APP_ENDPOINT }}/client-call/
268276
269277
- name: Build Gradlew
270-
run: |
271-
source ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
272-
execute_and_retry 2 "./gradlew"
278+
uses: ./.github/workflows/actions/execute_and_retry
279+
with:
280+
command: "./gradlew"
273281

274282
# Validation for app signals telemetry data
275283
- name: Call endpoint and validate generated EMF logs

.github/workflows/test.yml

Lines changed: 0 additions & 27 deletions
This file was deleted.

terraform/ec2/main.tf

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@ resource "null_resource" "main_service_setup" {
104104

105105
provisioner "remote-exec" {
106106
inline = [
107+
# Make the Terraform fail if any step throws an error
108+
"set -o errexit",
107109
# Install Java 11 and wget
108110
"sudo yum install wget java-11-amazon-corretto -y",
109111

@@ -165,6 +167,8 @@ resource "null_resource" "remote_service_setup" {
165167

166168
provisioner "remote-exec" {
167169
inline = [
170+
# Make the Terraform fail if any step throws an error
171+
"set -o errexit",
168172
# Install Java 11 and wget
169173
"sudo yum install wget java-11-amazon-corretto -y",
170174

0 commit comments

Comments
 (0)