Skip to content

Commit 466f608

Browse files
committed
Check if remote application is deployed properly
Check if ec2 instance inline code fail
1 parent bdf1566 commit 466f608

File tree

7 files changed

+203
-87
lines changed

7 files changed

+203
-87
lines changed
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# This composite action retries a command if it fails. Besides the optional pre-command and post-command inputs, it accepts:
2+
# max_retry: (Optional) Maximum number of attempts, defaults to 2
3+
# command: Command to execute
4+
# cleanup: (Optional) Command for cleaning up resources if command fails
5+
name: Command Retry Logic
6+
7+
inputs:
8+
pre-command:
9+
required: false
10+
type: string
11+
max_retry:
12+
required: false
13+
type: number
14+
default: 2
15+
command:
16+
required: true
17+
type: string
18+
cleanup:
19+
required: false
20+
type: string
21+
post-command:
22+
required: false
23+
type: string
24+
25+
runs:
26+
using: "composite"
27+
steps:
28+
- name: Run pre-command
29+
shell: bash
30+
env:
31+
PRE_COMMAND: ${{ inputs.pre-command }}
32+
run: |
33+
$PRE_COMMAND
34+
pwd
35+
36+
- name: Run command
37+
shell: bash
38+
env:
39+
MAX_RETRY: ${{ inputs.max_retry }}
40+
COMMAND: ${{ inputs.command }}
41+
CLEANUP: ${{ inputs.cleanup }}
42+
run: |
43+
pwd
44+
retry_counter=0
45+
while [ $retry_counter -lt $MAX_RETRY ]; do
46+
attempt_failed=0
47+
eval "$COMMAND" || attempt_failed=$?
48+
49+
if [ $attempt_failed -ne 0 ]; then
50+
eval "$CLEANUP"
51+
retry_counter=$(($retry_counter+1))
52+
sleep 5
53+
else
54+
break
55+
fi
56+
57+
# Compare against MAX_RETRY (the env var defined for this step). Shell variable
# names are case-sensitive, so $max_retry is unset and the test would never be true,
# letting the step succeed after every attempt has failed.
if [ "$retry_counter" -eq "$MAX_RETRY" ]; then
58+
echo "Max retry reached, command failed to execute properly. Exiting code"
59+
exit 1
60+
fi
61+
done
62+
63+
- name: Run post command
64+
shell: bash
65+
env:
66+
POST_COMMAND: ${{ inputs.post-command }}
67+
# eval the input so that pipes, quotes and && inside it are interpreted by the shell,
# the same way the main command is executed; a bare $POST_COMMAND is only word-split.
run: eval "$POST_COMMAND"
68+
69+
70+
71+
72+
73+
74+
75+
76+

.github/workflows/appsignals-e2e-ec2-test.yml

Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ env:
3131
LOG_GROUP_NAME: /aws/appsignals/generic
3232
TEST: ${{ inputs.test }}
3333
GET_ADOT_JAR_COMMAND: "wget -O adot.jar https://github.com/aws-observability/aws-otel-java-instrumentation/releases/latest/download/aws-opentelemetry-agent.jar"
34+
GET_CW_AGENT_RPM_COMMAND: "wget -O cw-agent.rpm https://amazoncloudwatch-agent-${{ inputs.aws-region }}.s3.${{ inputs.aws-region }}.amazonaws.com/amazon_linux/amd64/1.300031.0b313/amazon-cloudwatch-agent.rpm"
3435
TEST_RESOURCES_FOLDER: /home/runner/work/aws-application-signals-test-framework/aws-application-signals-test-framework
3536

3637

@@ -42,9 +43,6 @@ jobs:
4243
with:
4344
fetch-depth: 0
4445

45-
- name: Set CW Agent RPM environment variable
46-
run: echo GET_CW_AGENT_RPM_COMMAND="wget -O cw-agent.rpm https://amazoncloudwatch-agent-${{ inputs.aws-region }}.s3.${{ inputs.aws-region }}.amazonaws.com/amazon_linux/amd64/1.300031.0b313/amazon-cloudwatch-agent.rpm" >> $GITHUB_ENV
47-
4846
- name: Generate testing id
4947
run: echo TESTING_ID="${{ github.run_id }}-${{ github.run_number }}" >> $GITHUB_ENV
5048

@@ -67,17 +65,17 @@ jobs:
6765
aws-region: ${{ inputs.aws-region }}
6866

6967
- name: Set up terraform
70-
run: |
71-
source ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
72-
execute_and_retry 2 "wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg"
73-
echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list
74-
sudo apt update && sudo apt install terraform
68+
uses: ./.github/workflows/actions/execute_and_retry
69+
with:
70+
command: "wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg"
71+
# Chain the two commands with &&: a backslash inside a single-quoted YAML scalar is
# literal text, not a line continuation.
post-command: 'echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list &&
72+
sudo apt update && sudo apt install terraform'
7573

7674
- name: Initiate Terraform
77-
working-directory: terraform/ec2
78-
run: |
79-
source ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
80-
execute_and_retry 2 "terraform init && terraform validate" "rm -rf .terraform && rm -rf .terraform.lock.hcl"
75+
uses: ./.github/workflows/actions/execute_and_retry
76+
with:
77+
command: "cd ${{ env.TEST_RESOURCES_FOLDER }}/terraform/ec2 && terraform init && terraform validate"
78+
cleanup: "rm -rf .terraform && rm -rf .terraform.lock.hcl"
8179

8280
- name: Deploy sample app via terraform and wait for endpoint to come online
8381
working-directory: terraform/ec2
@@ -108,10 +106,26 @@ jobs:
108106
# Attempts to connect will be made for up to 10 minutes
109107
if [ $deployment_failed -eq 0 ]; then
110108
echo "Attempting to connect to the endpoint"
111-
sample_app_endpoint=http://$(terraform output sample_app_main_service_public_dns):8080
109+
main_sample_app_endpoint=http://$(terraform output sample_app_main_service_public_dns):8080
112110
attempt_counter=0
113111
max_attempts=60
114-
until $(curl --output /dev/null --silent --head --fail $(echo "$sample_app_endpoint" | tr -d '"')); do
112+
until $(curl --output /dev/null --silent --head --fail $(echo "$main_sample_app_endpoint" | tr -d '"')); do
113+
if [ ${attempt_counter} -eq ${max_attempts} ];then
114+
echo "Failed to connect to endpoint. Will attempt to redeploy sample app."
115+
deployment_failed=1
116+
break
117+
fi
118+
119+
printf '.'
120+
attempt_counter=$(($attempt_counter+1))
121+
sleep 10
122+
done
123+
124+
echo "Attempting to connect to the remote sample app endpoint"
125+
remote_sample_app_endpoint=http://$(terraform output sample_app_remote_service_public_ip):8080/healthcheck
126+
attempt_counter=0
127+
max_attempts=60
128+
until $(curl --output /dev/null --silent --head --fail $(echo "$remote_sample_app_endpoint" | tr -d '"')); do
115129
if [ ${attempt_counter} -eq ${max_attempts} ];then
116130
echo "Failed to connect to endpoint. Will attempt to redeploy sample app."
117131
deployment_failed=1
@@ -164,9 +178,9 @@ jobs:
164178
curl -S -s http://${{ env.MAIN_SERVICE_ENDPOINT }}/client-call/
165179
166180
- name: Build Gradlew
167-
run: |
168-
source ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
169-
execute_and_retry 2 ./gradlew
181+
uses: ./.github/workflows/actions/execute_and_retry
182+
with:
183+
command: "./gradlew"
170184

171185
# Validation for pulse telemetry data
172186
- name: Validate generated EMF logs

.github/workflows/appsignals-e2e-eks-test.yml

Lines changed: 48 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -94,38 +94,42 @@ jobs:
9494
- name: Set up kubeconfig
9595
run: aws eks update-kubeconfig --name ${{ inputs.test-cluster-name }} --region ${{ inputs.aws-region }}
9696

97+
- name: Download eksctl
98+
uses: ./.github/workflows/actions/execute_and_retry
99+
with:
100+
pre-command: 'mkdir ${{ github.workspace }}/eksctl'
101+
command: 'curl -sLO "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_Linux_amd64.tar.gz"'
102+
97103
- name: Install eksctl
98104
run: |
99-
source ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
100-
mkdir ${{ github.workspace }}/eksctl
101-
curl -sLO "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_Linux_amd64.tar.gz"
102-
execute_and_retry 2 "tar -xzf eksctl_Linux_amd64.tar.gz -C ${{ github.workspace }}/eksctl && rm eksctl_Linux_amd64.tar.gz"
105+
tar -xzf eksctl_Linux_amd64.tar.gz -C ${{ github.workspace }}/eksctl && rm eksctl_Linux_amd64.tar.gz
103106
echo "${{ github.workspace }}/eksctl" >> $GITHUB_PATH
104107
105108
- name: Create role for AWS access from the sample app
106109
id: create_service_account
107-
run: |
108-
eksctl create iamserviceaccount \
110+
uses: ./.github/workflows/actions/execute_and_retry
111+
with:
112+
command: "eksctl create iamserviceaccount \
109113
--name service-account-${{ env.TESTING_ID }} \
110114
--namespace ${{ env.SAMPLE_APP_NAMESPACE }} \
111115
--cluster ${{ inputs.test-cluster-name }} \
112116
--role-name eks-s3-access-${{ env.TESTING_ID }} \
113117
--attach-policy-arn arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess \
114118
--region ${{ inputs.aws-region }} \
115-
--approve
119+
--approve"
116120

117121
- name: Set up terraform
118-
run: |
119-
source ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
120-
execute_and_retry 2 "wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg"
121-
echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list
122-
sudo apt update && sudo apt install terraform
122+
uses: ./.github/workflows/actions/execute_and_retry
123+
with:
124+
command: "wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg"
125+
# Chain the two commands with &&: a backslash inside a single-quoted YAML scalar is
# literal text, not a line continuation.
post-command: 'echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list &&
126+
sudo apt update && sudo apt install terraform'
123127

124128
- name: Initiate Terraform
125-
working-directory: terraform/eks
126-
run: |
127-
source ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
128-
execute_and_retry 2 "terraform init && terraform validate" "rm -rf .terraform && rm -rf .terraform.lock.hcl"
129+
uses: ./.github/workflows/actions/execute_and_retry
130+
with:
131+
command: "cd ${{ env.TEST_RESOURCES_FOLDER }}/terraform/eks && terraform init && terraform validate"
132+
cleanup: "rm -rf .terraform && rm -rf .terraform.lock.hcl"
129133

130134
- name: Deploy sample app via terraform and wait for the endpoint to come online
131135
id: deploy-sample-app
@@ -159,6 +163,21 @@ jobs:
159163
# If the deployment_failed is still 0, then the terraform deployment succeeded and now try to connect to the endpoint
160164
# after installing App Signals. Attempts to connect will be made for up to 10 minutes
161165
if [ $deployment_failed -eq 0 ]; then
166+
source ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
167+
execute_and_retry 2 \
168+
"${{ env.TEST_RESOURCES_FOLDER }}/enablement-script/scripts/eks/appsignals/enable-app-signals.sh \
169+
${{ inputs.test-cluster-name }} \
170+
${{ inputs.aws-region }} \
171+
${{ env.SAMPLE_APP_NAMESPACE }}" \
172+
"${{ env.TEST_RESOURCES_FOLDER }}/enablement-script/scripts/eks/appsignals/clean-app-signals.sh \
173+
${{ inputs.test-cluster-name }} \
174+
${{ inputs.aws-region }} \
175+
${{ env.SAMPLE_APP_NAMESPACE }} && \
176+
aws eks update-kubeconfig --name ${{ inputs.test-cluster-name }} --region ${{ inputs.aws-region }}"
177+
178+
kubectl delete pods --all -n ${{ env.SAMPLE_APP_NAMESPACE }}
179+
kubectl wait --for=condition=Ready pod --all -n ${{ env.SAMPLE_APP_NAMESPACE }}
180+
162181
echo "Attempting to connect to the main sample app endpoint"
163182
main_sample_app_endpoint=http://$(terraform output sample_app_endpoint)
164183
attempt_counter=0
@@ -176,9 +195,7 @@ jobs:
176195
done
177196
178197
echo "Attempting to connect to the remote sample app endpoint"
179-
remote_service_pod_ip=$(kubectl get pods -n ${{ env.SAMPLE_APP_NAMESPACE }} --selector=app=remote-app -o jsonpath='{.items[0].status.podIP}')
180-
remote_sample_app_endpoint=http://$remote_service_pod_ip/healthcheck
181-
echo $remote_service_pod_ip
198+
remote_sample_app_endpoint=http://$(terraform output sample_remote_app_endpoint)/healthcheck
182199
echo $remote_sample_app_endpoint
183200
attempt_counter=0
184201
max_attempts=60
@@ -198,6 +215,15 @@ jobs:
198215
# If the deployment_failed is 1 then either the terraform deployment or the endpoint connection failed, so first destroy the
199216
# resources created from terraform and try again.
200217
if [ $deployment_failed -eq 1 ]; then
218+
echo "Cleaning up App Signal"
219+
./clean-app-signals.sh \
220+
${{ inputs.test-cluster-name }} \
221+
${{ inputs.aws-region }} \
222+
${{ env.SAMPLE_APP_NAMESPACE }}
223+
224+
# Running clean-app-signal.sh removes the current cluster from the config. Update the cluster again for subsequent runs.
225+
aws eks update-kubeconfig --name ${{ inputs.test-cluster-name }} --region ${{ inputs.aws-region }}
226+
201227
echo "Destroying terraform"
202228
terraform destroy -auto-approve \
203229
-var="test_id=${{ env.TESTING_ID }}" \
@@ -220,24 +246,6 @@ jobs:
220246
fi
221247
done
222248
223-
- name: Install AppSignals addon
224-
run: |
225-
source ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
226-
execute_and_retry 2 \
227-
"${GITHUB_WORKSPACE}/enablement-script/scripts/eks/appsignals/enable-app-signals.sh \
228-
${{ inputs.test-cluster-name }} \
229-
${{ inputs.aws-region }} \
230-
${{ env.SAMPLE_APP_NAMESPACE }}" \
231-
" echo "Cleaning up App Signal" \
232-
./clean-app-signals.sh \
233-
${{ inputs.test-cluster-name }} \
234-
${{ inputs.aws-region }} \
235-
${{ env.SAMPLE_APP_NAMESPACE }} && \
236-
aws eks update-kubeconfig --name ${{ inputs.test-cluster-name }} --region ${{ inputs.aws-region }}"
237-
238-
kubectl delete pods --all -n ${{ env.SAMPLE_APP_NAMESPACE }}
239-
kubectl wait --for=condition=Ready pod --all -n ${{ env.SAMPLE_APP_NAMESPACE }}
240-
241249
- name: Get remote service pod name and IP
242250
run: |
243251
echo "REMOTE_SERVICE_DEPLOYMENT_NAME=$(kubectl get deployments -n ${{ env.SAMPLE_APP_NAMESPACE }} --selector=app=remote-app -o jsonpath='{.items[0].metadata.name}')" >> $GITHUB_ENV
@@ -267,9 +275,9 @@ jobs:
267275
curl -S -s http://${{ env.APP_ENDPOINT }}/client-call/
268276
269277
- name: Build Gradlew
270-
run: |
271-
source ${{ env.TEST_RESOURCES_FOLDER }}/.github/workflows/util/execute_and_retry.sh
272-
execute_and_retry 2 "./gradlew"
278+
uses: ./.github/workflows/actions/execute_and_retry
279+
with:
280+
command: "./gradlew"
273281

274282
# Validation for app signals telemetry data
275283
- name: Call endpoint and validate generated EMF logs

.github/workflows/test.yml

Lines changed: 0 additions & 27 deletions
This file was deleted.

.github/workflows/util/execute_and_retry.sh

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
#!/bin/bash
2-
31
# This function is for retrying commands in the case they fail. It accepts three arguments
42
# $1: Number of retries it will attempt
53
# $2: Command to execute
@@ -28,4 +26,8 @@ execute_and_retry () {
2826
done
2927
}
3028

31-
export -f execute_and_retry
29+
export -f execute_and_retry
30+
31+
32+
33+

terraform/ec2/main.tf

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@ resource "null_resource" "main_service_setup" {
104104

105105
provisioner "remote-exec" {
106106
inline = [
107+
# Make the Terraform fail if any step throws an error
108+
"set -o errexit",
107109
# Install Java 11 and wget
108110
"sudo yum install wget java-11-amazon-corretto -y",
109111

@@ -165,6 +167,8 @@ resource "null_resource" "remote_service_setup" {
165167

166168
provisioner "remote-exec" {
167169
inline = [
170+
# Make the Terraform fail if any step throws an error
171+
"set -o errexit",
168172
# Install Java 11 and wget
169173
"sudo yum install wget java-11-amazon-corretto -y",
170174

0 commit comments

Comments
 (0)