Commit 28976d1

Apply retry using enablement script (#146)
*Issue #, if available:* Apply retry using the enablement script to align with the Java E2E tests.

Sample workflow tests:
EKS E2E: https://github.com/aws-observability/aws-otel-python-instrumentation/actions/runs/8622097463
EC2 E2E: https://github.com/aws-observability/aws-otel-python-instrumentation/actions/runs/8622052335

By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.
1 parent 1c8b497
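Background: both workflows retry flaky setup in two ways. `run:` steps source `.github/workflows/util/execute_and_retry.sh` and call `execute_and_retry <max_attempts> "<command>" "<fallback>"` directly, while other steps delegate to the `./.github/workflows/actions/execute_and_retry` composite action. The helper's implementation is not shown in this diff; the sketch below is a minimal bash version consistent with that calling convention, not the repository's actual code.

```bash
#!/bin/bash
# Sketch only: the real .github/workflows/util/execute_and_retry.sh may differ.
# Usage: execute_and_retry <max_attempts> <command> [fallback_command]
execute_and_retry() {
  local max_attempts=$1
  local command=$2
  local fallback=${3:-}
  local attempt=1
  while true; do
    if eval "$command"; then
      return 0                                            # command succeeded
    fi
    if [ "$attempt" -ge "$max_attempts" ]; then
      echo "Command failed after $max_attempts attempts: $command" >&2
      return 1
    fi
    # Run the fallback/cleanup command (if any) before the next attempt.
    if [ -n "$fallback" ]; then
      eval "$fallback"
    fi
    attempt=$((attempt + 1))
  done
}
```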

2 files changed: +51 -55 lines changed

.github/workflows/appsignals-python-e2e-ec2-test.yml

Lines changed: 11 additions & 5 deletions
@@ -32,7 +32,7 @@ env:
  METRIC_NAMESPACE: AppSignals
  LOG_GROUP_NAME: /aws/appsignals/generic
  ADOT_WHEEL_NAME: ${{ inputs.staging_wheel_name }}
-
+  TEST_RESOURCES_FOLDER: /home/runner/work/aws-application-signals-test-framework/aws-application-signals-test-framework

jobs:
  python-e2e-ec2-test:
@@ -88,15 +88,21 @@ jobs:
          fi

      - name: Set up terraform
-        uses: hashicorp/setup-terraform@v3
+        uses: ./.github/workflows/actions/execute_and_retry
+        with:
+          command: "wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg"
+          post-command: 'echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list
+            && sudo apt update && sudo apt install terraform'
+
+      - name: Initiate Terraform
+        uses: ./.github/workflows/actions/execute_and_retry
        with:
-          terraform_wrapper: false
+          command: "cd terraform/python/ec2 && terraform init && terraform validate"
+          cleanup: "rm -rf .terraform && rm -rf .terraform.lock.hcl"

      - name: Deploy sample app via terraform and wait for endpoint to come online
        working-directory: terraform/python/ec2
        run: |
-          terraform init
-          terraform validate
          # Attempt to deploy the sample app on an EC2 instance and wait for its endpoint to come online.
          # There may be occasional failures due to transitivity issues, so try up to 2 times.
          # deployment_failed of 0 indicates that both the terraform deployment and the endpoint are running, while 1 indicates
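The `Set up terraform` and `Initiate Terraform` steps above pass `pre-command`, `command`, `post-command`, and `cleanup` inputs to the composite action, but the action's `action.yml` is not part of this commit. A hypothetical sketch of how such an action could wire those inputs onto the retry helper (input names are taken from this diff; the wiring itself is an assumption):

```yaml
# Hypothetical sketch of .github/workflows/actions/execute_and_retry/action.yml;
# input names match the steps in this commit, everything else is an assumption.
name: 'Execute and Retry'
description: 'Run a command with retries, running a cleanup command between failed attempts'
inputs:
  pre-command:
    description: 'Runs once before the retried command'
    required: false
    default: ''
  command:
    description: 'The command to retry'
    required: true
  cleanup:
    description: 'Runs between failed attempts to reset state'
    required: false
    default: ''
  post-command:
    description: 'Runs once after the command succeeds'
    required: false
    default: ''
runs:
  using: 'composite'
  steps:
    - shell: bash
      run: |
        source ${GITHUB_WORKSPACE}/.github/workflows/util/execute_and_retry.sh
        ${{ inputs.pre-command }}
        execute_and_retry 2 "${{ inputs.command }}" "${{ inputs.cleanup }}"
        ${{ inputs.post-command }}
```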

.github/workflows/appsignals-python-e2e-eks-test.yml

Lines changed: 40 additions & 50 deletions
@@ -35,6 +35,7 @@ env:
  PYTHON_SAMPLE_APP_NAMESPACE: python-app-namespace
  METRIC_NAMESPACE: AppSignals
  LOG_GROUP_NAME: /aws/appsignals/eks
+  TEST_RESOURCES_FOLDER: /home/runner/work/aws-application-signals-test-framework/aws-application-signals-test-framework

jobs:
  python-e2e-eks-test:
@@ -47,24 +48,17 @@ jobs:
          fetch-depth: 0

      - name: Download enablement script
-        uses: actions/checkout@v4
+        uses: ./.github/workflows/actions/execute_and_retry
        with:
-          repository: aws-observability/application-signals-demo
-          ref: main
-          path: enablement-script
-          sparse-checkout: |
-            scripts/eks/appsignals/enable-app-signals.sh
-            scripts/eks/appsignals/clean-app-signals.sh
-          sparse-checkout-cone-mode: false
-
-      - name: Resolve Add-on configuration conflict
-        working-directory: enablement-script/scripts/eks/appsignals
-        run: |
-          sed -i 's/aws eks create-addon \\/aws eks create-addon \\\n --resolve-conflicts OVERWRITE \\/' enable-app-signals.sh
+          pre-command: "mkdir enablement-script && cd enablement-script"
+          command: "wget https://raw.githubusercontent.com/aws-observability/application-signals-demo/main/scripts/eks/appsignals/enable-app-signals.sh
+            && wget https://raw.githubusercontent.com/aws-observability/application-signals-demo/main/scripts/eks/appsignals/clean-app-signals.sh"
+          cleanup: "rm -f enable-app-signals.sh && rm -f clean-app-signals.sh"
+          post-command: "chmod +x enable-app-signals.sh && chmod +x clean-app-signals.sh"

      - name: Remove log group deletion command
        if: always()
-        working-directory: enablement-script/scripts/eks/appsignals
+        working-directory: enablement-script
        run: |
          delete_log_group="aws logs delete-log-group --log-group-name '${{ env.LOG_GROUP_NAME }}' --region \$REGION"
          sed -i "s#$delete_log_group##g" clean-app-signals.sh
@@ -103,38 +97,47 @@ jobs:
        run: aws eks update-kubeconfig --name ${{ inputs.test-cluster-name }} --region ${{ inputs.aws-region }}

      - name: Install eksctl
-        working-directory: .github/
+        uses: ./.github/workflows/actions/execute_and_retry
+        with:
+          pre-command: 'mkdir ${{ github.workspace }}/eksctl'
+          command: 'curl -sLO "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_Linux_amd64.tar.gz"
+            && tar -xzf eksctl_Linux_amd64.tar.gz -C ${{ github.workspace }}/eksctl && rm eksctl_Linux_amd64.tar.gz'
+          cleanup: 'rm -f eksctl_Linux_amd64.tar.gz'
+
+      - name: Add eksctl to Github Path
        run: |
-          source ./workflows/util/execute_and_retry.sh
-          mkdir ${{ github.workspace }}/eksctl
-          curl -sLO "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_Linux_amd64.tar.gz"
-          execute_and_retry 2 "tar -xzf eksctl_Linux_amd64.tar.gz -C ${{ github.workspace }}/eksctl && rm eksctl_Linux_amd64.tar.gz"
          echo "${{ github.workspace }}/eksctl" >> $GITHUB_PATH

      - name: Create role for AWS access from the sample app
        id: create_service_account
-        run: |
-          eksctl create iamserviceaccount \
+        uses: ./.github/workflows/actions/execute_and_retry
+        with:
+          command: "eksctl create iamserviceaccount \
            --name service-account-${{ env.TESTING_ID }} \
            --namespace ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }} \
            --cluster ${{ inputs.test-cluster-name }} \
            --role-name eks-s3-access-${{ env.TESTING_ID }} \
            --attach-policy-arn arn:aws:iam::aws:policy/AmazonS3ReadOnlyAccess \
            --region ${{ inputs.aws-region }} \
-            --approve
+            --approve"

      - name: Set up terraform
-        uses: hashicorp/setup-terraform@v3
+        uses: ./.github/workflows/actions/execute_and_retry
+        with:
+          command: "wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg"
+          post-command: 'echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list
+            && sudo apt update && sudo apt install terraform'
+
+      - name: Initiate Terraform
+        uses: ./.github/workflows/actions/execute_and_retry
        with:
-          terraform_wrapper: false
+          command: "cd terraform/python/eks && terraform init && terraform validate"
+          cleanup: "rm -rf .terraform && rm -rf .terraform.lock.hcl"

      - name: Deploy sample app via terraform and wait for the endpoint to come online
        id: deploy-python-app
        working-directory: terraform/python/eks
        run: |
-          terraform init
-          terraform validate
-
          # Attempt to deploy the sample app on an EKS instance and wait for its endpoint to come online.
          # There may be occasional failures due to transitivity issues, so try up to 2 times.
          # deployment_failed of 0 indicates that both the terraform deployment and the endpoint are running, while 1 indicates
@@ -163,26 +166,23 @@ jobs:
          # If the deployment_failed is still 0, then the terraform deployment succeeded and now try to connect to the endpoint
          # after installing App Signals. Attempts to connect will be made for up to 10 minutes
          if [ $deployment_failed -eq 0 ]; then
-            kubectl wait --for=condition=Ready pod --all -n ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }}
-
            echo "Installing app signals to the sample app"
            source ${GITHUB_WORKSPACE}/.github/workflows/util/execute_and_retry.sh
            execute_and_retry 2 \
-              "${GITHUB_WORKSPACE}/enablement-script/scripts/eks/appsignals/enable-app-signals.sh \
+              "${GITHUB_WORKSPACE}/enablement-script/enable-app-signals.sh \
              ${{ inputs.test-cluster-name }} \
              ${{ inputs.aws-region }} \
              ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }}" \
-              "${GITHUB_WORKSPACE}/enablement-script/scripts/eks/appsignals/clean-app-signals.sh \
+              "${GITHUB_WORKSPACE}/enablement-script/clean-app-signals.sh \
              ${{ inputs.test-cluster-name }} \
              ${{ inputs.aws-region }} \
              ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }} && \
              aws eks update-kubeconfig --name ${{ inputs.test-cluster-name }} --region ${{ inputs.aws-region }}"

-
            execute_and_retry 2 "kubectl delete pods --all -n ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }}"
            execute_and_retry 2 "kubectl wait --for=condition=Ready --request-timeout '5m' pod --all -n ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }}"

-            echo "Attempting to connect to the endpoint"
+            echo "Attempting to connect to the main sample app endpoint"
            python_app_endpoint=http://$(terraform output python_app_endpoint)
            attempt_counter=0
            max_attempts=60
@@ -203,9 +203,13 @@ jobs:
          # resources created from terraform and try again.
          if [ $deployment_failed -eq 1 ]; then
            echo "Cleaning up App Signal"
+            ${GITHUB_WORKSPACE}/enablement-script/clean-app-signals.sh \
+              ${{ inputs.test-cluster-name }} \
+              ${{ inputs.aws-region }} \
+              ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }}

-            cd ${{ github.workspace }}/amazon-cloudwatch-agent-operator/
-            kubectl delete -f ./namespace.yaml
+            # Running clean-app-signal.sh removes the current cluster from the config. Update the cluster again for subsequent runs.
+            aws eks update-kubeconfig --name ${{ inputs.test-cluster-name }} --region ${{ inputs.aws-region }}

            echo "Destroying terraform"
            terraform destroy -auto-approve \
@@ -231,20 +235,6 @@ jobs:
            fi
          done

-          # Attach policies to cluster node group roles that are required for AppSignals
-          aws eks list-nodegroups --cluster-name ${{ inputs.test-cluster-name }} --region ${{ inputs.aws-region }} |\
-          jq -r '.nodegroups[]' |\
-          while read -r node_group;
-          do
-            node_role=$(\
-              aws eks describe-nodegroup --cluster-name ${{ inputs.test-cluster-name }} --nodegroup-name $node_group --region ${{ inputs.aws-region }} |\
-              jq -r '.nodegroup.nodeRole' |\
-              cut -d'/' -f2
-            )
-            aws iam attach-role-policy --role-name $node_role --policy-arn arn:aws:iam::aws:policy/CloudWatchAgentServerPolicy --region ${{ inputs.aws-region }}
-            aws iam attach-role-policy --role-name $node_role --policy-arn arn:aws:iam::aws:policy/AWSXRayWriteOnlyAccess --region ${{ inputs.aws-region }}
-          done
-
      - name: Get remote service pod name and IP
        run: |
          echo "REMOTE_SERVICE_DEPLOYMENT_NAME=$(kubectl get deployments -n ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }} --selector=app=remote-app -o jsonpath='{.items[0].metadata.name}')" >> $GITHUB_ENV
@@ -348,7 +338,7 @@ jobs:
      - name: Clean Up App Signals
        if: always()
        continue-on-error: true
-        working-directory: enablement-script/scripts/eks/appsignals
+        working-directory: enablement-script
        run: |
          ./clean-app-signals.sh \
            ${{ inputs.test-cluster-name }} \

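Assuming the composite action delegates to the same `execute_and_retry` helper that the `run:` steps source, the `Initiate Terraform` step is roughly equivalent to the following shell invocation; the `command`/`cleanup` pairing is copied from the step's inputs:

```bash
source ${GITHUB_WORKSPACE}/.github/workflows/util/execute_and_retry.sh
execute_and_retry 2 \
  "cd terraform/python/eks && terraform init && terraform validate" \
  "rm -rf .terraform && rm -rf .terraform.lock.hcl"
```

Removing `.terraform` and `.terraform.lock.hcl` between attempts discards any partially initialized provider state, so a retried `terraform init` starts from a clean slate.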