Commit 722263e
Switch to use CW add-on for python EKS E2E tests. (#124)
Switch the Python EKS E2E tests to use the CloudWatch add-on. The `execute_and_retry/actions.yml` action is added to match the [Java E2E test](https://github.com/aws-observability/aws-application-signals-test-framework/tree/main/.github/workflows/actions/execute_and_retry); it will be removed after GA, once the Python tests move to the aws-application-signals-test-framework repo. Related change in the test framework repo: aws-observability/aws-application-signals-test-framework#32

Since the X-Ray service bug fix has not yet been deployed to all regions, the "App Signals Enablement - Python E2E EKS Canary Testing" workflow will be temporarily disabled; it will be re-enabled once the deployment reaches all regions.

An example test workflow run: https://github.com/aws-observability/aws-otel-python-instrumentation/actions/runs/8526689116/job/23356474268

By submitting this pull request, I confirm that you can use, modify, copy, and redistribute this contribution, under the terms of your choice.
1 parent c153c27 commit 722263e

File tree

2 files changed (+103, -66 lines)
Lines changed: 60 additions & 0 deletions
@@ -0,0 +1,60 @@
+# Reusable Action for executing commands and retrying them if they fail
+name: Command Retry Logic
+
+inputs:
+  # (Optional) Command to run before the retry command. To be used for environment setup, etc.
+  pre-command:
+    required: false
+    type: string
+  # (Optional) Number of retries to perform. Default is 2
+  max_retry:
+    required: false
+    type: number
+    default: 2
+  # (Required) Command to execute with the retry mechanism
+  command:
+    required: true
+    type: string
+  # (Optional) Command to clean up resources before retrying the main command
+  cleanup:
+    required: false
+    type: string
+  # (Optional) Follow-up command after the main command is finished.
+  post-command:
+    required: false
+    type: string
+
+runs:
+  using: "composite"
+  steps:
+    - name: Run command
+      shell: bash
+      env:
+        PRE_COMMAND: ${{ inputs.pre-command }}
+        MAX_RETRY: ${{ inputs.max_retry }}
+        COMMAND: ${{ inputs.command }}
+        CLEANUP: ${{ inputs.cleanup }}
+        POST_COMMAND: ${{ inputs.post-command }}
+      run: |
+        eval "$PRE_COMMAND"
+
+        retry_counter=0
+        while [ $retry_counter -lt $MAX_RETRY ]; do
+          attempt_failed=0
+          eval "$COMMAND" || attempt_failed=$?
+
+          if [ $attempt_failed -ne 0 ]; then
+            eval "$CLEANUP"
+            retry_counter=$(($retry_counter+1))
+            sleep 5
+          else
+            break
+          fi
+
+          if [ $retry_counter -eq $MAX_RETRY ]; then
+            echo "Max retry reached, command failed to execute properly. Exiting."
+            exit 1
+          fi
+        done
+
+        eval "$POST_COMMAND"
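Later in the workflow diff, steps source `.github/workflows/util/execute_and_retry.sh` and call an `execute_and_retry` function with a retry count, a main command, and an optional cleanup command. That helper is not part of this commit; a minimal sketch of what it plausibly looks like, inferred from its call sites below and mirroring the composite action's loop above, is:

#!/bin/bash
# Sketch only: assumed implementation of execute_and_retry, inferred from
# how the workflow below invokes it. The real helper may differ.
execute_and_retry() {
  local max_retry=$1    # number of attempts before giving up
  local command=$2      # main command to run
  local cleanup=${3:-}  # optional cleanup to run between failed attempts
  local retry_counter=0
  while [ "$retry_counter" -lt "$max_retry" ]; do
    if eval "$command"; then
      return 0
    fi
    if [ -n "$cleanup" ]; then
      eval "$cleanup"
    fi
    retry_counter=$((retry_counter + 1))
    sleep 5
  done
  echo "Max retry reached, command failed to execute properly."
  return 1
}

For example, the workflow calls it as `execute_and_retry 2 "kubectl delete pods --all -n $NAMESPACE"`, which retries the deletion once more after a five-second pause if the first attempt fails.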
.github/workflows/appsignals-python-e2e-eks-test.yml

Lines changed: 43 additions & 66 deletions
@@ -46,6 +46,29 @@ jobs:
           ref: main
           fetch-depth: 0
 
+      - name: Download enablement script
+        uses: actions/checkout@v4
+        with:
+          repository: aws-observability/application-signals-demo
+          ref: main
+          path: enablement-script
+          sparse-checkout: |
+            scripts/eks/appsignals/enable-app-signals.sh
+            scripts/eks/appsignals/clean-app-signals.sh
+          sparse-checkout-cone-mode: false
+
+      - name: Resolve Add-on configuration conflict
+        working-directory: enablement-script/scripts/eks/appsignals
+        run: |
+          sed -i 's/aws eks create-addon \\/aws eks create-addon \\\n --resolve-conflicts OVERWRITE \\/' enable-app-signals.sh
+
+      - name: Remove log group deletion command
+        if: always()
+        working-directory: enablement-script/scripts/eks/appsignals
+        run: |
+          delete_log_group="aws logs delete-log-group --log-group-name '${{ env.LOG_GROUP_NAME }}' --region \$REGION"
+          sed -i "s#$delete_log_group##g" clean-app-signals.sh
+
       - name: Generate testing id
         run: echo TESTING_ID="${{ inputs.aws-region }}-${{ github.run_id }}-${{ github.run_number }}" >> $GITHUB_ENV
 
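The "Resolve Add-on configuration conflict" step injects `--resolve-conflicts OVERWRITE` into the add-on creation call in `enable-app-signals.sh`, so re-running enablement against a cluster that already has the add-on (possibly with different settings) does not fail. Illustratively, the patched script then issues a command along these lines (a sketch only: the add-on name and shell variables are assumptions about the script's contents):

# Hypothetical shape of the patched call in enable-app-signals.sh:
aws eks create-addon \
  --resolve-conflicts OVERWRITE \
  --addon-name amazon-cloudwatch-observability \
  --cluster-name "$CLUSTER_NAME" \
  --region "$REGION"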
@@ -100,58 +123,6 @@ jobs:
           --region ${{ inputs.aws-region }} \
           --approve
 
-      # TODO: This step is used for custom pre-release instrumentation
-      # It is a temporary measure until the cw add-on is released with python support
-      - name: Setup Helm
-        uses: azure/setup-helm@v3
-
-      # TODO: This step is used for custom pre-release instrumentation
-      # It is a temporary measure until the cw add-on is released with python support
-      - name: Checkout Amazon Cloudwatch Agent Operator
-        uses: actions/checkout@v4
-        with:
-          repository: aws/amazon-cloudwatch-agent-operator
-          # SHA as of March 4
-          ref: abf75babe672412cb63c56cbcf1c5ce2d8c97a1c
-          path: amazon-cloudwatch-agent-operator
-
-      # TODO: This step is used for custom pre-release instrumentation
-      # It is a temporary measure until the cw add-on is released with python support
-      - name: Edit Helm values for Amazon Cloudwatch Agent Operator
-        working-directory: amazon-cloudwatch-agent-operator/helm/
-        run: |
-          sed -i 's/clusterName:/clusterName: ${{ inputs.test-cluster-name }}/g' values.yaml
-          sed -i 's/region:/region: ${{ inputs.aws-region }}/g' values.yaml
-          sed -i 's/tag: 1.0.2/tag: 1.1.0/g' values.yaml
-          if [ ${{ inputs.appsignals-adot-image }} != "" ]; then
-            echo "Using provided AppSignals ADOT image"
-            sed -i 's~repository: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-python~repository: ${{ inputs.appsignals-adot-image }}~g' values.yaml
-          else
-            echo "AppSignals ADOT image is not provided"
-            sed -i 's~repository: ghcr.io/open-telemetry/opentelemetry-operator/autoinstrumentation-python~repository: ${{ secrets.TEMP_TEST_ADOT_PYTHON_IMAGE }}~g' values.yaml
-          fi
-          if [ ${{ inputs.appsignals-adot-image-tag }} != "" ]; then
-            echo "Using appsignals-adot-image-tag"
-            sed -i 's/tag: 0.43b0/tag: ${{ inputs.appsignals-adot-image-tag }}/g' values.yaml
-          else
-            echo "appsignals-adot-image-tag is empty"
-            sed -i 's/tag: 0.43b0/tag: latest/g' values.yaml
-          fi
-
-      # TODO: This step is used for custom pre-release instrumentation
-      # It is a temporary measure until the cw add-on is released with python support
-      - name: Create CWA Operator Namespace file
-        working-directory: amazon-cloudwatch-agent-operator/
-        run: |
-          cat <<EOF > ./namespace.yaml
-          apiVersion: v1
-          kind: Namespace
-          metadata:
-            name: amazon-cloudwatch
-            labels:
-              name: amazon-cloudwatch
-          EOF
-
       - name: Set up terraform
         uses: hashicorp/setup-terraform@v3
         with:
@@ -192,21 +163,24 @@ jobs:
           # If the deployment_failed is still 0, then the terraform deployment succeeded and now try to connect to the endpoint
           # after installing App Signals. Attempts to connect will be made for up to 10 minutes
           if [ $deployment_failed -eq 0 ]; then
-
             kubectl wait --for=condition=Ready pod --all -n ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }}
 
             echo "Installing app signals to the sample app"
-
-            # TODO: The next 7 lines are used for custom pre-release instrumentation
-            # It is a temporary measure until the cw add-on is released with python support
-            cd ${{ github.workspace }}/amazon-cloudwatch-agent-operator/
-            kubectl apply -f namespace.yaml
-            helm template amazon-cloudwatch-observability ./helm --include-crds --namespace amazon-cloudwatch | kubectl apply --namespace amazon-cloudwatch --server-side --force-conflicts -f -
-
-            kubectl wait --for=condition=Ready pod --all -n amazon-cloudwatch
-            kubectl delete pods --all -n ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }}
-            kubectl wait --for=condition=Ready pod --all -n ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }}
-            cd ${{ github.workspace }}/terraform/python/eks
+            source ${GITHUB_WORKSPACE}/.github/workflows/util/execute_and_retry.sh
+            execute_and_retry 2 \
+              "${GITHUB_WORKSPACE}/enablement-script/scripts/eks/appsignals/enable-app-signals.sh \
+              ${{ inputs.test-cluster-name }} \
+              ${{ inputs.aws-region }} \
+              ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }}" \
+              "${GITHUB_WORKSPACE}/enablement-script/scripts/eks/appsignals/clean-app-signals.sh \
+              ${{ inputs.test-cluster-name }} \
+              ${{ inputs.aws-region }} \
+              ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }} && \
+              aws eks update-kubeconfig --name ${{ inputs.test-cluster-name }} --region ${{ inputs.aws-region }}"
+
+
+            execute_and_retry 2 "kubectl delete pods --all -n ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }}"
+            execute_and_retry 2 "kubectl wait --for=condition=Ready --request-timeout '5m' pod --all -n ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }}"
 
             echo "Attempting to connect to the endpoint"
             python_app_endpoint=http://$(terraform output python_app_endpoint)
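The hunk above replaces the Helm-based operator install with the enablement script wrapped in `execute_and_retry`. The surrounding comments say connection attempts to the sample app endpoint run for up to 10 minutes; a probe with that behavior could look roughly like this (hypothetical sketch; the workflow's actual probe logic is outside this hunk):

# Hypothetical endpoint probe: retry every 10 seconds, up to 10 minutes.
attempt_counter=0
max_attempts=60
until curl --silent --fail --output /dev/null "$python_app_endpoint"; do
  if [ $attempt_counter -ge $max_attempts ]; then
    echo "Endpoint unreachable after 10 minutes"
    exit 1
  fi
  attempt_counter=$((attempt_counter + 1))
  sleep 10
done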
@@ -374,9 +348,12 @@ jobs:
       - name: Clean Up App Signals
         if: always()
         continue-on-error: true
-        working-directory: amazon-cloudwatch-agent-operator/
+        working-directory: enablement-script/scripts/eks/appsignals
         run: |
-          kubectl delete -f ./namespace.yaml
+          ./clean-app-signals.sh \
+            ${{ inputs.test-cluster-name }} \
+            ${{ inputs.aws-region }} \
+            ${{ env.PYTHON_SAMPLE_APP_NAMESPACE }}
 
       # This step also deletes lingering resources from previous test runs
       - name: Delete all sample app resources
