Skip to content

Commit 22a4094

Browse files
committed
Add EC2 Platform Test to Python
1 parent 45b3650 commit 22a4094

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+2278
-45
lines changed
Lines changed: 290 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,290 @@
1+
## Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
## SPDX-License-Identifier: Apache-2.0
3+
4+
# This is a reusable workflow for running the Python E2E Canary test for Application Signals.
5+
# It is meant to be called from another workflow.
6+
# Read more about reusable workflows: https://docs.github.com/en/actions/using-workflows/reusing-workflows#overview
7+
name: Application Signals Enablement E2E Testing - Python EC2 Asg Use Case
8+
on:
9+
workflow_call:
10+
inputs:
11+
aws-region:
12+
required: true
13+
type: string
14+
staging_wheel_name:
15+
required: false
16+
default: 'aws-opentelemetry-distro'
17+
type: string
18+
caller-workflow-name:
19+
required: true
20+
type: string
21+
22+
permissions:
23+
id-token: write
24+
contents: read
25+
26+
env:
27+
SAMPLE_APP_ZIP: s3://${{ secrets.APP_SIGNALS_E2E_EC2_JAR }}-prod-${{ inputs.aws-region }}/python-sample-app.zip
28+
METRIC_NAMESPACE: ApplicationSignals
29+
LOG_GROUP_NAME: /aws/application-signals/data
30+
ADOT_WHEEL_NAME: ${{ inputs.staging_wheel_name }}
31+
TEST_RESOURCES_FOLDER: ${GITHUB_WORKSPACE}
32+
GET_ADOT_JAR_COMMAND: "aws s3api get-object --bucket metric-schema-changes --key aws-opentelemetry-agent.jar adot.jar"
33+
GET_CW_AGENT_RPM_COMMAND: "aws s3api get-object --bucket private-cloudwatch-agent-apm-beta --key linux_amd64/amazon-cloudwatch-agent.rpm cw-agent.rpm"
34+
35+
36+
jobs:
37+
python-e2e-ec2-asg-test:
38+
runs-on: ubuntu-latest
39+
container:
40+
image: public.ecr.aws/h6o3z5z9/aws-application-signals-test-framework-workflow-container:latest
41+
steps:
42+
- uses: actions/checkout@v4
43+
with:
44+
repository: aws-observability/aws-application-signals-test-framework
45+
ref: add-ec2-platform-for-python-ga
46+
47+
- name: Set CW Agent RPM environment variable
48+
run: echo GET_CW_AGENT_RPM_COMMAND="aws s3api get-object --bucket private-cloudwatch-agent-apm-beta --key linux_amd64/amazon-cloudwatch-agent.rpm cw-agent.rpm" >> $GITHUB_ENV
49+
50+
- name: Generate testing id
51+
# AWS_DEFAULT_REGION is not defined in this workflow's env block and no earlier step exports it,
# so env.AWS_DEFAULT_REGION would be empty here; take the region from the workflow input instead.
run: echo TESTING_ID="${{ github.job }}-${{ inputs.aws-region }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}" >> $GITHUB_ENV
52+
53+
- name: Configure AWS Credentials
54+
uses: aws-actions/configure-aws-credentials@v4
55+
with:
56+
role-to-assume: ${{ secrets.E2E_SECRET_TEST_ROLE_ARN }}
57+
aws-region: us-east-1
58+
59+
- name: Retrieve account
60+
uses: aws-actions/aws-secretsmanager-get-secrets@v1
61+
with:
62+
secret-ids:
63+
ACCOUNT_ID, region-account/${{ inputs.aws-region }}
64+
65+
- name: Configure AWS Credentials
66+
uses: aws-actions/configure-aws-credentials@v4
67+
with:
68+
role-to-assume: arn:aws:iam::${{ env.ACCOUNT_ID }}:role/${{ secrets.E2E_TEST_ROLE_ARN }}
69+
aws-region: ${{ inputs.aws-region }}
70+
71+
- uses: actions/download-artifact@v3
72+
if: inputs.caller-workflow-name == 'main-build'
73+
with:
74+
name: ${{ inputs.staging_wheel_name }}
75+
76+
- name: Upload main-build adot.whl to s3
77+
if: inputs.caller-workflow-name == 'main-build'
78+
run: aws s3 cp ${{ inputs.staging_wheel_name }} s3://adot-main-build-staging-jar/${{ env.ADOT_WHEEL_NAME }}
79+
80+
- name: Set Get ADOT Wheel command environment variable
81+
working-directory: terraform/python/ec2
82+
run: |
83+
echo GET_ADOT_WHEEL_COMMAND="aws s3api get-object --bucket metric-schema-changes --key aws_opentelemetry_distro-0.0.1.dev0-py3-none-any.whl aws_opentelemetry_distro-0.0.1.dev0-py3-none-any.whl && python3.9 -m pip install aws_opentelemetry_distro-0.0.1.dev0-py3-none-any.whl" >> $GITHUB_ENV
84+
# if [ ${{ inputs.caller-workflow-name }} == "main-build" ]; then
85+
# # Reusing the adot-main-build-staging-jar bucket to store the python wheel file
86+
# echo GET_ADOT_WHEEL_COMMAND="aws s3 cp s3://adot-main-build-staging-jar/${{ env.ADOT_WHEEL_NAME }} ./${{ env.ADOT_WHEEL_NAME }} && python3.9 -m pip install ${{ env.ADOT_WHEEL_NAME }}" >> $GITHUB_ENV
87+
# else
88+
# echo GET_ADOT_WHEEL_COMMAND="python3.9 -m pip install aws-opentelemetry-distro" >> $GITHUB_ENV
89+
# fi
90+
91+
- name: Initiate Terraform
92+
uses: ./.github/workflows/actions/execute_and_retry
93+
with:
94+
command: "cd ${{ env.TEST_RESOURCES_FOLDER }}/terraform/python/ec2/asg && terraform init && terraform validate"
95+
cleanup: "rm -rf .terraform && rm -rf .terraform.lock.hcl"
96+
97+
- name: Deploy sample app via terraform and wait for endpoint to come online
98+
working-directory: terraform/python/ec2/asg
99+
run: |
100+
# Attempt to deploy the sample app on an EC2 instance and wait for its endpoint to come online.
101+
# There may be occasional failures due to transient issues, so try up to 2 times.
102+
# deployment_failed of 0 indicates that both the terraform deployment and the endpoint are running, while 1 indicates
103+
# that it failed at some point
104+
retry_counter=0
105+
max_retry=2
106+
while [ $retry_counter -lt $max_retry ]; do
107+
echo "Attempt $retry_counter"
108+
deployment_failed=0
109+
terraform apply -auto-approve \
110+
-var="aws_region=${{ inputs.aws-region }}" \
111+
-var="test_id=${{ env.TESTING_ID }}" \
112+
-var="sample_app_zip=${{ env.SAMPLE_APP_ZIP }}" \
113+
-var="get_cw_agent_rpm_command=${{ env.GET_CW_AGENT_RPM_COMMAND }}" \
114+
-var="get_adot_wheel_command=${{ env.GET_ADOT_WHEEL_COMMAND }}" \
115+
|| deployment_failed=1
# Any non-zero terraform exit status is recorded as 1 so that the "-eq 1" checks below treat it as a failure.
116+
117+
if [ $deployment_failed -eq 1 ]; then
118+
echo "Terraform deployment was unsuccessful. Will attempt to retry deployment."
119+
fi
120+
121+
# Do not print the Terraform private_key output here: everything this step echoes is written
# to the workflow log, which would expose the generated key to anyone who can read the logs.
122+
123+
# If the deployment_failed is still 0, then the terraform deployment succeeded and now try to connect to the endpoint.
124+
# Attempts to connect will be made for up to 10 minutes
125+
if [ $deployment_failed -eq 0 ]; then
126+
echo "Attempting to connect to the endpoint"
127+
main_service_instance_id=$(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-names python-ec2-single-asg-${{ env.TESTING_ID }} --region ${{ env.AWS_DEFAULT_REGION }} --query "AutoScalingGroups[].Instances[0].InstanceId" --output text)
128+
main_service_public_ip=$(aws ec2 describe-instances --instance-ids $main_service_instance_id --region ${{ env.AWS_DEFAULT_REGION }} --query "Reservations[].Instances[].PublicIpAddress" --output text)
129+
main_service_sample_app_endpoint=http://$main_service_public_ip:8000
130+
echo "The main service endpoint is $main_service_sample_app_endpoint"
131+
132+
attempt_counter=0
133+
max_attempts=30
134+
until $(curl --output /dev/null --silent --head --fail $(echo "$main_service_sample_app_endpoint" | tr -d '"')); do
135+
if [ ${attempt_counter} -eq ${max_attempts} ];then
136+
echo "Failed to connect to endpoint. Will attempt to redeploy sample app."
137+
deployment_failed=1
138+
break
139+
fi
140+
141+
printf '.'
142+
attempt_counter=$(($attempt_counter+1))
143+
sleep 10
144+
done
145+
146+
echo "Attempting to connect to the remote sample app endpoint"
147+
remote_sample_app_endpoint=http://$(terraform output sample_app_remote_service_public_ip):8001/healthcheck
148+
attempt_counter=0
149+
max_attempts=30
150+
until $(curl --output /dev/null --silent --head --fail $(echo "$remote_sample_app_endpoint" | tr -d '"')); do
151+
if [ ${attempt_counter} -eq ${max_attempts} ];then
152+
echo "Failed to connect to endpoint. Will attempt to redeploy sample app."
153+
deployment_failed=1
154+
break
155+
fi
156+
157+
printf '.'
158+
attempt_counter=$(($attempt_counter+1))
159+
sleep 10
160+
done
161+
fi
162+
163+
# If deployment_failed is 1 then either the terraform deployment or the endpoint connection failed, so first destroy the
164+
# resources created from terraform and try again.
165+
if [ $deployment_failed -eq 1 ]; then
166+
echo "Destroying terraform"
167+
terraform destroy -auto-approve \
168+
-var="test_id=${{ env.TESTING_ID }}"
169+
170+
retry_counter=$(($retry_counter+1))
171+
else
172+
# If deployment succeeded, then exit the loop
173+
break
174+
fi
175+
176+
if [ $retry_counter -eq $max_retry ]; then
177+
echo "Max retry reached, failed to deploy terraform and connect to the endpoint. Exiting code"
178+
exit 1
179+
fi
180+
done
181+
182+
- name: Get sample app and EC2 instance information
183+
working-directory: terraform/python/ec2/asg
184+
run: |
185+
main_service_instance_id=$(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-names python-ec2-single-asg-${{ env.TESTING_ID }} --region ${{ env.AWS_DEFAULT_REGION }} --query "AutoScalingGroups[].Instances[0].InstanceId" --output text)
186+
main_service_public_ip=$(aws ec2 describe-instances --instance-ids $main_service_instance_id --region ${{ env.AWS_DEFAULT_REGION }} --query "Reservations[].Instances[].PublicIpAddress" --output text)
187+
main_service_private_dns_name=$(aws ec2 describe-instances --instance-ids $main_service_instance_id --region ${{ env.AWS_DEFAULT_REGION }} --query "Reservations[].Instances[].PrivateDnsName" --output text)
188+
echo "INSTANCE_ID=$main_service_instance_id" >> $GITHUB_ENV
189+
echo "MAIN_SERVICE_ENDPOINT=$main_service_public_ip:8000" >> $GITHUB_ENV
190+
echo "PRIVATE_DNS_NAME=$main_service_private_dns_name" >> $GITHUB_ENV
191+
echo "EC2_INSTANCE_AMI=$(terraform output ec2_instance_ami)" >> $GITHUB_ENV
192+
echo "REMOTE_SERVICE_IP=$(terraform output sample_app_remote_service_public_ip)" >> $GITHUB_ENV
193+
194+
# This step increases the speed of the validation by creating the telemetry data in advance
195+
- name: Call all test APIs
196+
continue-on-error: true
197+
run: |
198+
curl -S -s http://${{ env.MAIN_SERVICE_ENDPOINT }}/outgoing-http-call; echo
199+
curl -S -s http://${{ env.MAIN_SERVICE_ENDPOINT }}/aws-sdk-call; echo
200+
curl -S -s http://${{ env.MAIN_SERVICE_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_IP }}; echo
201+
curl -S -s http://${{ env.MAIN_SERVICE_ENDPOINT }}/client-call; echo
202+
203+
- name: Initiate Gradlew Daemon
204+
uses: ./.github/workflows/actions/execute_and_retry
205+
with:
206+
command: "./gradlew"
207+
cleanup: "./gradlew clean"
208+
max_retry: 4
209+
sleep_time: 30
210+
211+
# Validation for pulse telemetry data
212+
- name: Validate generated EMF logs
213+
id: log-validation
214+
run: ./gradlew validator:run --args='-c python/ec2/asg/log-validation.yml
215+
--endpoint http://${{ env.MAIN_SERVICE_ENDPOINT }}
216+
--remote-service-deployment-name ${{ env.REMOTE_SERVICE_IP }}:8001
217+
--region ${{ inputs.aws-region }}
218+
--metric-namespace ${{ env.METRIC_NAMESPACE }}
219+
--log-group ${{ env.LOG_GROUP_NAME }}
220+
--service-name python-sample-application-${{ env.TESTING_ID }}
221+
--remote-service-name python-sample-remote-application-${{ env.TESTING_ID }}
222+
--request-body ip=${{ env.REMOTE_SERVICE_IP }}
223+
--instance-ami ${{ env.EC2_INSTANCE_AMI }}
224+
--platform-info python-ec2-single-asg-${{ env.TESTING_ID }}
225+
--instance-id ${{ env.INSTANCE_ID }}
226+
--private-dns-name ${{ env.PRIVATE_DNS_NAME }}
227+
--rollup'
228+
229+
- name: Validate generated metrics
230+
id: metric-validation
231+
if: (success() || steps.log-validation.outcome == 'failure') && !cancelled()
232+
run: ./gradlew validator:run --args='-c python/ec2/asg/metric-validation.yml
233+
--endpoint http://${{ env.MAIN_SERVICE_ENDPOINT }}
234+
--remote-service-deployment-name ${{ env.REMOTE_SERVICE_IP }}:8001
235+
--region ${{ inputs.aws-region }}
236+
--metric-namespace ${{ env.METRIC_NAMESPACE }}
237+
--log-group ${{ env.LOG_GROUP_NAME }}
238+
--service-name python-sample-application-${{ env.TESTING_ID }}
239+
--remote-service-name python-sample-remote-application-${{ env.TESTING_ID }}
240+
--request-body ip=${{ env.REMOTE_SERVICE_IP }}
241+
--instance-ami ${{ env.EC2_INSTANCE_AMI }}
242+
--platform-info python-ec2-single-asg-${{ env.TESTING_ID }}
243+
--instance-id ${{ env.INSTANCE_ID }}
244+
--private-dns-name ${{ env.PRIVATE_DNS_NAME }}
245+
--rollup'
246+
247+
- name: Validate generated traces
248+
id: trace-validation
249+
if: (success() || steps.log-validation.outcome == 'failure' || steps.metric-validation.outcome == 'failure') && !cancelled()
250+
run: ./gradlew validator:run --args='-c python/ec2/asg/trace-validation.yml
251+
--endpoint http://${{ env.MAIN_SERVICE_ENDPOINT }}
252+
--remote-service-deployment-name ${{ env.REMOTE_SERVICE_IP }}:8001
253+
--region ${{ inputs.aws-region }}
254+
--account-id ${{ env.ACCOUNT_ID }}
255+
--metric-namespace ${{ env.METRIC_NAMESPACE }}
256+
--log-group ${{ env.LOG_GROUP_NAME }}
257+
--service-name python-sample-application-${{ env.TESTING_ID }}
258+
--remote-service-name python-sample-remote-application-${{ env.TESTING_ID }}
259+
--request-body ip=${{ env.REMOTE_SERVICE_IP }}
260+
--instance-ami ${{ env.EC2_INSTANCE_AMI }}
261+
--platform-info python-ec2-single-asg-${{ env.TESTING_ID }}
262+
--instance-id ${{ env.INSTANCE_ID }}
263+
--private-dns-name ${{ env.PRIVATE_DNS_NAME }}
264+
--rollup'
265+
266+
- name: Publish metric on test result
267+
if: always()
268+
run: |
269+
if [ "${{ steps.log-validation.outcome }}" = "success" ] && [ "${{ steps.metric-validation.outcome }}" = "success" ] && [ "${{ steps.trace-validation.outcome }}" = "success" ]; then
270+
aws cloudwatch put-metric-data --namespace 'ADOT/GitHubActions' \
271+
--metric-name Failure \
272+
--dimensions repository=${{ github.repository }},branch=${{ github.ref_name }},workflow=${{ inputs.caller-workflow-name }} \
273+
--value 0.0 \
274+
--region ${{ inputs.aws-region }}
275+
else
276+
aws cloudwatch put-metric-data --namespace 'ADOT/GitHubActions' \
277+
--metric-name Failure \
278+
--dimensions repository=${{ github.repository }},branch=${{ github.ref_name }},workflow=${{ inputs.caller-workflow-name }} \
279+
--value 1.0 \
280+
--region ${{ inputs.aws-region }}
281+
fi
282+
283+
# Clean up Procedures
284+
- name: Terraform destroy
285+
if: always()
286+
continue-on-error: true
287+
working-directory: terraform/python/ec2/asg
288+
run: |
289+
terraform destroy -auto-approve \
290+
-var="test_id=${{ env.TESTING_ID }}"

0 commit comments

Comments
 (0)