# # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# # SPDX-License-Identifier: Apache-2.0

# This is a reusable workflow for running the Python E2E Canary test for Application Signals.
# It is meant to be called from another workflow.
# Read more about reusable workflows: https://docs.github.com/en/actions/using-workflows/reusing-workflows#overview
name: Application Signals Enablement E2E Testing - Python EC2 Asg Use Case
on:
  # Reusable-workflow entry point: this file is only ever started by another workflow.
  workflow_call:
    inputs:
      aws-region:
        description: AWS region the test stack is deployed to and validated in.
        required: true
        type: string
      staging_wheel_name:
        description: Name of the wheel artifact that is downloaded and staged in S3 when the caller is main-build.
        required: false
        # No padding inside the quotes: the value is interpolated verbatim into
        # the artifact name and the S3 object key.
        default: 'aws-opentelemetry-distro'
        type: string
      caller-workflow-name:
        description: Name of the calling workflow; 'main-build' enables wheel staging, and the value is reported as the metric's workflow dimension.
        required: true
        type: string

# Token scopes granted to this workflow's jobs.
permissions:
  # Read-only access to repository contents.
  contents: read
  # Allows requesting an OIDC token; the credential steps assume IAM roles
  # without static keys, which presumably relies on it.
  id-token: write

# Workflow-wide environment shared by every step.
env:
  # S3 object holding the zipped Python sample app; the bucket name is built
  # from a secret prefix plus the target region.
  SAMPLE_APP_ZIP: s3://${{ secrets.APP_SIGNALS_E2E_EC2_JAR }}-prod-${{ inputs.aws-region }}/python-sample-app.zip
  # Namespace and log group handed to the validator steps.
  METRIC_NAMESPACE: ApplicationSignals
  LOG_GROUP_NAME: /aws/application-signals/data
  ADOT_WHEEL_NAME: ${{ inputs.staging_wheel_name }}
  # Deliberately a literal shell reference: it is expanded by the shell when the
  # value is interpolated into a command, not by the workflow engine.
  TEST_RESOURCES_FOLDER: ${GITHUB_WORKSPACE}
  # NOTE(review): no step in this workflow references GET_ADOT_JAR_COMMAND;
  # it looks like a leftover - confirm before removing.
  GET_ADOT_JAR_COMMAND: "aws s3api get-object --bucket metric-schema-changes --key aws-opentelemetry-agent.jar adot.jar"
  # Command passed to terraform for fetching the CloudWatch agent RPM.
  GET_CW_AGENT_RPM_COMMAND: "aws s3api get-object --bucket private-cloudwatch-agent-apm-beta --key linux_amd64/amazon-cloudwatch-agent.rpm cw-agent.rpm"

jobs:
  # Deploys the Python sample app onto an EC2 Auto Scaling group with terraform,
  # sends traffic through it, validates the resulting logs, metrics and traces,
  # publishes a pass/fail metric and always attempts to destroy the stack.
  python-e2e-ec2-asg-test:
    runs-on: ubuntu-latest
    container:
      image: public.ecr.aws/h6o3z5z9/aws-application-signals-test-framework-workflow-container:latest
    steps:
      # NOTE(review): this checks out a named branch of the test framework rather
      # than its default branch - confirm that is still intended.
      - uses: actions/checkout@v4
        with:
          repository: aws-observability/aws-application-signals-test-framework
          ref: add-ec2-platform-for-python-ga

      # Re-exports the same command string that the workflow-level env already defines.
      - name: Set CW Agent RPM environment variable
        run: |
          echo GET_CW_AGENT_RPM_COMMAND="aws s3api get-object --bucket private-cloudwatch-agent-apm-beta --key linux_amd64/amazon-cloudwatch-agent.rpm cw-agent.rpm" >> $GITHUB_ENV

      # The region is taken from the workflow input: nothing visible in this
      # workflow puts AWS_DEFAULT_REGION into the env context before this step,
      # so reading it here would leave the region segment empty.
      - name: Generate testing id
        run: |
          echo TESTING_ID="${{ github.job }}-${{ inputs.aws-region }}-${{ github.run_id }}-${{ github.run_number }}-${{ github.run_attempt }}" >> $GITHUB_ENV

      # First hop: assume the role that can read the region-to-account secret.
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.E2E_SECRET_TEST_ROLE_ARN }}
          aws-region: us-east-1

      # Exposes the account id for the target region as ACCOUNT_ID.
      - name: Retrieve account
        uses: aws-actions/aws-secretsmanager-get-secrets@v1
        with:
          secret-ids: ACCOUNT_ID, region-account/${{ inputs.aws-region }}

      # Second hop: assume the test role inside the per-region account.
      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::${{ env.ACCOUNT_ID }}:role/${{ secrets.E2E_TEST_ROLE_ARN }}
          aws-region: ${{ inputs.aws-region }}

      # NOTE(review): download-artifact@v3 must stay in step with the version the
      # caller uploads with - confirm before bumping.
      - uses: actions/download-artifact@v3
        if: inputs.caller-workflow-name == 'main-build'
        with:
          name: ${{ inputs.staging_wheel_name }}

      - name: Upload main-build adot.whl to s3
        if: inputs.caller-workflow-name == 'main-build'
        run: |
          aws s3 cp ${{ inputs.staging_wheel_name }} s3://adot-main-build-staging-jar/${{ env.ADOT_WHEEL_NAME }}

      # Currently hard-wired to a fixed dev wheel; the caller-dependent selection
      # is kept below, commented out.
      - name: Set Get ADOT Wheel command environment variable
        working-directory: terraform/python/ec2
        run: |
          echo GET_ADOT_WHEEL_COMMAND="aws s3api get-object --bucket metric-schema-changes --key aws_opentelemetry_distro-0.0.1.dev0-py3-none-any.whl aws_opentelemetry_distro-0.0.1.dev0-py3-none-any.whl && python3.9 -m pip install aws_opentelemetry_distro-0.0.1.dev0-py3-none-any.whl" >> $GITHUB_ENV
          # if [ ${{ inputs.caller-workflow-name }} == "main-build" ]; then
          # # Reusing the adot-main-build-staging-jar bucket to store the python wheel file
          # echo GET_ADOT_WHEEL_COMMAND="aws s3 cp s3://adot-main-build-staging-jar/${{ env.ADOT_WHEEL_NAME }} ./${{ env.ADOT_WHEEL_NAME }} && python3.9 -m pip install ${{ env.ADOT_WHEEL_NAME }}" >> $GITHUB_ENV
          # else
          # echo GET_ADOT_WHEEL_COMMAND="python3.9 -m pip install aws-opentelemetry-distro" >> $GITHUB_ENV
          # fi

      - name: Initiate Terraform
        uses: ./.github/workflows/actions/execute_and_retry
        with:
          command: "cd ${{ env.TEST_RESOURCES_FOLDER }}/terraform/python/ec2/asg && terraform init && terraform validate"
          cleanup: "rm -rf .terraform && rm -rf .terraform.lock.hcl"

      - name: Deploy sample app via terraform and wait for endpoint to come online
        working-directory: terraform/python/ec2/asg
        run: |
          # Deploy the sample app on EC2 and wait for its endpoints to come online.
          # There may be occasional transient failures, so the whole cycle is tried up to max_retry times.
          # deployment_failed=0 means terraform applied and both endpoints answered; 1 means some stage failed.

          # Polls the URL in $1 with HEAD requests, sleeping 10s between tries and
          # giving up after 30 retries (about 5 minutes). Returns non-zero on give-up.
          wait_for_endpoint() {
            attempt_counter=0
            max_attempts=30
            until curl --output /dev/null --silent --head --fail "$1"; do
              if [ ${attempt_counter} -eq ${max_attempts} ]; then
                echo "Failed to connect to endpoint. Will attempt to redeploy sample app."
                return 1
              fi

              printf '.'
              attempt_counter=$(($attempt_counter+1))
              sleep 10
            done
          }

          retry_counter=0
          max_retry=2
          while [ $retry_counter -lt $max_retry ]; do
            echo "Attempt $retry_counter"
            deployment_failed=0
            # Any non-zero terraform exit status counts as a failure; the flag is
            # normalised to 1 so the checks below cannot mistake it for success.
            terraform apply -auto-approve \
              -var="aws_region=${{ inputs.aws-region }}" \
              -var="test_id=${{ env.TESTING_ID }}" \
              -var="sample_app_zip=${{ env.SAMPLE_APP_ZIP }}" \
              -var="get_cw_agent_rpm_command=${{ env.GET_CW_AGENT_RPM_COMMAND }}" \
              -var="get_adot_wheel_command=${{ env.GET_ADOT_WHEEL_COMMAND }}" \
              || deployment_failed=1

            if [ $deployment_failed -eq 1 ]; then
              echo "Terraform deployment was unsuccessful. Will attempt to retry deployment."
            fi

            # The terraform private_key output is deliberately never printed:
            # step output ends up in the workflow log.

            # If deployment_failed is still 0, terraform succeeded; now probe the endpoints.
            if [ $deployment_failed -eq 0 ]; then
              echo "Attempting to connect to the endpoint"
              main_service_instance_id=$(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-names python-ec2-single-asg-${{ env.TESTING_ID }} --region ${{ inputs.aws-region }} --query "AutoScalingGroups[].Instances[0].InstanceId" --output text)
              main_service_public_ip=$(aws ec2 describe-instances --instance-ids $main_service_instance_id --region ${{ inputs.aws-region }} --query "Reservations[].Instances[].PublicIpAddress" --output text)
              main_service_sample_app_endpoint=http://$main_service_public_ip:8000
              echo "The main service endpoint is $main_service_sample_app_endpoint"

              wait_for_endpoint "$main_service_sample_app_endpoint" || deployment_failed=1

              # Probing the remote service is pointless once the main one has failed.
              if [ $deployment_failed -eq 0 ]; then
                echo "Attempting to connect to the remote sample app endpoint"
                # terraform prints the value wrapped in double quotes; strip them.
                remote_service_ip=$(terraform output sample_app_remote_service_public_ip | tr -d '"')
                remote_sample_app_endpoint=http://$remote_service_ip:8001/healthcheck

                wait_for_endpoint "$remote_sample_app_endpoint" || deployment_failed=1
              fi
            fi

            # If deployment_failed is 1 then either the terraform deployment or an endpoint
            # connection failed, so first destroy the resources created by terraform and try again.
            if [ $deployment_failed -eq 1 ]; then
              echo "Destroying terraform"
              # Same region as the apply, so the provider targets the stack that was just created.
              terraform destroy -auto-approve \
                -var="aws_region=${{ inputs.aws-region }}" \
                -var="test_id=${{ env.TESTING_ID }}"

              retry_counter=$(($retry_counter+1))
            else
              # If deployment succeeded, then exit the loop
              break
            fi

            if [ $retry_counter -eq $max_retry ]; then
              echo "Max retry reached, failed to deploy terraform and connect to the endpoint. Exiting code"
              exit 1
            fi
          done

      # Publishes the identifiers the traffic and validation steps read from the env context.
      - name: Get sample app and EC2 instance information
        working-directory: terraform/python/ec2/asg
        run: |
          main_service_instance_id=$(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-names python-ec2-single-asg-${{ env.TESTING_ID }} --region ${{ inputs.aws-region }} --query "AutoScalingGroups[].Instances[0].InstanceId" --output text)
          main_service_public_ip=$(aws ec2 describe-instances --instance-ids $main_service_instance_id --region ${{ inputs.aws-region }} --query "Reservations[].Instances[].PublicIpAddress" --output text)
          main_service_private_dns_name=$(aws ec2 describe-instances --instance-ids $main_service_instance_id --region ${{ inputs.aws-region }} --query "Reservations[].Instances[].PrivateDnsName" --output text)
          echo "INSTANCE_ID=$main_service_instance_id" >> $GITHUB_ENV
          echo "MAIN_SERVICE_ENDPOINT=$main_service_public_ip:8000" >> $GITHUB_ENV
          echo "PRIVATE_DNS_NAME=$main_service_private_dns_name" >> $GITHUB_ENV
          echo "EC2_INSTANCE_AMI=$(terraform output ec2_instance_ami)" >> $GITHUB_ENV
          echo "REMOTE_SERVICE_IP=$(terraform output sample_app_remote_service_public_ip)" >> $GITHUB_ENV

      # This step increases the speed of the validation by creating the telemetry data in advance
      - name: Call all test APIs
        continue-on-error: true
        run: |
          curl -S -s http://${{ env.MAIN_SERVICE_ENDPOINT }}/outgoing-http-call; echo
          curl -S -s http://${{ env.MAIN_SERVICE_ENDPOINT }}/aws-sdk-call; echo
          curl -S -s http://${{ env.MAIN_SERVICE_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_IP }}; echo
          curl -S -s http://${{ env.MAIN_SERVICE_ENDPOINT }}/client-call; echo

      - name: Initiate Gradlew Daemon
        uses: ./.github/workflows/actions/execute_and_retry
        with:
          command: "./gradlew"
          cleanup: "./gradlew clean"
          max_retry: 4
          sleep_time: 30

      # Validation for application signals telemetry data.
      # Each validator command is one logical line, folded across lines for readability.
      - name: Validate generated EMF logs
        id: log-validation
        run: >-
          ./gradlew validator:run --args='-c python/ec2/asg/log-validation.yml
          --endpoint http://${{ env.MAIN_SERVICE_ENDPOINT }}
          --remote-service-deployment-name ${{ env.REMOTE_SERVICE_IP }}:8001
          --region ${{ inputs.aws-region }}
          --metric-namespace ${{ env.METRIC_NAMESPACE }}
          --log-group ${{ env.LOG_GROUP_NAME }}
          --service-name python-sample-application-${{ env.TESTING_ID }}
          --remote-service-name python-sample-remote-application-${{ env.TESTING_ID }}
          --request-body ip=${{ env.REMOTE_SERVICE_IP }}
          --instance-ami ${{ env.EC2_INSTANCE_AMI }}
          --platform-info python-ec2-single-asg-${{ env.TESTING_ID }}
          --instance-id ${{ env.INSTANCE_ID }}
          --private-dns-name ${{ env.PRIVATE_DNS_NAME }}
          --rollup'

      # Runs even when log validation failed, so one run reports every failing signal.
      - name: Validate generated metrics
        id: metric-validation
        if: (success() || steps.log-validation.outcome == 'failure') && !cancelled()
        run: >-
          ./gradlew validator:run --args='-c python/ec2/asg/metric-validation.yml
          --endpoint http://${{ env.MAIN_SERVICE_ENDPOINT }}
          --remote-service-deployment-name ${{ env.REMOTE_SERVICE_IP }}:8001
          --region ${{ inputs.aws-region }}
          --metric-namespace ${{ env.METRIC_NAMESPACE }}
          --log-group ${{ env.LOG_GROUP_NAME }}
          --service-name python-sample-application-${{ env.TESTING_ID }}
          --remote-service-name python-sample-remote-application-${{ env.TESTING_ID }}
          --request-body ip=${{ env.REMOTE_SERVICE_IP }}
          --instance-ami ${{ env.EC2_INSTANCE_AMI }}
          --platform-info python-ec2-single-asg-${{ env.TESTING_ID }}
          --instance-id ${{ env.INSTANCE_ID }}
          --private-dns-name ${{ env.PRIVATE_DNS_NAME }}
          --rollup'

      # Runs even when an earlier validation failed; additionally passes the account id.
      - name: Validate generated traces
        id: trace-validation
        if: (success() || steps.log-validation.outcome == 'failure' || steps.metric-validation.outcome == 'failure') && !cancelled()
        run: >-
          ./gradlew validator:run --args='-c python/ec2/asg/trace-validation.yml
          --endpoint http://${{ env.MAIN_SERVICE_ENDPOINT }}
          --remote-service-deployment-name ${{ env.REMOTE_SERVICE_IP }}:8001
          --region ${{ inputs.aws-region }}
          --account-id ${{ env.ACCOUNT_ID }}
          --metric-namespace ${{ env.METRIC_NAMESPACE }}
          --log-group ${{ env.LOG_GROUP_NAME }}
          --service-name python-sample-application-${{ env.TESTING_ID }}
          --remote-service-name python-sample-remote-application-${{ env.TESTING_ID }}
          --request-body ip=${{ env.REMOTE_SERVICE_IP }}
          --instance-ami ${{ env.EC2_INSTANCE_AMI }}
          --platform-info python-ec2-single-asg-${{ env.TESTING_ID }}
          --instance-id ${{ env.INSTANCE_ID }}
          --private-dns-name ${{ env.PRIVATE_DNS_NAME }}
          --rollup'

      # Emits Failure=0.0 only when all three validations succeeded, otherwise Failure=1.0.
      - name: Publish metric on test result
        if: always()
        run: |
          if [ "${{ steps.log-validation.outcome }}" = "success" ] && [ "${{ steps.metric-validation.outcome }}" = "success" ] && [ "${{ steps.trace-validation.outcome }}" = "success" ]; then
            failure_value=0.0
          else
            failure_value=1.0
          fi

          aws cloudwatch put-metric-data --namespace 'ADOT/GitHubActions' \
            --metric-name Failure \
            --dimensions repository=${{ github.repository }},branch=${{ github.ref_name }},workflow=${{ inputs.caller-workflow-name }} \
            --value $failure_value \
            --region ${{ inputs.aws-region }}

      # Clean up Procedures
      # Best effort: runs regardless of earlier results and never fails the job.
      - name: Terraform destroy
        if: always()
        continue-on-error: true
        working-directory: terraform/python/ec2/asg
        run: |
          terraform destroy -auto-approve \
            -var="aws_region=${{ inputs.aws-region }}" \
            -var="test_id=${{ env.TESTING_ID }}"
0 commit comments