Skip to content

Commit abe5925

Browse files
Merge pull request #31 from aws-observability/k8s
[K8s]: E2E test implementation in IAD
2 parents fb18068 + fbe568f commit abe5925

25 files changed

+2580
-2
lines changed

.github/workflows/appsignals-e2e-ec2-test.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ env:
2929
APP_SIGNALS_ADOT_JAR: "https://github.com/aws-observability/aws-otel-java-instrumentation/releases/latest/download/aws-opentelemetry-agent.jar"
3030
METRIC_NAMESPACE: AppSignals
3131
LOG_GROUP_NAME: /aws/appsignals/generic
32-
TEST: ${{ inputs.test }}
3332
GET_ADOT_JAR_COMMAND: "wget -O adot.jar https://github.com/aws-observability/aws-otel-java-instrumentation/releases/latest/download/aws-opentelemetry-agent.jar"
3433
TEST_RESOURCES_FOLDER: /__w/aws-application-signals-test-framework/aws-application-signals-test-framework
3534
GET_CW_AGENT_RPM_COMMAND: "wget -O cw-agent.rpm https://amazoncloudwatch-agent-${{ inputs.aws-region }}.s3.${{ inputs.aws-region }}.amazonaws.com/amazon_linux/amd64/latest/amazon-cloudwatch-agent.rpm"
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
## Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
## SPDX-License-Identifier: Apache-2.0
3+
4+
## This workflow aims to run the Application Signals end-to-end tests as a canary to
5+
## test the artifacts for App Signals enablement. It will deploy the CloudWatch Agent
6+
## Operator and our sample app and remote service onto a native K8s cluster, call the
7+
## APIs, and validate the generated telemetry, including logs, metrics, and traces.
8+
## It will then clean up the cluster and EC2 instance it runs on for the next test run.
9+
name: App Signals Enablement - E2E K8s Canary Testing
10+
on:
11+
schedule:
12+
- cron: '*/15 * * * *' # run the workflow every 15 minutes
13+
workflow_dispatch: # be able to run the workflow on demand
14+
15+
permissions:
16+
id-token: write
17+
contents: read
18+
19+
jobs:
20+
e2e-k8s-test:
21+
uses: ./.github/workflows/appsignals-e2e-k8s-test.yml
22+
secrets: inherit
23+
with:
24+
# To run in more regions, a cluster must be provisioned manually on EC2 instances in that region
25+
aws-region: 'us-east-1'
26+
caller-workflow-name: 'appsignals-e2e-k8s-canary-test'
Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
## Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
## SPDX-License-Identifier: Apache-2.0
3+
4+
# This is a reusable workflow for running the E2E test for App Signals.
5+
# It is meant to be called from another workflow.
6+
# Read more about reusable workflows: https://docs.github.com/en/actions/using-workflows/reusing-workflows#overview
7+
name: App Signals Enablement E2E Testing - K8s on EC2 Use Case
8+
on:
9+
workflow_call:
10+
inputs:
11+
aws-region:
12+
required: true
13+
type: string
14+
caller-workflow-name:
15+
required: true
16+
type: string
17+
18+
concurrency:
19+
group: '${{ github.workflow }} @ ${{ inputs.aws-region }}'
20+
cancel-in-progress: false
21+
22+
permissions:
23+
id-token: write
24+
contents: read
25+
26+
env:
27+
# The presence of this env var is required for use by terraform and AWS CLI commands
28+
# It is not redundant
29+
AWS_DEFAULT_REGION: ${{ inputs.aws-region }}
30+
TEST_ACCOUNT: ${{ secrets.APP_SIGNALS_E2E_TEST_ACC }}
31+
METRIC_NAMESPACE: AppSignals
32+
LOG_GROUP_NAME: /aws/appsignals/k8s
33+
MASTER_NODE_SSH_KEY: ${{ secrets.APP_SIGNALS_E2E_K8S_SSH_KEY_IAD }}
34+
MAIN_SERVICE_ENDPOINT: ${{ secrets.APP_SIGNALS_E2E_K8S_MASTER_NODE_ENDPOINT }}
35+
SAMPLE_APP_NAMESPACE: sample-app-namespace
36+
TEST_RESOURCES_FOLDER: /__w/aws-application-signals-test-framework/aws-application-signals-test-framework
37+
38+
jobs:
39+
e2e-k8s-test:
40+
runs-on: ubuntu-latest
41+
container:
42+
image: public.ecr.aws/h6o3z5z9/aws-application-signals-test-framework-workflow-container:latest
43+
steps:
44+
- uses: actions/checkout@v4
45+
with:
46+
fetch-depth: 0
47+
48+
- name: Generate testing id
49+
run: echo TESTING_ID="${{ env.AWS_DEFAULT_REGION }}-${{ github.run_id }}-${{ github.run_number }}" >> $GITHUB_ENV
50+
51+
- name: Configure AWS Credentials
52+
uses: aws-actions/configure-aws-credentials@v4
53+
with:
54+
role-to-assume: ${{ secrets.E2E_SECRET_TEST_ROLE_ARN }}
55+
aws-region: us-east-1
56+
57+
- name: Retrieve account
58+
uses: aws-actions/aws-secretsmanager-get-secrets@v1
59+
with:
60+
secret-ids:
61+
ACCOUNT_ID, region-account/${{ env.AWS_DEFAULT_REGION }}
62+
63+
- name: Configure AWS Credentials
64+
uses: aws-actions/configure-aws-credentials@v4
65+
with:
66+
role-to-assume: arn:aws:iam::${{ env.ACCOUNT_ID }}:role/${{ secrets.E2E_TEST_ROLE_ARN }}
67+
aws-region: ${{ env.AWS_DEFAULT_REGION }}
68+
69+
- name: Prepare and upload sample app deployment files
70+
working-directory: terraform/k8s/deploy/resources
71+
run: |
72+
sed -i 's#\${TESTING_ID}#${{ env.TESTING_ID }}#' frontend-service-depl.yaml
73+
sed -i 's#\${IMAGE}#${{ env.ACCOUNT_ID }}.dkr.ecr.${{ env.AWS_DEFAULT_REGION }}.amazonaws.com/${{ secrets.APP_SIGNALS_E2E_FE_SA_IMG }}#' frontend-service-depl.yaml
74+
sed -i 's#\${TESTING_ID}#${{ env.TESTING_ID }}#' remote-service-depl.yaml
75+
sed -i 's#\${IMAGE}#${{ env.ACCOUNT_ID }}.dkr.ecr.${{ env.AWS_DEFAULT_REGION }}.amazonaws.com/${{ secrets.APP_SIGNALS_E2E_RE_SA_IMG }}#' remote-service-depl.yaml
76+
aws s3api put-object --bucket ${{ secrets.APP_SIGNALS_E2E_EC2_JAR }}-prod-${{ env.AWS_DEFAULT_REGION }} --key frontend-service-depl.yaml --body frontend-service-depl.yaml
77+
aws s3api put-object --bucket ${{ secrets.APP_SIGNALS_E2E_EC2_JAR }}-prod-${{ env.AWS_DEFAULT_REGION }} --key remote-service-depl.yaml --body remote-service-depl.yaml
78+
79+
- name: Initiate Terraform
80+
uses: ./.github/workflows/actions/execute_and_retry
81+
with:
82+
command: "cd ${{ env.TEST_RESOURCES_FOLDER }}/terraform/k8s/deploy && terraform init && terraform validate"
83+
cleanup: "rm -rf .terraform && rm -rf .terraform.lock.hcl"
84+
85+
- name: Deploy Operator and Sample App using Terraform
86+
working-directory: terraform/k8s/deploy
87+
run: |
88+
terraform apply -auto-approve \
89+
-var="aws_region=${{ env.AWS_DEFAULT_REGION }}" \
90+
-var="test_id=${{ env.TESTING_ID }}" \
91+
-var="ssh_key=${{ env.MASTER_NODE_SSH_KEY }}" \
92+
-var="host=${{ env.MAIN_SERVICE_ENDPOINT }}"
93+
94+
- name: Get Remote Service IP
95+
run: |
96+
echo REMOTE_SERVICE_IP="$(aws ssm get-parameter --region ${{ env.AWS_DEFAULT_REGION }} --name remote-service-ip | jq -r '.Parameter.Value')" >> $GITHUB_ENV
97+
98+
# This step speeds up the validation by creating the telemetry data in advance
99+
# It is run after the gradle build to give the app time to initialize after the pods become ready
100+
- name: Call all test APIs
101+
continue-on-error: true
102+
run: |
103+
curl -S -s http://${{ env.MAIN_SERVICE_ENDPOINT }}:30100/outgoing-http-call/; echo
104+
curl -S -s http://${{ env.MAIN_SERVICE_ENDPOINT }}:30100/aws-sdk-call/; echo
105+
curl -S -s http://${{ env.MAIN_SERVICE_ENDPOINT }}:30100/remote-service?ip=${{ env.REMOTE_SERVICE_IP }}/; echo
106+
curl -S -s http://${{ env.MAIN_SERVICE_ENDPOINT }}:30100/client-call/; echo
107+
108+
# Validation for App Signals telemetry data
109+
- name: Validate generated EMF logs
110+
id: log-validation
111+
run: ./gradlew validator:run --args='-c k8s/log-validation.yml
112+
--testing-id ${{ env.TESTING_ID }}
113+
--endpoint http://${{ env.MAIN_SERVICE_ENDPOINT }}:30100
114+
--region ${{ env.AWS_DEFAULT_REGION }}
115+
--account-id ${{ env.ACCOUNT_ID }}
116+
--metric-namespace ${{ env.METRIC_NAMESPACE }}
117+
--log-group ${{ env.LOG_GROUP_NAME }}
118+
--platform-info k8s-cluster-${{ env.TESTING_ID }}
119+
--app-namespace ${{ env.SAMPLE_APP_NAMESPACE }}
120+
--service-name sample-application-${{ env.TESTING_ID }}
121+
--remote-service-name sample-r-app-deployment-${{ env.TESTING_ID }}
122+
--request-body ip=${{ env.REMOTE_SERVICE_IP }}
123+
--rollup'
124+
125+
- name: Validate generated metrics
126+
id: metric-validation
127+
if: (success() || steps.log-validation.outcome == 'failure') && !cancelled()
128+
run: ./gradlew validator:run --args='-c k8s/metric-validation.yml
129+
--testing-id ${{ env.TESTING_ID }}
130+
--endpoint http://${{ env.MAIN_SERVICE_ENDPOINT }}:30100
131+
--region ${{ env.AWS_DEFAULT_REGION }}
132+
--account-id ${{ env.ACCOUNT_ID }}
133+
--metric-namespace ${{ env.METRIC_NAMESPACE }}
134+
--log-group ${{ env.LOG_GROUP_NAME }}
135+
--platform-info k8s-cluster-${{ env.TESTING_ID }}
136+
--app-namespace ${{ env.SAMPLE_APP_NAMESPACE }}
137+
--service-name sample-application-${{ env.TESTING_ID }}
138+
--remote-service-name sample-r-app-deployment-${{ env.TESTING_ID }}
139+
--remote-service-deployment-name sample-r-app-deployment-${{ env.TESTING_ID }}
140+
--request-body ip=${{ env.REMOTE_SERVICE_IP }}
141+
--rollup'
142+
143+
- name: Validate generated traces
144+
id: trace-validation
145+
if: (success() || steps.log-validation.outcome == 'failure' || steps.metric-validation.outcome == 'failure') && !cancelled()
146+
run: ./gradlew validator:run --args='-c k8s/trace-validation.yml
147+
--testing-id ${{ env.TESTING_ID }}
148+
--endpoint http://${{ env.MAIN_SERVICE_ENDPOINT }}:30100
149+
--region ${{ env.AWS_DEFAULT_REGION }}
150+
--account-id ${{ env.ACCOUNT_ID }}
151+
--metric-namespace ${{ env.METRIC_NAMESPACE }}
152+
--log-group ${{ env.LOG_GROUP_NAME }}
153+
--platform-info k8s-cluster-${{ env.TESTING_ID }}
154+
--app-namespace ${{ env.SAMPLE_APP_NAMESPACE }}
155+
--service-name sample-application-${{ env.TESTING_ID }}
156+
--remote-service-name sample-r-app-deployment-${{ env.TESTING_ID }}
157+
--remote-service-deployment-name sample-r-app-deployment-${{ env.TESTING_ID }}
158+
--request-body ip=${{ env.REMOTE_SERVICE_IP }}
159+
--rollup'
160+
161+
- name: Publish metric on test result
162+
if: always()
163+
run: |
164+
if [ "${{ steps.log-validation.outcome }}" = "success" ] && [ "${{ steps.metric-validation.outcome }}" = "success" ] && [ "${{ steps.trace-validation.outcome }}" = "success" ]; then
165+
aws cloudwatch put-metric-data --namespace 'ADOT/GitHubActions' \
166+
--metric-name Failure \
167+
--dimensions repository=${{ github.repository }},branch=${{ github.ref_name }},workflow=${{ inputs.caller-workflow-name }} \
168+
--value 0.0 \
169+
--region ${{ env.AWS_DEFAULT_REGION }}
170+
else
171+
aws cloudwatch put-metric-data --namespace 'ADOT/GitHubActions' \
172+
--metric-name Failure \
173+
--dimensions repository=${{ github.repository }},branch=${{ github.ref_name }},workflow=${{ inputs.caller-workflow-name }} \
174+
--value 1.0 \
175+
--region ${{ env.AWS_DEFAULT_REGION }}
176+
fi
177+
178+
# Clean up Procedures
179+
- name: Initiate Terraform for Cleanup
180+
if: always()
181+
uses: ./.github/workflows/actions/execute_and_retry
182+
with:
183+
command: "cd ${{ env.TEST_RESOURCES_FOLDER }}/terraform/k8s/cleanup && terraform init && terraform validate"
184+
cleanup: "rm -rf .terraform && rm -rf .terraform.lock.hcl"
185+
186+
- name: Clean Up Operator and Sample App using Terraform
187+
if: always()
188+
working-directory: terraform/k8s/cleanup
189+
run: |
190+
terraform apply -auto-approve \
191+
-var="aws_region=${{ env.AWS_DEFAULT_REGION }}" \
192+
-var="test_id=${{ env.TESTING_ID }}" \
193+
-var="ssh_key=${{ env.MASTER_NODE_SSH_KEY }}" \
194+
-var="host=${{ env.MAIN_SERVICE_ENDPOINT }}"
195+
196+
- name: Terraform destroy - deployment
197+
if: always()
198+
continue-on-error: true
199+
working-directory: terraform/k8s/deploy
200+
run: |
201+
terraform destroy -auto-approve \
202+
-var="test_id=${{ env.TESTING_ID }}"
203+
204+
- name: Terraform destroy - cleanup
205+
if: always()
206+
continue-on-error: true
207+
working-directory: terraform/k8s/cleanup
208+
run: |
209+
terraform destroy -auto-approve \
210+
-var="test_id=${{ env.TESTING_ID }}"

terraform/k8s/cleanup/main.tf

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
2+
3+
resource "null_resource" "cleanup" {
4+
connection {
5+
type = "ssh"
6+
user = var.user
7+
private_key = var.ssh_key
8+
host = var.host
9+
}
10+
11+
provisioner "remote-exec" {
12+
inline = [
13+
<<-EOF
14+
# Allow terraform to fail any of the following steps without exiting
15+
set +e
16+
17+
# Uninstall the operator and remove the repo from the EC2 instance
18+
echo "LOG: Uninstalling CloudWatch Agent Operator"
19+
helm uninstall --debug --namespace amazon-cloudwatch amazon-cloudwatch-operator --ignore-not-found
20+
echo "LOG: Deleting CloudWatch Agent Operator repo from environment"
21+
[ ! -e amazon-cloudwatch-agent-operator ] || sudo rm -r amazon-cloudwatch-agent-operator
22+
23+
# Delete sample app resources
24+
echo "LOG: Deleting sample app namespace"
25+
kubectl delete namespace sample-app-namespace
26+
echo "LOG: Deleting sample app deployment files"
27+
[ ! -e frontend-service-depl.yaml ] || rm frontend-service-depl.yaml
28+
[ ! -e remote-service-depl.yaml ] || rm remote-service-depl.yaml
29+
sleep 10
30+
31+
# Print cluster state when done clean up procedures
32+
echo "LOG: Printing cluster state after cleanup"
33+
kubectl get pods -A
34+
EOF
35+
]
36+
}
37+
}

terraform/k8s/cleanup/variables.tf

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# ------------------------------------------------------------------------
2+
# Copyright 2023 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License").
5+
# You may not use this file except in compliance with the License.
6+
# A copy of the License is located at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# or in the "license" file accompanying this file. This file is distributed
11+
# on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
12+
# express or implied. See the License for the specific language governing
13+
# permissions and limitations under the License.
14+
# -------------------------------------------------------------------------
15+
16+
variable "test_id" {
17+
default = "dummy-123"
18+
}
19+
20+
variable "aws_region" {
21+
default = "<aws-region>"
22+
}
23+
24+
variable "user" {
25+
default = "ec2-user"
26+
}
27+
28+
variable "ssh_key" {
29+
default = "<MASTER_NODE_SSH_KEY>"
30+
description = "This variable is responsible for providing the SSH key of the master node to allow terraform to interact with the cluster"
31+
}
32+
33+
variable "host" {
34+
default = "<HOST_IP_OR_DNS>"
35+
description = "This variable is responsible for defining which host (ec2 instance) we connect to for the K8s-on-EC2 test"
36+
}

0 commit comments

Comments
 (0)