Skip to content

Commit d658431

Browse files
authored
Remove public endpoint eks (#146)
*Issue description:*

*Description of changes:*

*Ensure you've run the following tests on your changes and include the link below:*

To do so, create a `test.yml` file with `name: Test` and workflow description to test your changes, then remove the file for your PR. Link your test run in your PR description. This process is a short-term solution while we work on creating a staging environment for testing.

NOTE: TESTS RUNNING ON A SINGLE EKS CLUSTER CANNOT BE RUN IN PARALLEL. See the [needs](https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idneeds) keyword to run tests in succession.

- Run Java EKS on `e2e-playground` in us-east-1 and eu-central-2
- Run Python EKS on `e2e-playground` in us-east-1 and eu-central-2
- Run metric limiter on EKS cluster `e2e-playground` in us-east-1 and eu-central-2
- Run EC2 tests in all regions
- Run K8s on a separate K8s cluster (check IAD test account for master node endpoints; these will change as we create and destroy clusters for OS patching)

By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license.
1 parent cc5429d commit d658431

25 files changed

+295
-528
lines changed

.github/workflows/java-ec2-asg-e2e-test.yml

Lines changed: 11 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -122,60 +122,13 @@ jobs:
122122
-var="sample_remote_app_jar=${{ env.SAMPLE_APP_REMOTE_SERVICE_JAR }}" \
123123
-var="get_cw_agent_rpm_command=${{ env.GET_CW_AGENT_RPM_COMMAND }}" \
124124
-var="get_adot_jar_command=${{ env.GET_ADOT_JAR_COMMAND }}" \
125+
-var="canary_type=${{ github.job }}" \
125126
|| deployment_failed=$?
126127
127128
if [ $deployment_failed -eq 1 ]; then
128129
echo "Terraform deployment was unsuccessful. Will attempt to retry deployment."
129130
fi
130131
131-
# If the deployment_failed is still 0, then the terraform deployment succeeded and now try to connect to the endpoint.
132-
# Attempts to connect will be made for up to 10 minutes
133-
if [ $deployment_failed -eq 0 ]; then
134-
echo "Attempting to connect to the endpoint"
135-
main_service_instance_id=$(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-names ec2-single-asg-${{ env.TESTING_ID }} --region ${{ env.E2E_TEST_AWS_REGION }} --query "AutoScalingGroups[].Instances[0].InstanceId" --output text)
136-
main_service_public_ip=$(aws ec2 describe-instances --instance-ids $main_service_instance_id --region ${{ env.E2E_TEST_AWS_REGION }} --query "Reservations[].Instances[].PublicIpAddress" --output text)
137-
main_service_private_dns_name=$(aws ec2 describe-instances --instance-ids $main_service_instance_id --region ${{ env.E2E_TEST_AWS_REGION }} --query "Reservations[].Instances[].PrivateDnsName" --output text)
138-
139-
echo "INSTANCE_ID=$main_service_instance_id" >> $GITHUB_ENV
140-
echo "MAIN_SERVICE_ENDPOINT=$main_service_public_ip:8080" >> $GITHUB_ENV
141-
echo "PRIVATE_DNS_NAME=$main_service_private_dns_name" >> $GITHUB_ENV
142-
echo "EC2_INSTANCE_AMI=$(terraform output ec2_instance_ami)" >> $GITHUB_ENV
143-
echo "REMOTE_SERVICE_IP=$(terraform output sample_app_remote_service_public_ip)" >> $GITHUB_ENV
144-
145-
main_service_sample_app_endpoint=http://$main_service_public_ip:8080
146-
echo "The main service endpoint is $main_service_sample_app_endpoint"
147-
148-
attempt_counter=0
149-
max_attempts=30
150-
until $(curl --output /dev/null --silent --head --fail $(echo "$main_service_sample_app_endpoint" | tr -d '"')); do
151-
if [ ${attempt_counter} -eq ${max_attempts} ];then
152-
echo "Failed to connect to endpoint. Will attempt to redeploy sample app."
153-
deployment_failed=1
154-
break
155-
fi
156-
157-
printf '.'
158-
attempt_counter=$(($attempt_counter+1))
159-
sleep 10
160-
done
161-
162-
echo "Attempting to connect to the remote sample app endpoint"
163-
remote_sample_app_endpoint=http://$(terraform output sample_app_remote_service_public_ip):8080/healthcheck
164-
attempt_counter=0
165-
max_attempts=30
166-
until $(curl --output /dev/null --silent --head --fail $(echo "$remote_sample_app_endpoint" | tr -d '"')); do
167-
if [ ${attempt_counter} -eq ${max_attempts} ];then
168-
echo "Failed to connect to endpoint. Will attempt to redeploy sample app."
169-
deployment_failed=1
170-
break
171-
fi
172-
173-
printf '.'
174-
attempt_counter=$(($attempt_counter+1))
175-
sleep 10
176-
done
177-
fi
178-
179132
# If deployment_failed is 1 then either the terraform deployment or the endpoint connection failed, so first destroy the
180133
# resources created from terraform and try again.
181134
if [ $deployment_failed -eq 1 ]; then
@@ -195,14 +148,16 @@ jobs:
195148
fi
196149
done
197150
198-
# This step speeds up the validation by creating the telemetry data in advance
199-
- name: Call all test APIs
200-
continue-on-error: true
201-
run: |
202-
curl -S -s "http://${{ env.MAIN_SERVICE_ENDPOINT }}/outgoing-http-call"
203-
curl -S -s "http://${{ env.MAIN_SERVICE_ENDPOINT }}/aws-sdk-call?ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}"
204-
curl -S -s "http://${{ env.MAIN_SERVICE_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}"
205-
curl -S -s "http://${{ env.MAIN_SERVICE_ENDPOINT }}/client-call"
151+
- name: Get the sample app and EC2 instance information
152+
working-directory: terraform/java/ec2/asg
153+
run: |
154+
main_service_instance_id=$(aws autoscaling describe-auto-scaling-groups --auto-scaling-group-names ec2-single-asg-${{ env.TESTING_ID }} --region ${{ env.E2E_TEST_AWS_REGION }} --query "AutoScalingGroups[].Instances[0].InstanceId" --output text)
155+
main_service_private_dns_name=$(aws ec2 describe-instances --instance-ids $main_service_instance_id --region ${{ env.E2E_TEST_AWS_REGION }} --query "Reservations[].Instances[].PrivateDnsName" --output text)
156+
echo "INSTANCE_ID=$main_service_instance_id" >> $GITHUB_ENV
157+
echo "MAIN_SERVICE_ENDPOINT=localhost:8080" >> $GITHUB_ENV
158+
echo "PRIVATE_DNS_NAME=$main_service_private_dns_name" >> $GITHUB_ENV
159+
echo "EC2_INSTANCE_AMI=$(terraform output ec2_instance_ami)" >> $GITHUB_ENV
160+
echo "REMOTE_SERVICE_IP=$(terraform output sample_app_remote_service_private_ip)" >> $GITHUB_ENV
206161
207162
- name: Initiate Gradlew Daemon
208163
if: steps.initiate-gradlew == 'failure'

.github/workflows/java-ec2-default-e2e-test.yml

Lines changed: 8 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ env:
3030
LOG_GROUP_NAME: /aws/application-signals/data
3131
TEST_RESOURCES_FOLDER: ${GITHUB_WORKSPACE}
3232

33-
3433
jobs:
3534
java-ec2-default:
3635
runs-on: ubuntu-latest
@@ -99,7 +98,7 @@ jobs:
9998
- name: Initiate Terraform
10099
uses: ./.github/workflows/actions/execute_and_retry
101100
with:
102-
command: "cd ${{ env.TEST_RESOURCES_FOLDER }}/terraform/java/ec2/default && terraform init && terraform validate"
101+
command: "pwd && cd ${{ env.TEST_RESOURCES_FOLDER }}/terraform/java/ec2/default && terraform init && terraform validate"
103102
cleanup: "rm -rf .terraform && rm -rf .terraform.lock.hcl"
104103
max_retry: 6
105104
sleep_time: 60
@@ -123,48 +122,17 @@ jobs:
123122
-var="sample_remote_app_jar=${{ env.SAMPLE_APP_REMOTE_SERVICE_JAR }}" \
124123
-var="get_cw_agent_rpm_command=${{ env.GET_CW_AGENT_RPM_COMMAND }}" \
125124
-var="get_adot_jar_command=${{ env.GET_ADOT_JAR_COMMAND }}" \
125+
-var="canary_type=${{ github.job }}" \
126126
|| deployment_failed=$?
127+
128+
echo $(terraform state show aws_key_pair.aws_ssh_key)
129+
echo $(terraform output -json private_key_content)
130+
127131
128132
if [ $deployment_failed -eq 1 ]; then
129133
echo "Terraform deployment was unsuccessful. Will attempt to retry deployment."
130134
fi
131135
132-
# If the deployment_failed is still 0, then the terraform deployment succeeded and now try to connect to the endpoint.
133-
# Attempts to connect will be made for up to 10 minutes
134-
if [ $deployment_failed -eq 0 ]; then
135-
echo "Attempting to connect to the endpoint"
136-
main_sample_app_endpoint=http://$(terraform output sample_app_main_service_public_dns):8080
137-
attempt_counter=0
138-
max_attempts=30
139-
until $(curl --output /dev/null --silent --head --fail $(echo "$main_sample_app_endpoint" | tr -d '"')); do
140-
if [ ${attempt_counter} -eq ${max_attempts} ];then
141-
echo "Failed to connect to endpoint. Will attempt to redeploy sample app."
142-
deployment_failed=1
143-
break
144-
fi
145-
146-
printf '.'
147-
attempt_counter=$(($attempt_counter+1))
148-
sleep 10
149-
done
150-
151-
echo "Attempting to connect to the remote sample app endpoint"
152-
remote_sample_app_endpoint=http://$(terraform output sample_app_remote_service_public_ip):8080/healthcheck
153-
attempt_counter=0
154-
max_attempts=30
155-
until $(curl --output /dev/null --silent --head --fail $(echo "$remote_sample_app_endpoint" | tr -d '"')); do
156-
if [ ${attempt_counter} -eq ${max_attempts} ];then
157-
echo "Failed to connect to endpoint. Will attempt to redeploy sample app."
158-
deployment_failed=1
159-
break
160-
fi
161-
162-
printf '.'
163-
attempt_counter=$(($attempt_counter+1))
164-
sleep 10
165-
done
166-
fi
167-
168136
# If deployment_failed is 1 then either the terraform deployment or the endpoint connection failed, so first destroy the
169137
# resources created from terraform and try again.
170138
if [ $deployment_failed -eq 1 ]; then
@@ -192,19 +160,10 @@ jobs:
192160
- name: Get the sample app and EC2 instance information
193161
working-directory: terraform/java/ec2/default
194162
run: |
195-
echo "MAIN_SERVICE_ENDPOINT=$(terraform output sample_app_main_service_public_dns):8080" >> $GITHUB_ENV
196-
echo "REMOTE_SERVICE_IP=$(terraform output sample_app_remote_service_public_ip)" >> $GITHUB_ENV
163+
echo "MAIN_SERVICE_ENDPOINT=localhost:8080" >> $GITHUB_ENV
164+
echo "REMOTE_SERVICE_IP=$(terraform output sample_app_remote_service_private_ip)" >> $GITHUB_ENV
197165
echo "MAIN_SERVICE_INSTANCE_ID=$(terraform output main_service_instance_id)" >> $GITHUB_ENV
198166
199-
# This step speeds up the validation by creating the telemetry data in advance
200-
- name: Call all test APIs
201-
continue-on-error: true
202-
run: |
203-
curl -S -s "http://${{ env.MAIN_SERVICE_ENDPOINT }}/outgoing-http-call"
204-
curl -S -s "http://${{ env.MAIN_SERVICE_ENDPOINT }}/aws-sdk-call?ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}"
205-
curl -S -s "http://${{ env.MAIN_SERVICE_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_IP }}&testingId=${{ env.TESTING_ID }}"
206-
curl -S -s "http://${{ env.MAIN_SERVICE_ENDPOINT }}/client-call"
207-
208167
- name: Initiate Gradlew Daemon
209168
if: steps.initiate-gradlew == 'failure'
210169
uses: ./.github/workflows/actions/execute_and_retry

.github/workflows/java-eks-e2e-test.yml

Lines changed: 20 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -232,39 +232,6 @@ jobs:
232232
233233
execute_and_retry 2 "kubectl delete pods --all -n ${{ env.SAMPLE_APP_NAMESPACE }}" "" 60
234234
execute_and_retry 2 "kubectl wait --for=condition=Ready --request-timeout '5m' pod --all -n ${{ env.SAMPLE_APP_NAMESPACE }}" "" 10
235-
236-
echo "Attempting to connect to the main sample app endpoint"
237-
main_sample_app_endpoint=http://$(terraform output sample_app_endpoint)
238-
attempt_counter=0
239-
max_attempts=60
240-
until $(curl --output /dev/null --silent --head --fail $(echo "$main_sample_app_endpoint" | tr -d '"')); do
241-
if [ ${attempt_counter} -eq ${max_attempts} ];then
242-
echo "Failed to connect to endpoint ($main_sample_app_endpoint). Will attempt to redeploy sample app."
243-
deployment_failed=1
244-
break
245-
fi
246-
247-
printf '.'
248-
attempt_counter=$(($attempt_counter+1))
249-
sleep 10
250-
done
251-
252-
echo "Attempting to connect to the remote sample app endpoint"
253-
remote_sample_app_endpoint=http://$(terraform output sample_remote_app_endpoint)/healthcheck
254-
echo $remote_sample_app_endpoint
255-
attempt_counter=0
256-
max_attempts=30
257-
until $(curl --output /dev/null --silent --head --fail $(echo "$remote_sample_app_endpoint" | tr -d '"')); do
258-
if [ ${attempt_counter} -eq ${max_attempts} ];then
259-
echo "Failed to connect to endpoint. Will attempt to redeploy sample app."
260-
deployment_failed=1
261-
break
262-
fi
263-
264-
printf '.'
265-
attempt_counter=$(($attempt_counter+1))
266-
sleep 10
267-
done
268235
fi
269236
270237
# If the deployment_failed is 1 then either the terraform deployment or the endpoint connection failed, so first destroy the
@@ -333,18 +300,28 @@ jobs:
333300
echo "REMOTE_SERVICE_POD_IP=$(kubectl get pods -n ${{ env.SAMPLE_APP_NAMESPACE }} --selector=app=remote-app -o jsonpath='{.items[0].status.podIP}')" >> $GITHUB_ENV
334301
335302
- name: Get the sample app endpoint
336-
working-directory: terraform/java/eks
337-
run: echo "APP_ENDPOINT=$(terraform output sample_app_endpoint)" >> $GITHUB_ENV
303+
run: echo "APP_ENDPOINT=$(kubectl get pods -n ${{ env.SAMPLE_APP_NAMESPACE }} --selector=app=sample-app -o jsonpath='{.items[0].status.podIP}'):8080" >> $GITHUB_ENV
338304

339-
# This step speeds up the validation by creating the telemetry data in advance
340-
- name: Call all test APIs
341-
continue-on-error: true
305+
- name: Deploy the traffic generator
342306
run: |
343-
curl -S -s "http://${{ env.APP_ENDPOINT }}/outgoing-http-call"
344-
curl -S -s "http://${{ env.APP_ENDPOINT }}/aws-sdk-call?ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}"
345-
curl -S -s "http://${{ env.APP_ENDPOINT }}/remote-service?ip=${{ env.REMOTE_SERVICE_POD_IP }}&testingId=${{ env.TESTING_ID }}"
346-
curl -S -s "http://${{ env.APP_ENDPOINT }}/client-call"
347-
curl -S -s "http://${{ env.APP_ENDPOINT }}/mysql"
307+
# Deploy the traffic generator
308+
kubectl create deployment -n ${{ env.SAMPLE_APP_NAMESPACE }} traffic-generator \
309+
--image=${{ env.ACCOUNT_ID }}.dkr.ecr.${{ env.E2E_TEST_AWS_REGION }}.amazonaws.com/e2e-test-resource:traffic-generator \
310+
--replicas=1
311+
312+
# Patch it with imagePullPolicy "Always" so that it always pulls the latest image from ECR
313+
kubectl patch deployment -n ${{ env.SAMPLE_APP_NAMESPACE }} traffic-generator --patch '{"spec": {"template": {"spec": {"containers": [{"name": "e2e-test-resource", "imagePullPolicy": "Always"}]}}}}'
314+
315+
# Add the appropriate environment variables to the traffic generator
316+
kubectl set env -n ${{ env.SAMPLE_APP_NAMESPACE }} deployment/traffic-generator MAIN_ENDPOINT=${{ env.APP_ENDPOINT }}
317+
kubectl set env -n ${{ env.SAMPLE_APP_NAMESPACE }} deployment/traffic-generator REMOTE_ENDPOINT=${{ env.REMOTE_SERVICE_POD_IP }}
318+
kubectl set env -n ${{ env.SAMPLE_APP_NAMESPACE }} deployment/traffic-generator ID=${{ env.TESTING_ID }}
319+
kubectl set env -n ${{ env.SAMPLE_APP_NAMESPACE }} deployment/traffic-generator CANARY_TYPE=${{ github.job }}
320+
321+
# Restart the traffic generator with the new configuration
322+
kubectl get pods -n ${{ env.SAMPLE_APP_NAMESPACE }} --no-headers | grep '^traffic-generator' | awk '{print $1}' | xargs kubectl delete pod -n ${{ env.SAMPLE_APP_NAMESPACE }} || true
323+
324+
sleep 10
348325
349326
- name: Initiate Gradlew Daemon
350327
if: steps.initiate-gradlew == 'failure'

0 commit comments

Comments
 (0)