
Commit cc5429d

Merge branch 'main' into remove-public-endpoint-k8s
2 parents 1d59239 + 827ac08


7 files changed: +295 / -25 lines changed

Lines changed: 100 additions & 0 deletions
@@ -0,0 +1,100 @@
# This workflow will build and push the traffic generator to each region whenever there is an update made to the traffic-generator folder.
# This image will be used by the EKS and K8s tests to call sample app endpoints, while the zip files will be used by EC2 platforms.
name: Create and Push Traffic Generator

on:
  workflow_dispatch:
  push:
    branches:
      - main
    paths:
      - 'sample-apps/traffic-generator/**'

permissions:
  id-token: write
  contents: read

env:
  E2E_TEST_ACCOUNT_ID: ${{ secrets.APPLICATION_SIGNALS_E2E_TEST_ACCOUNT_ID }}
  E2E_TEST_ROLE_NAME: ${{ secrets.APPLICATION_SIGNALS_E2E_TEST_ROLE_NAME }}

jobs:
  build-and-push-image:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        aws-region: ['af-south-1','ap-east-1','ap-northeast-1','ap-northeast-2','ap-northeast-3','ap-south-1','ap-south-2','ap-southeast-1',
                     'ap-southeast-2','ap-southeast-3','ap-southeast-4','ca-central-1','eu-central-1','eu-central-2','eu-north-1',
                     'eu-south-1','eu-south-2','eu-west-1','eu-west-2','eu-west-3','il-central-1','me-central-1','me-south-1','sa-east-1',
                     'us-east-1','us-east-2','us-west-1','us-west-2']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::${{ env.E2E_TEST_ACCOUNT_ID }}:role/${{ env.E2E_TEST_ROLE_NAME }}
          aws-region: us-east-1

      - name: Retrieve account
        uses: aws-actions/aws-secretsmanager-get-secrets@v1
        with:
          secret-ids: |
            ACCOUNT_ID, region-account/${{ matrix.aws-region }}

      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::${{ env.ACCOUNT_ID }}:role/${{ env.E2E_TEST_ROLE_NAME }}
          aws-region: ${{ matrix.aws-region }}

      - name: Login to Amazon ECR
        id: login-ecr
        uses: aws-actions/amazon-ecr-login@v2

      - name: Build, tag, and push image to Amazon ECR
        working-directory: sample-apps/traffic-generator
        env:
          REGISTRY: ${{ steps.login-ecr.outputs.registry }}
          REPOSITORY: e2e-test-resource
          IMAGE_TAG: traffic-generator
        run: |
          docker build -t $REGISTRY/$REPOSITORY:$IMAGE_TAG .
          docker push $REGISTRY/$REPOSITORY:$IMAGE_TAG

  upload-files-to-s3:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        aws-region: ['af-south-1','ap-east-1','ap-northeast-1','ap-northeast-2','ap-northeast-3','ap-south-1','ap-south-2','ap-southeast-1',
                     'ap-southeast-2','ap-southeast-3','ap-southeast-4','ca-central-1','eu-central-1','eu-central-2','eu-north-1',
                     'eu-south-1','eu-south-2','eu-west-1','eu-west-2','eu-west-3','il-central-1','me-central-1','me-south-1','sa-east-1',
                     'us-east-1','us-east-2','us-west-1','us-west-2']
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::${{ env.E2E_TEST_ACCOUNT_ID }}:role/${{ env.E2E_TEST_ROLE_NAME }}
          aws-region: us-east-1

      - name: Retrieve account
        uses: aws-actions/aws-secretsmanager-get-secrets@v1
        with:
          secret-ids: |
            ACCOUNT_ID, region-account/${{ matrix.aws-region }}

      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::${{ env.ACCOUNT_ID }}:role/${{ env.E2E_TEST_ROLE_NAME }}
          aws-region: ${{ matrix.aws-region }}

      - name: Upload traffic generator files
        working-directory: sample-apps/traffic-generator
        run: |
          zip traffic-generator.zip ./index.js ./package.json
          aws s3 cp traffic-generator.zip s3://aws-appsignals-sample-app-prod-${{ matrix.aws-region }}/traffic-generator.zip
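
The upload job drops the same traffic-generator.zip into an aws-appsignals-sample-app-prod-<region> bucket for every region in the matrix, after assuming a per-region role looked up from Secrets Manager. As a rough spot check (not part of this commit, and assuming your local credentials can read those buckets), a short boto3 sketch along these lines could confirm the object landed where expected:

# Hypothetical spot check: verify traffic-generator.zip exists in a few regional buckets.
# The bucket naming mirrors the upload step above; in practice each bucket lives in a
# per-region test account, so you would assume the same roles the workflow does.
import boto3
from botocore.exceptions import ClientError

REGIONS = ['us-east-1', 'us-west-2', 'eu-west-1']  # subset of the workflow's matrix

def zip_exists(region: str) -> bool:
    s3 = boto3.client('s3', region_name=region)
    bucket = f"aws-appsignals-sample-app-prod-{region}"
    try:
        s3.head_object(Bucket=bucket, Key="traffic-generator.zip")
        return True
    except ClientError as error:
        print(f"{bucket}: {error.response['Error']['Code']}")
        return False

if __name__ == "__main__":
    for region in REGIONS:
        print(region, "ok" if zip_exists(region) else "missing")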

.github/workflows/util/clean/ec2_instance_cleanup/cleaner.py

Lines changed: 86 additions & 22 deletions
@@ -15,17 +15,74 @@
 # Create an EC2 client
 session = boto3.Session()
 ec2 = session.client('ec2')
+autoscaling = session.client('autoscaling')

 # configure logging
 logging.basicConfig(level=logging.INFO)

+
+def _get_autoscaling_groups_to_delete():
+    logging.info("Start scanning autoscaling group...")
+
+    current_time = datetime.now(timezone.utc)
+    time_threshold = current_time - timedelta(hours=3)
+    groups_to_delete = []
+
+    # Initialize the paginator
+    paginator = autoscaling.get_paginator('describe_auto_scaling_groups')
+
+    # Iterate through each page of results
+    for page in paginator.paginate():
+        auto_scaling_groups = page['AutoScalingGroups']
+        for asg in auto_scaling_groups:
+            asg_name = asg['AutoScalingGroupName']
+            tags = asg['Tags']
+
+            eks_tag_present = any(tag['Key'] == 'eks:cluster-name' for tag in tags)
+            if eks_tag_present:
+                logging.info(f"Skipping autoscaling group with 'eks:cluster-name' tag: {asg_name}.")
+                continue
+
+            if not _is_active(asg):
+                logging.info(f"Skipping autoscaling group {asg_name} with terminating instances.")
+                continue
+
+            logging.info(f"autoscaling group {asg_name} is active.")
+
+            creation_time = asg['CreatedTime']
+            if creation_time < time_threshold:
+                print(f"Autoscaling group: {asg_name} will be deleted.")
+                groups_to_delete.append(asg)
+
+    logging.info(f"{len(groups_to_delete)} autoscaling groups are active for more than 3 hours.")
+
+    return groups_to_delete
+
+
+def _delete_autoscaling_groups(auto_scaling_groups):
+    for asg in auto_scaling_groups:
+        try:
+            asg_name = asg['AutoScalingGroupName']
+            response = autoscaling.delete_auto_scaling_group(AutoScalingGroupName=asg_name, ForceDelete=True)
+            logging.info("===== Response for delete autoscaling group request =====")
+            logging.info(response)
+        except Exception as e:
+            logging.info(f"Error terminating instances: {e}")
+
+def _is_active(asg):
+    for instance in asg['Instances']:
+        if instance['LifecycleState'] in [
+            'Terminating', 'Terminating:Wait', 'Terminating:Proceed'
+        ]:
+            return False
+    return True
+

 def _get_instances_to_terminate():
     # Get all the running instances
-    logging.info("Getting all running instances")
+    logging.info("Start scanning instances")
     running_filter = [{'Name': 'instance-state-name', 'Values': [INSTANCE_STATE_RUNNING]}]
     running_instances = _get_all_instances_by_filter(filters=running_filter)
-    logging.info(f"Currently {len(running_instances)} are running.")
+    logging.info(f"{len(running_instances)} instances are running.")

     # Filter instances that have been running for more than 3 hours
     logging.info("Filtering instances that have been running for more than 3 hours")
@@ -42,10 +99,13 @@ def _get_instances_to_terminate():
     logging.info("Filtering instances that should not be terminated based on conditions")
     instances_to_terminate = []
     for instance in instances_running_more_than_3hrs:
-        if (not _is_eks_cluster_instance(instance)
-                and not _is_k8s_cluster_instance(instance)
-                and not _is_tagged_do_not_delete(instance)):
-            instances_to_terminate.append(instance)
+        if (not _is_k8s_cluster_instance(instance) and not _is_tagged_do_not_delete(instance)):
+            group_name = _get_associated_autoscaling_group_name(instance)
+            if group_name != None:
+                logging.info(f"Instance {instance['InstanceId']} is associated with autoscaling group {group_name}, skip the termination.")
+            else:
+                instances_to_terminate.append(instance)
+
     logging.info(f"{len(instances_to_terminate)} instances will be terminated.")

     return instances_to_terminate
@@ -70,13 +130,6 @@ def _get_all_instances_by_filter(filters: List[dict]):
     return filtered_instances


-def _is_eks_cluster_instance(instance):
-    security_groups = instance.get('SecurityGroups', [])
-    if any(group['GroupName'].startswith(EKS_CLUSTER_SECURITY_GROUP_PREFIX) for group in security_groups):
-        return True
-    return False
-
-
 def _is_k8s_cluster_instance(instance):
     tags = instance.get('Tags', [])
     if 'Name' in tags and tags['Name'].startswith(K8S_INSTANCE_NAME_PREFIX):
@@ -92,12 +145,21 @@ def _is_tagged_do_not_delete(instance):
             return True
     return False

-
-def _prepare_report_and_upload(instances_to_terminate) -> bool:
-    json_data = json.dumps(instances_to_terminate, default=str)
+def _get_associated_autoscaling_group_name(instance):
+    tags = instance.get('Tags', [])
+    asg_tag = next((tag for tag in tags if tag['Key'] == 'aws:autoscaling:groupName'), None)
+    if asg_tag is None:
+        return None
+    return asg_tag['Value']
+
+def _prepare_report_and_upload(groups_to_delete, instances_to_terminate) -> bool:
+    json_data = json.dumps({
+        "autoscalingGroups": groups_to_delete,
+        "standaloneInstances": instances_to_terminate
+    }, default=str)
     # save as a json file with timestamp
     timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
-    filename = f"report-instances-to-terminate-{timestamp}.json"
+    filename = f"report-resources-to-clean-{timestamp}.json"
     with open(filename, "w") as f:
         f.write(json_data)

@@ -116,24 +178,26 @@ def _prepare_report_and_upload(instances_to_terminate) -> bool:
 def _terminate_instances(instances_to_terminate):
     # Terminate the instances
     instance_ids = [instance['InstanceId'] for instance in instances]
-    logging.info("Number of instances terminating: " + str(len(instance_ids)))
     try:
         response = ec2.terminate_instances(InstanceIds=instance_ids)
-        logging.info("===== Response for terminate request =====")
+        logging.info("===== Response for terminate instances request =====")
         logging.info(response)
     except Exception as e:
         logging.info(f"Error terminating instances: {e}")


 if __name__ == '__main__':
+    groups = _get_autoscaling_groups_to_delete()
     instances = _get_instances_to_terminate()
-    if len(instances) == 0:
-        logging.info("No instances to terminate")
+
+    if len(groups) == 0 and len(instances) == 0:
+        logging.info("No resource to terminate")
         exit(0)

-    report_successful = _prepare_report_and_upload(instances)
+    report_successful = _prepare_report_and_upload(groups, instances)
     if not report_successful:
         logging.error("Failed to prepare report and upload. Aborting termination of instances.")
         exit(1)

+    _delete_autoscaling_groups(groups)
     _terminate_instances(instances)
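
The new skip logic keys off the aws:autoscaling:groupName tag that EC2 applies to instances launched by an Auto Scaling group, so ASG-managed instances are cleaned up by deleting the group (with ForceDelete) rather than being terminated one by one. A self-contained sketch of how that tag lookup and the 3-hour age threshold behave, using hand-built dictionaries instead of real describe_* responses (illustrative only, no AWS calls):

# Illustrative mock-data walk-through of the cleaner's two filters.
from datetime import datetime, timedelta, timezone

def get_asg_name(instance):
    # Same tag lookup as _get_associated_autoscaling_group_name in cleaner.py
    tags = instance.get('Tags', [])
    tag = next((t for t in tags if t['Key'] == 'aws:autoscaling:groupName'), None)
    return None if tag is None else tag['Value']

now = datetime.now(timezone.utc)
time_threshold = now - timedelta(hours=3)

standalone = {'InstanceId': 'i-0abc', 'Tags': [{'Key': 'Name', 'Value': 'adhoc-test'}]}
managed = {'InstanceId': 'i-0def', 'Tags': [{'Key': 'aws:autoscaling:groupName', 'Value': 'e2e-asg'}]}

print(get_asg_name(standalone))  # None -> eligible for direct termination
print(get_asg_name(managed))     # 'e2e-asg' -> skipped; cleaned up via ASG deletion instead

old_asg = {'AutoScalingGroupName': 'e2e-asg', 'CreatedTime': now - timedelta(hours=5)}
print(old_asg['CreatedTime'] < time_threshold)  # True -> older than 3 hours, would be deleted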

.gitignore

Lines changed: 1 addition & 3 deletions
@@ -1,9 +1,7 @@
+.DS_Store
 .idea
 # Ignore Gradle project-specific cache directory
 .gradle

 # Ignore Gradle build output directory
 build
-
-# Ignore the resource cleanup reports
-**/report-*.json
Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
# Use the official lightweight Node.js 16 image.
# https://hub.docker.com/_/node
# FROM node:16-slim
FROM public.ecr.aws/eks-distro-build-tooling/nodejs:16

# Create and change to the app directory
WORKDIR /usr/src/app

# Copy application dependency manifests to the container image.
# A wildcard is used to ensure copying both package.json AND package-lock.json (if available).
# Copying this first prevents re-running npm install on every code change.
COPY package*.json ./

# Install dependencies
RUN npm install

# Copy local code to the container image.
COPY . .

# Run the web service on container startup.
CMD [ "npm", "start" ]
Lines changed: 73 additions & 0 deletions
@@ -0,0 +1,73 @@
const axios = require('axios');

// Send API requests to the sample app
const sendRequests = async (urls) => {
    try {
        const fetchPromises = urls.map(url => axios.get(url));
        const responses = await Promise.all(fetchPromises);

        // Handle the responses
        responses.forEach((response, index) => {
            if (response.status === 200) {
                const data = response.data;
                console.log(`Response from ${urls[index]}:`, data);
            } else {
                console.error(`Failed to fetch ${urls[index]}:`, response.statusText);
            }
        });
    } catch (error) {
        console.error('Error sending GET requests:', error);
    }
}

const sleep = ms => new Promise(resolve => setTimeout(resolve, ms));

// This loop will run until the environment variables are available
const waitForEnvVariables = async () => {
    while (!process.env.MAIN_ENDPOINT || !process.env.REMOTE_ENDPOINT || !process.env.ID || !process.env.CANARY_TYPE) {
        console.log('Environment variables not set. Waiting for 10 seconds...');
        await sleep(10000); // Wait for 10 seconds
    }
};

// Traffic generator that sends traffic at the specified interval. Sends requests immediately, then repeats on the interval afterwards
const trafficGenerator = async (interval) => {
    await waitForEnvVariables();

    const mainEndpoint = process.env.MAIN_ENDPOINT;
    const remoteEndpoint = process.env.REMOTE_ENDPOINT;
    const id = process.env.ID;
    const canaryType = process.env.CANARY_TYPE

    let urls = [
        `http://${mainEndpoint}/outgoing-http-call`,
        `http://${mainEndpoint}/aws-sdk-call?ip=${remoteEndpoint}&testingId=${id}`,
        `http://${mainEndpoint}/remote-service?ip=${remoteEndpoint}&testingId=${id}`,
        `http://${mainEndpoint}/client-call`
    ];

    if (canaryType === 'java-eks' || canaryType === 'python-eks') {
        urls.push(`http://${mainEndpoint}/mysql`)
    }

    // Need to call some APIs so that it exceeds the metric limiter threshold and makes the test
    // APIs generate the AllOtherOperations metric. Sleep for a minute to let the CloudWatch service process the API calls.
    // Calling it here before calling the remote sample app endpoint because the API generated by it is validated
    // for AllOtherRemoteOperations in the metric validation step
    if (canaryType === 'java-metric-limiter') {
        const fakeUrls = [
            `http://${mainEndpoint}`,
            `http://${mainEndpoint}/fake-endpoint`
        ]
        // Send the fake requests and wait a minute
        await sendRequests(fakeUrls);
        await sleep(60000);
    }

    await sendRequests(urls);
    setInterval(() => sendRequests(urls), interval);
}

const interval = 60 * 1000;
// Start sending GET requests every minute (60,000 milliseconds)
trafficGenerator(interval);
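
For a quick, one-off check that a deployed sample app answers the endpoints the generator hits, a rough Python equivalent of a single sendRequests() pass could look like the sketch below. The endpoint paths mirror index.js; MAIN_ENDPOINT, REMOTE_ENDPOINT, and ID are assumed to already be set in your environment, and this is illustrative only rather than part of the commit.

# Hypothetical one-shot smoke test mirroring the URLs built in index.js.
import os
import urllib.request

main = os.environ["MAIN_ENDPOINT"]
remote = os.environ["REMOTE_ENDPOINT"]
testing_id = os.environ["ID"]

urls = [
    f"http://{main}/outgoing-http-call",
    f"http://{main}/aws-sdk-call?ip={remote}&testingId={testing_id}",
    f"http://{main}/remote-service?ip={remote}&testingId={testing_id}",
    f"http://{main}/client-call",
]

for url in urls:
    try:
        # Print the status code for each endpoint; failures are reported but not fatal,
        # matching the forgiving behaviour of sendRequests().
        with urllib.request.urlopen(url, timeout=10) as response:
            print(url, response.status)
    except Exception as exc:
        print(url, "failed:", exc)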
Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
{
  "name": "traffic-generator",
  "version": "1.0.0",
  "description": "A simple traffic generator that sends GET requests to a list of URLs every 2 minutes",
  "main": "index.js",
  "scripts": {
    "start": "node index.js"
  },
  "dependencies": {
    "axios": "^1.4.0"
  }
}

terraform/.gitignore

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
.terraform*
terraform.tfstate*
