Skip to content

Commit b02b951

Browse files
authored
Update ASG cluster debug info (#1584)
1 parent 009863c commit b02b951

File tree

1 file changed

+35
-2
lines changed

1 file changed

+35
-2
lines changed

manager/debug.sh

Lines changed: 35 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,16 +40,49 @@ done
4040

4141
mkdir -p /cortex-debug/logs
4242
# Collect logs for every regular container of every pod in all namespaces.
# jq emits one JSON string per container holding a complete `kubectl logs`
# command (last 10000 lines, with timestamps) that redirects stdout+stderr to
# /cortex-debug/logs/<namespace>.<pod>.<container>; `[]?` keeps jq quiet when a
# pod has no iterable `containers` field. xargs passes each string as a single
# argument to `bash -c`, and a "." is printed for each command that succeeds.
kubectl get pods --all-namespaces -o json | jq '.items[] | . as $parent | $parent.spec.containers[]? | "kubectl logs -n \($parent.metadata.namespace) \($parent.metadata.name) \(.name) --timestamps --tail=10000 > /cortex-debug/logs/\($parent.metadata.namespace).\($parent.metadata.name).\(.name) 2>&1 && echo -n ."' | xargs -n 1 bash -c
43+
echo -n "."
4344
# Same collection as for regular containers, but iterating over
# `spec.initContainers`; output files carry an extra ".init." segment:
# /cortex-debug/logs/<namespace>.<pod>.init.<container>. Each generated
# `kubectl logs` command is run through `bash -c` by xargs, printing a "."
# when it succeeds.
kubectl get pods --all-namespaces -o json | jq '.items[] | . as $parent | $parent.spec.initContainers[]? | "kubectl logs -n \($parent.metadata.namespace) \($parent.metadata.name) \(.name) --timestamps --tail=10000 > /cortex-debug/logs/\($parent.metadata.namespace).\($parent.metadata.name).init.\(.name) 2>&1 && echo -n ."' | xargs -n 1 bash -c
45+
echo -n "."
4446

4547
kubectl top pods --all-namespaces --containers=true > "/cortex-debug/k8s/top_pods" 2>&1
48+
echo -n "."
4649
kubectl top nodes > "/cortex-debug/k8s/top_nodes" 2>&1
50+
echo -n "."
4751

4852
mkdir -p /cortex-debug/aws/amis
49-
aws autoscaling describe-auto-scaling-groups --region=$CORTEX_REGION --output json > "/cortex-debug/aws/asgs" 2>&1
53+
54+
# Find this cluster's on-demand worker ASG(s) by their eksctl tags (cluster name
# and nodegroup name). The result is parsed with jq afterwards, so JSON output is
# forced instead of relying on the caller's AWS CLI default output format, and
# the region is quoted so an empty value cannot swallow the next option.
asg_on_demand_info=$(aws autoscaling describe-auto-scaling-groups --region "$CORTEX_REGION" --output json --query "AutoScalingGroups[?contains(Tags[?Key==\`alpha.eksctl.io/cluster-name\`].Value, \`$CORTEX_CLUSTER_NAME\`)]|[?contains(Tags[?Key==\`alpha.eksctl.io/nodegroup-name\`].Value, \`ng-cortex-worker-on-demand\`)]")
5055
echo -n "."
51-
aws autoscaling describe-scaling-activities --region=$CORTEX_REGION --output json > "/cortex-debug/aws/asg-activities" 2>&1
56+
asg_on_demand_name=""
57+
# Number of on-demand ASGs returned by the lookup. If the lookup produced no
# output (e.g. the aws call failed), jq prints nothing; default to 0 so the
# arithmetic comparison that follows gets a valid number instead of raising a
# syntax error on an empty operand.
asg_on_demand_length=$(echo "$asg_on_demand_info" | jq -r 'length')
asg_on_demand_length=${asg_on_demand_length:-0}
58+
if (( "$asg_on_demand_length" > "0" )); then
59+
asg_on_demand_name=$(echo "$asg_on_demand_info" | jq -r 'first | .AutoScalingGroupName')
60+
# Save the full description of the on-demand ASG (stdout and stderr) to the debug bundle; expansions are quoted to avoid word splitting and globbing.
aws autoscaling describe-auto-scaling-groups --auto-scaling-group-names "$asg_on_demand_name" --region="$CORTEX_REGION" --output json > "/cortex-debug/aws/asg-on-demand" 2>&1
61+
echo -n "."
62+
# Save up to 1000 scaling activities of the on-demand ASG (stdout and stderr) to the debug bundle; expansions are quoted to avoid word splitting and globbing.
aws autoscaling describe-scaling-activities --max-items 1000 --auto-scaling-group-name "$asg_on_demand_name" --region="$CORTEX_REGION" --output json > "/cortex-debug/aws/asg-activities-on-demand" 2>&1
63+
echo -n "."
64+
fi
65+
66+
# Find this cluster's spot worker ASG(s) by their eksctl tags (cluster name and
# nodegroup name). The result is parsed with jq afterwards, so JSON output is
# forced instead of relying on the caller's AWS CLI default output format, and
# the region is quoted so an empty value cannot swallow the next option.
asg_spot_info=$(aws autoscaling describe-auto-scaling-groups --region "$CORTEX_REGION" --output json --query "AutoScalingGroups[?contains(Tags[?Key==\`alpha.eksctl.io/cluster-name\`].Value, \`$CORTEX_CLUSTER_NAME\`)]|[?contains(Tags[?Key==\`alpha.eksctl.io/nodegroup-name\`].Value, \`ng-cortex-worker-spot\`)]")
5267
echo -n "."
68+
asg_spot_name=""
69+
# Number of spot ASGs returned by the lookup. If the lookup produced no output
# (e.g. the aws call failed), jq prints nothing; default to 0 so the arithmetic
# comparison that follows gets a valid number instead of raising a syntax error
# on an empty operand.
asg_spot_length=$(echo "$asg_spot_info" | jq -r 'length')
asg_spot_length=${asg_spot_length:-0}
70+
if (( "$asg_spot_length" > "0" )); then
71+
asg_spot_name=$(echo "$asg_spot_info" | jq -r 'first | .AutoScalingGroupName')
72+
# Save the full description of the spot ASG (stdout and stderr) to the debug bundle; expansions are quoted to avoid word splitting and globbing.
aws autoscaling describe-auto-scaling-groups --auto-scaling-group-names "$asg_spot_name" --region="$CORTEX_REGION" --output json > "/cortex-debug/aws/asg-spot" 2>&1
73+
echo -n "."
74+
# Save up to 1000 scaling activities of the spot ASG (stdout and stderr) to the debug bundle; expansions are quoted to avoid word splitting and globbing.
aws autoscaling describe-scaling-activities --max-items 1000 --auto-scaling-group-name "$asg_spot_name" --region="$CORTEX_REGION" --output json > "/cortex-debug/aws/asg-activities-spot" 2>&1
75+
echo -n "."
76+
fi
77+
78+
# failsafe in case the asg(s) could not be located
79+
if [ "$asg_on_demand_name" == "" ] && [ "$asg_spot_name" == "" ]; then
80+
aws autoscaling describe-auto-scaling-groups --region=$CORTEX_REGION --output json > "/cortex-debug/aws/asgs" 2>&1
81+
echo -n "."
82+
aws autoscaling describe-scaling-activities --max-items 1000 --region=$CORTEX_REGION --output json > "/cortex-debug/aws/asg-activities" 2>&1
83+
echo -n "."
84+
fi
85+
5386
aws ec2 describe-instances --filters Name=tag:cortex.dev/cluster-name,Values=$CORTEX_CLUSTER_NAME --region=$CORTEX_REGION --output json > "/cortex-debug/aws/instances" 2>&1
5487
echo -n "."
5588
aws ec2 describe-instance-status --include-all-instances --region=$CORTEX_REGION --output json > "/cortex-debug/aws/instance-statuses" 2>&1

0 commit comments

Comments
 (0)