Skip to content

Commit 26990d9

Browse files
authored
Merge pull request #1117 from stackhpc/2023.1-diagnostics
2023.1: CI: Collect diagnostic information at the end of aio jobs
2 parents 699769c + 834110b commit 26990d9

File tree

4 files changed

+226
-0
lines changed

4 files changed

+226
-0
lines changed

.github/workflows/stackhpc-all-in-one.yml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ jobs:
179179
OS_APPLICATION_CREDENTIAL_SECRET: ${{ secrets.OS_APPLICATION_CREDENTIAL_SECRET }}
180180

181181
- name: Terraform Apply
182+
id: tf_apply
182183
run: |
183184
for attempt in $(seq 5); do
184185
if terraform apply -auto-approve; then
@@ -355,6 +356,7 @@ jobs:
355356
if: inputs.upgrade
356357

357358
- name: Tempest tests
359+
id: tempest
358360
run: |
359361
mkdir -p tempest-artifacts
360362
docker run -t --rm \
@@ -380,13 +382,29 @@ jobs:
380382
env:
381383
KAYOBE_AUTOMATION_SSH_PRIVATE_KEY: ${{ steps.ssh_key.outputs.ssh_key }}
382384

385+
# Runs the diagnostics.yml playbook inside the Kayobe container and writes
# the results to ./diagnostics on the runner via the second bind mount.
- name: Collect diagnostic information
386+
id: diagnostics
387+
# The playbook path below is single-quoted so that $KAYOBE_CONFIG_PATH is
# not expanded by the runner's shell.
# NOTE(review): presumably it is resolved inside the container by
# playbook-run.sh - confirm against that script.
run: |
388+
mkdir -p diagnostics
389+
sudo -E docker run -t --rm \
390+
-v $(pwd):/stack/kayobe-automation-env/src/kayobe-config \
391+
-v $(pwd)/diagnostics:/stack/diagnostics \
392+
-e KAYOBE_ENVIRONMENT -e KAYOBE_VAULT_PASSWORD -e KAYOBE_AUTOMATION_SSH_PRIVATE_KEY \
393+
$KAYOBE_IMAGE \
394+
/stack/kayobe-automation-env/src/kayobe-config/.automation/pipeline/playbook-run.sh '$KAYOBE_CONFIG_PATH/ansible/diagnostics.yml'
395+
env:
396+
KAYOBE_AUTOMATION_SSH_PRIVATE_KEY: ${{ steps.ssh_key.outputs.ssh_key }}
397+
# Collect diagnostics even when an earlier step failed, as long as the job
# was not cancelled and the Terraform Apply step (id: tf_apply) succeeded.
if: ${{ !cancelled() && steps.tf_apply.outcome == 'success' }}
398+
383399
# Publishes the diagnostics/, tempest-artifacts/ and sot-results/ directories
# as a single artifact named after the job's input parameters.
- name: Upload test result artifacts
384400
uses: actions/upload-artifact@v4
385401
with:
386402
name: test-results-${{ inputs.os_distribution }}-${{ inputs.os_release }}-${{ inputs.neutron_plugin }}${{ inputs.upgrade && '-upgrade' || '' }}
387403
path: |
404+
diagnostics/
388405
tempest-artifacts/
389406
sot-results/
407+
# Upload unless the job was cancelled, provided at least one of the tempest,
# stackhpc-openstack-tests or diagnostics steps succeeded.
if: ${{ !cancelled() && (steps.tempest.outcome == 'success' || steps.stackhpc-openstack-tests.outcome == 'success' || steps.diagnostics.outcome == 'success') }}
390408

391409
- name: Fail if any Tempest tests failed
392410
run: |

etc/kayobe/ansible/diagnostics.yml

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
---
2+
# This playbook runs a script that collects diagnostic information from hosts.
3+
# The diagnostics are aggregated to a directory
4+
# (diagnostics_path_local/inventory_hostname) on localhost.
5+
#
6+
# NOTE: The diagnostic information contains sensitive information such as
7+
# passwords in configuration files.
8+
9+
# Play: on each targeted host, create a temporary directory, fill it with
# facts and the output of tools/diagnostics.sh, pull it to localhost, and
# remove the temporary directory again whether or not collection succeeded.
- name: Collect diagnostic information
10+
hosts: seed-hypervisor:seed:overcloud:infra-vms
11+
vars:
12+
# Local destination, derived from the PWD environment variable of the
# process running Ansible.
diagnostics_path_local: "{{ lookup('env', 'PWD') }}/diagnostics"
13+
tasks:
14+
- block:
15+
- name: Create a temporary directory for diagnostics
16+
ansible.builtin.tempfile:
17+
state: directory
18+
suffix: diagnostics
19+
register: diagnostics_tmpdir
20+
21+
- name: Write host variables to a file
22+
ansible.builtin.copy:
23+
content: "{{ hostvars[inventory_hostname].ansible_facts | to_nice_json }}"
24+
dest: "{{ diagnostics_tmpdir.path }}/facts.json"
25+
26+
- name: Run diagnostics script
27+
ansible.builtin.script: "{{ kayobe_config_path }}/../../tools/diagnostics.sh"
28+
become: true
29+
# Only treat this task as failed when no return code was produced. A
# non-zero return code is tolerated here so that the logs gathered so far
# are still downloaded; it is reported later by the
# "Fail if diagnostics collection failed" task.
failed_when: diagnostics_result.rc is not defined
30+
register: diagnostics_result
31+
environment:
32+
# Directory the script copies logs to.
LOG_DIR: "{{ diagnostics_tmpdir.path }}"
33+
# NOTE(review): CONFIG_DIR is not referenced by tools/diagnostics.sh as
# added in this commit - confirm whether it is still needed.
CONFIG_DIR: "{{ kayobe_config_path }}/../.."
34+
35+
- name: Download diagnostic logs to localhost
36+
ansible.posix.synchronize:
37+
# Trailing slash: transfer the contents of the temporary directory rather
# than the directory itself.
src: "{{ diagnostics_tmpdir.path }}/"
38+
dest: "{{ diagnostics_path_local }}/{{ inventory_hostname }}"
39+
mode: pull
40+
archive: no
41+
recursive: true
42+
copy_links: true
43+
verify_host: true
44+
# Use the SSH arguments configured for Ansible; needed when the host is
# reached via a jump host.
45+
use_ssh_args: true
46+
# Runs whether or not the tasks in the block above succeeded.
always:
47+
- name: Clean up temporary directory
48+
ansible.builtin.file:
49+
path: "{{ diagnostics_tmpdir.path }}"
50+
state: absent
51+
52+
- name: Display diagnostics collection stdout
53+
ansible.builtin.debug:
54+
msg: "{{ diagnostics_result.stdout }}"
55+
when: diagnostics_result.stdout is defined
56+
57+
- name: Display diagnostics collection stderr
58+
ansible.builtin.debug:
59+
msg: "{{ diagnostics_result.stderr }}"
60+
when: diagnostics_result.stderr is defined
61+
62+
# Deferred failure: the script's exit status is only acted on here, after
# the logs have been downloaded and its output displayed.
- name: Fail if diagnostics collection failed
63+
ansible.builtin.fail:
64+
msg: Diagnostics collection failed
65+
when: diagnostics_result.rc != 0
66+
67+
- name: Display location of diagnostics archive
68+
ansible.builtin.debug:
69+
msg: >-
70+
Wrote diagnostics to {{ diagnostics_path_local }} on localhost
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
---
2+
features:
3+
- |
4+
Adds a new ``diagnostics.yml`` playbook that collects diagnostic
5+
information from hosts. The diagnostics are aggregated to a directory
6+
(``$PWD/diagnostics/`` by default) on localhost. The diagnostics include:
7+
8+
* Docker container logs
9+
* Kolla configuration files
10+
* Log files
11+
12+
*The collected diagnostic information contains sensitive information such
13+
as passwords in configuration files.*
14+

tools/diagnostics.sh

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
#!/bin/bash
2+
3+
# NOTE(mgoddard): This has been adapted from
4+
# roles/kayobe-diagnostics/files/get_logs.sh in Kayobe.
5+
6+
# Environment variables:
7+
# $LOG_DIR is the directory to copy logs to. It is required: the script runs
# with `set -u`, so it aborts if LOG_DIR is unset.
8+
9+
# TODO: Make this script more robust and use set -e.
10+
set +o errexit
11+
set -u
12+
13+
#######################################
# Collect logs, configuration files and system state from this host.
#
# Collection is best-effort: individual commands may fail (for example when
# a tool is not installed or a file does not exist) without stopping the
# remaining steps.
#
# Globals:
#   LOG_DIR (read) - destination directory; must be set and non-empty.
# Arguments:
#   None
# Outputs:
#   Writes files below ${LOG_DIR}; `cp -v` lists copied system logs on stdout.
# Returns:
#   The exit status of the final `chown` of ${LOG_DIR}.
#######################################
copy_logs() {
    # An empty LOG_DIR would make every path below resolve under "/".
    : "${LOG_DIR:?LOG_DIR must be set to the directory to copy logs to}"

    local system_logs="${LOG_DIR}/system_logs"
    local node_configs="${LOG_DIR}/kolla_node_configs"
    local dir container service item log
    local -a found=()

    mkdir -p "${LOG_DIR}"/{docker_logs,kolla_node_configs,system_logs}

    cp -rnL /etc/kolla/* "${node_configs}"
    # Don't save the IPA images. -f: they are absent on most hosts.
    rm -f "${node_configs}"/ironic-http/ironic-agent.{kernel,initramfs}
    rm -f "${node_configs}"/ironic-tftp/ironic-agent.{kernel,initramfs}

    if [[ -d /opt/kayobe/etc/kolla ]]; then
        mkdir -p "${LOG_DIR}/kolla_build_configs"
        cp -rnL /opt/kayobe/etc/kolla/* "${LOG_DIR}/kolla_build_configs/"
    fi

    cp -rvnL /var/log/* "${system_logs}/"

    journalctl --no-pager > "${system_logs}/syslog.log"
    journalctl --no-pager -u docker.service > "${system_logs}/docker.log"
    journalctl --no-pager -u vbmcd.service > "${system_logs}/vbmcd.log"
    journalctl --no-pager -u NetworkManager.service > "${system_logs}/NetworkManager.log"

    # Distribution-specific configuration directories; copy those that exist.
    for dir in \
        /etc/sysconfig/network-scripts/ \
        /etc/NetworkManager/system-connections/ \
        /etc/yum.repos.d/ \
        /etc/apt/sources.list.d/; do
        if [[ -d "${dir}" ]]; then
            cp -r "${dir}" "${system_logs}/"
        fi
    done

    # -L: dereference symlinks when copying the systemd configuration.
    if [[ -d /etc/systemd/ ]]; then
        cp -rL /etc/systemd/ "${system_logs}/"
    fi

    df -h > "${system_logs}/df.txt"
    # Gather disk usage for directories of 1MB or more on the root
    # filesystem, up to 5 levels deep (du without -a lists directories only).
    du -d 5 -hx / | sort -hr | grep '^[0-9\.]*[MGT]' > "${system_logs}/du.txt"
    free > "${system_logs}/free.txt"
    cat /etc/hosts > "${system_logs}/hosts.txt"
    parted -l > "${system_logs}/parted-l.txt"
    mount > "${system_logs}/mount.txt"
    env > "${system_logs}/env.txt"
    ip address > "${system_logs}/ip-address.txt"
    ip route > "${system_logs}/ip-route.txt"
    ip route show table all > "${system_logs}/ip-route-all-tables.txt"
    ip rule list > "${system_logs}/ip-rule-list.txt"
    pvs > "${system_logs}/pvs.txt"
    vgs > "${system_logs}/vgs.txt"
    lvs > "${system_logs}/lvs.txt"

    iptables-save > "${system_logs}/iptables.txt"

    # Installed packages, using whichever package manager is present.
    if command -v dpkg > /dev/null; then
        dpkg -l > "${system_logs}/dpkg-l.txt"
    fi
    if command -v rpm > /dev/null; then
        rpm -qa > "${system_logs}/rpm-qa.txt"
    fi

    # final memory usage and process list
    ps -eo user,pid,ppid,lwp,%cpu,%mem,size,rss,cmd > "${system_logs}/ps.txt"

    # available entropy
    cat /proc/sys/kernel/random/entropy_avail > "${system_logs}/entropy_avail.txt"

    # docker related information
    (docker info && docker images && docker ps -a) > "${system_logs}/docker-info.txt"

    # Container names cannot contain whitespace or glob characters, so
    # word-splitting the command output is safe here.
    for container in $(docker ps -a --format "{{.Names}}"); do
        docker logs --tail all "${container}" &> "${LOG_DIR}/docker_logs/${container}.txt"
    done

    # Bifrost: grab config files and logs from the container.
    if [[ -n "$(docker ps -q -f name=bifrost_deploy)" ]]; then
        mkdir -p "${LOG_DIR}/bifrost"
        for service in dnsmasq ironic-api ironic-conductor ironic-inspector mariadb nginx rabbitmq-server; do
            mkdir -p "${LOG_DIR}/bifrost/${service}"
            docker exec bifrost_deploy \
                systemctl status "${service}" -l -n 10000 > "${LOG_DIR}/bifrost/${service}/${service}-systemd-status.txt"
            docker exec bifrost_deploy \
                journalctl -u "${service}" --no-pager > "${LOG_DIR}/bifrost/${service}/${service}-journal.txt"
        done
        # No -it: this runs non-interactively with stdout redirected to a
        # file. Requesting a TTY fails without a terminal and otherwise adds
        # carriage returns to the captured output.
        docker exec bifrost_deploy \
            journalctl --no-pager > "${LOG_DIR}/bifrost/bifrost-journal.log"
        # docker cp requires a destination given with a trailing slash to
        # exist already; /etc/kolla may not have provided a bifrost directory.
        mkdir -p "${node_configs}/bifrost"
        for item in dnsmasq.conf ironic ironic-inspector nginx/nginx.conf; do
            docker cp "bifrost_deploy:/etc/${item}" "${node_configs}/bifrost/"
        done
        docker cp bifrost_deploy:/var/log/mariadb/mariadb.log "${LOG_DIR}/bifrost/mariadb/"
    fi

    # IPA build logs: copy whichever of stderr/stdout exist.
    found=()
    for log in /opt/kayobe/images/ipa/ipa.stderr /opt/kayobe/images/ipa/ipa.stdout; do
        if [[ -f "${log}" ]]; then
            found+=("${log}")
        fi
    done
    if (( ${#found[@]} > 0 )); then
        mkdir -p "${LOG_DIR}/ipa"
        cp "${found[@]}" "${LOG_DIR}/ipa/"
    fi

    # Overcloud host image build logs: copy whichever of stderr/stdout exist.
    found=()
    for log in \
        /opt/kayobe/images/deployment_image/deployment_image.stderr \
        /opt/kayobe/images/deployment_image/deployment_image.stdout; do
        if [[ -f "${log}" ]]; then
            found+=("${log}")
        fi
    done
    if (( ${#found[@]} > 0 )); then
        mkdir -p "${LOG_DIR}/deployment_image"
        cp "${found[@]}" "${LOG_DIR}/deployment_image/"
    fi

    # The script runs as root; hand the results to the stack user.
    chown -R stack: "${LOG_DIR}"
}
123+
124+
copy_logs

0 commit comments

Comments
 (0)