Skip to content

Commit 7cc0ade

Browse files
committed
Work around Ansible rc-13 race condition (mk2)
There is a race condition in Ansible that can result in this failure:

    msg: |-
      MODULE FAILURE
      See stdout/stderr for the exact error
    rc: -13

See ansible/ansible#78344 and ansible/ansible#81777.

In stackhpc/stackhpc-kayobe-config#1108 we applied a workaround to increase the ControlPersist timeout to 1 hour, but this does not always work.

Here we use a different workaround of disabling SSH pipelining. This has performance implications for Ansible, but is a reasonable trade-off for reliability.

We set the config option as an environment variable rather than in ansible.cfg in Kayobe configuration, to avoid a merge conflict on upgrade.
1 parent cf3baaf commit 7cc0ade

File tree

1 file changed

+51
-24
lines changed

1 file changed

+51
-24
lines changed

ansible/files/multinode.sh

Lines changed: 51 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -82,13 +82,40 @@ function decrypt_file() {
8282
ansible-vault decrypt --vault-password-file ~/vault.password $file
8383
}
8484

85+
function workaround_ansible_rc13_bug() {
    # Disable SSH pipelining for Ansible by exporting ANSIBLE_PIPELINING=False.
    # The variable is exported, so it only affects commands started from this
    # shell after the function has been called.
    #
    # Background: an Ansible race condition can make a task fail with
    #   msg: |-
    #     MODULE FAILURE
    #     See stdout/stderr for the exact error
    #   rc: -13
    # Upstream reports:
    #   https://github.com/ansible/ansible/issues/78344
    #   https://github.com/ansible/ansible/issues/81777
    #
    # An earlier workaround raised the ControlPersist timeout to 1 hour
    # (https://github.com/stackhpc/stackhpc-kayobe-config/pull/1108), but that
    # does not always work, so SSH pipelining is turned off instead. This
    # costs Ansible some performance, which is an acceptable trade-off for
    # reliability.
    #
    # The option is set through the environment rather than in ansible.cfg in
    # the Kayobe configuration, to avoid a merge conflict on upgrade.
    ANSIBLE_PIPELINING=False
    export ANSIBLE_PIPELINING
}
106+
107+
function run_kayobe() {
    # Run kayobe with the Ansible rc -13 workaround applied first.
    #
    # Arguments: passed through to kayobe unchanged, one word per argument.
    # Outputs:   whatever kayobe writes to stdout/stderr.
    # Returns:   the exit status of kayobe.
    workaround_ansible_rc13_bug
    # "$@" (not unquoted $*) keeps each argument intact: no re-splitting on
    # whitespace and no glob expansion of arguments such as seed[0].
    kayobe "$@"
}
111+
85112
function deploy_seed() {
86-
kayobe seed host configure
113+
run_kayobe seed host configure
87114
}
88115

89116
function deploy_seed_vault() {
90117
# Deploy hashicorp vault to the seed
91-
kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/vault-deploy-seed.yml
118+
run_kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/vault-deploy-seed.yml
92119
encrypt_file $KAYOBE_CONFIG_PATH/environments/$KAYOBE_ENVIRONMENT/vault/OS-TLS-INT.pem
93120
encrypt_file $KAYOBE_CONFIG_PATH/environments/$KAYOBE_ENVIRONMENT/vault/seed-vault-keys.json
94121
encrypt_file $KAYOBE_CONFIG_PATH/environments/$KAYOBE_ENVIRONMENT/vault/*.key
@@ -97,8 +124,8 @@ function deploy_seed_vault() {
97124
function get_seed_ssh() {
98125
# NOTE: Bash clears the -e option in subshells when not in Posix mode.
99126
set -e
100-
ssh_user=$(kayobe configuration dump --host seed[0] --var-name ansible_user | tr -d '"')
101-
seed_addr=$(kayobe configuration dump --host seed[0] --var-name ansible_host | tr -d '"')
127+
ssh_user=$(run_kayobe configuration dump --host seed[0] --var-name ansible_user | tr -d '"')
128+
seed_addr=$(run_kayobe configuration dump --host seed[0] --var-name ansible_host | tr -d '"')
102129
echo "${ssh_user}@${seed_addr}"
103130
}
104131

@@ -117,37 +144,37 @@ function copy_ca_to_seed() {
117144
}
118145

119146
function deploy_ceph() {
120-
kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/cephadm-deploy.yml
147+
run_kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/cephadm-deploy.yml
121148
sleep 30
122-
kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/cephadm.yml
123-
kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/cephadm-gather-keys.yml
149+
run_kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/cephadm.yml
150+
run_kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/cephadm-gather-keys.yml
124151
}
125152

126153
function deploy_overcloud_vault() {
127154
# NOTE: Previously it was necessary to first deploy HAProxy with TLS disabled.
128155
if [[ -f $KAYOBE_CONFIG_PATH/environments/$KAYOBE_ENVIRONMENT/kolla/globals-tls-config.yml ]]; then
129156
# Skip os_capacity deployment since it requires admin-openrc.sh which doesn't exist yet.
130-
kayobe overcloud service deploy --skip-tags os_capacity -kt haproxy
157+
run_kayobe overcloud service deploy --skip-tags os_capacity -kt haproxy
131158
fi
132159

133160
# Deploy hashicorp vault to the controllers
134-
kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/vault-deploy-overcloud.yml
161+
run_kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/vault-deploy-overcloud.yml
135162
encrypt_file $KAYOBE_CONFIG_PATH/environments/$KAYOBE_ENVIRONMENT/vault/overcloud-vault-keys.json
136163
}
137164

138165
function generate_overcloud_certs() {
139166
# Generate external tls certificates
140167
if [[ -f $KAYOBE_CONFIG_PATH/ansible/vault-generate-test-external-tls.yml ]]; then
141-
kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/vault-generate-test-external-tls.yml
168+
run_kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/vault-generate-test-external-tls.yml
142169
encrypt_file $KAYOBE_CONFIG_PATH/environments/$KAYOBE_ENVIRONMENT/kolla/certificates/haproxy.pem
143170
fi
144171

145172
# Generate internal tls certificates
146-
kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/vault-generate-internal-tls.yml
173+
run_kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/vault-generate-internal-tls.yml
147174
encrypt_file $KAYOBE_CONFIG_PATH/environments/$KAYOBE_ENVIRONMENT/kolla/certificates/haproxy-internal.pem
148175

149176
# Generate backend tls certificates
150-
kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/vault-generate-backend-tls.yml
177+
run_kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/vault-generate-backend-tls.yml
151178
for cert in $(ls -1 $KAYOBE_CONFIG_PATH/environments/$KAYOBE_ENVIRONMENT/kolla/certificates/*-key.pem); do
152179
encrypt_file $cert
153180
done
@@ -164,15 +191,15 @@ function generate_barbican_secrets() {
164191
decrypt_file $KAYOBE_CONFIG_PATH/environments/$KAYOBE_ENVIRONMENT/secrets.yml
165192
sed -i "s/secret_id:.*/secret_id: $(uuidgen)/g" $KAYOBE_CONFIG_PATH/environments/$KAYOBE_ENVIRONMENT/secrets.yml
166193
encrypt_file $KAYOBE_CONFIG_PATH/environments/$KAYOBE_ENVIRONMENT/secrets.yml
167-
kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/vault-deploy-barbican.yml
194+
run_kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/vault-deploy-barbican.yml
168195
decrypt_file $KAYOBE_CONFIG_PATH/environments/$KAYOBE_ENVIRONMENT/secrets.yml
169196
sed -i "s/role_id:.*/role_id: $(cat /tmp/barbican-role-id)/g" $KAYOBE_CONFIG_PATH/environments/$KAYOBE_ENVIRONMENT/secrets.yml
170197
encrypt_file $KAYOBE_CONFIG_PATH/environments/$KAYOBE_ENVIRONMENT/secrets.yml
171198
rm /tmp/barbican-role-id
172199
}
173200

174201
function deploy_overcloud() {
175-
kayobe overcloud host configure
202+
run_kayobe overcloud host configure
176203

177204
deploy_ceph
178205

@@ -185,19 +212,19 @@ function deploy_overcloud() {
185212
generate_barbican_secrets
186213

187214
# Deploy all services
188-
kayobe overcloud service deploy
215+
run_kayobe overcloud service deploy
189216

190217
copy_ca_to_seed
191218
}
192219

193220
function deploy_wazuh() {
194-
kayobe infra vm host configure
221+
run_kayobe infra vm host configure
195222

196223
# Deploy Wazuh
197-
kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/wazuh-secrets.yml
224+
run_kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/wazuh-secrets.yml
198225
encrypt_file $KAYOBE_CONFIG_PATH/environments/$KAYOBE_ENVIRONMENT/wazuh-secrets.yml
199-
kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/wazuh-manager.yml
200-
kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/wazuh-agent.yml
226+
run_kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/wazuh-manager.yml
227+
run_kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/wazuh-agent.yml
201228
}
202229

203230
function create_resources() {
@@ -317,7 +344,7 @@ function deploy_full() {
317344

318345
deploy_seed
319346
deploy_overcloud
320-
if kayobe configuration dump --host wazuh-manager --var-name group_names | grep wazuh-manager &>/dev/null; then
347+
if run_kayobe configuration dump --host wazuh-manager --var-name group_names | grep wazuh-manager &>/dev/null; then
321348
deploy_wazuh
322349
fi
323350
create_resources
@@ -327,13 +354,13 @@ function deploy_full() {
327354
function upgrade_overcloud() {
328355
# Generate external tls certificates if it was previously disabled.
329356
if [[ -f $KAYOBE_CONFIG_PATH/ansible/vault-generate-test-external-tls.yml ]] && [[ ! -f $KAYOBE_CONFIG_PATH/environments/$KAYOBE_ENVIRONMENT/kolla/certificates/haproxy.pem ]]; then
330-
kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/vault-generate-test-external-tls.yml
357+
run_kayobe playbook run $KAYOBE_CONFIG_PATH/ansible/vault-generate-test-external-tls.yml
331358
encrypt_file $KAYOBE_CONFIG_PATH/environments/$KAYOBE_ENVIRONMENT/kolla/certificates/haproxy.pem
332359
fi
333360

334-
kayobe overcloud host upgrade
335-
kayobe overcloud host configure
336-
kayobe overcloud service upgrade
361+
run_kayobe overcloud host upgrade
362+
run_kayobe overcloud host configure
363+
run_kayobe overcloud service upgrade
337364
}
338365

339366
function usage() {

0 commit comments

Comments
 (0)