Skip to content

Commit 027bd7e

Browse files
konradwilk authored and David Vrabel committed
xen/xenbus: Avoid synchronous wait on XenBus stalling shutdown/restart.
'read_reply' works together with 'process_msg' to read a reply from XenBus. 'process_msg' runs within the 'xenbus' thread. Whenever a message shows up in XenBus it is put on the xs_state.reply_list list, and 'read_reply' picks it up. The problem arises if the backend domain or the xenstored process is killed. In that case 'xenbus' is still waiting - and 'read_reply', if called, is stuck forever waiting for the reply_list to have some contents. This is normally not a problem - as the backend domain can come back or the xenstored process can be restarted. However, if the domain is in the process of being powered off/restarted/halted - there is no point in waiting for it to come back - as we are effectively being terminated and should not impede the progress. This patch solves this problem by checking whether the guest is the right domain. If it is an initial domain and hurtling towards death - there is no point in continuing the wait. All other types of guests continue with their existing behavior (as Xenstore is expected to still be running in another domain). Fixes-Bug: http://bugs.xenproject.org/xen/bug/8 Signed-off-by: Konrad Rzeszutek Wilk <[email protected]> Reviewed-by: Boris Ostrovsky <[email protected]> Reviewed-by: David Vrabel <[email protected]> Signed-off-by: David Vrabel <[email protected]>
1 parent e0fc17a commit 027bd7e

File tree

1 file changed

+41
-3
lines changed

1 file changed

+41
-3
lines changed

drivers/xen/xenbus/xenbus_xs.c

Lines changed: 41 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050
#include <xen/xenbus.h>
5151
#include <xen/xen.h>
5252
#include "xenbus_comms.h"
53+
#include "xenbus_probe.h"
5354

5455
struct xs_stored_msg {
5556
struct list_head list;
@@ -139,6 +140,29 @@ static int get_error(const char *errorstring)
139140
return xsd_errors[i].errnum;
140141
}
141142

143+
static bool xenbus_ok(void)
144+
{
145+
switch (xen_store_domain_type) {
146+
case XS_LOCAL:
147+
switch (system_state) {
148+
case SYSTEM_POWER_OFF:
149+
case SYSTEM_RESTART:
150+
case SYSTEM_HALT:
151+
return false;
152+
default:
153+
break;
154+
}
155+
return true;
156+
case XS_PV:
157+
case XS_HVM:
158+
/* FIXME: Could check that the remote domain is alive,
159+
* but it is normally initial domain. */
160+
return true;
161+
default:
162+
break;
163+
}
164+
return false;
165+
}
142166
static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len)
143167
{
144168
struct xs_stored_msg *msg;
@@ -148,9 +172,20 @@ static void *read_reply(enum xsd_sockmsg_type *type, unsigned int *len)
148172

149173
while (list_empty(&xs_state.reply_list)) {
150174
spin_unlock(&xs_state.reply_lock);
151-
/* XXX FIXME: Avoid synchronous wait for response here. */
152-
wait_event(xs_state.reply_waitq,
153-
!list_empty(&xs_state.reply_list));
175+
if (xenbus_ok())
176+
/* XXX FIXME: Avoid synchronous wait for response here. */
177+
wait_event_timeout(xs_state.reply_waitq,
178+
!list_empty(&xs_state.reply_list),
179+
msecs_to_jiffies(500));
180+
else {
181+
/*
182+
* If we are in the process of being shut-down there is
183+
* no point of trying to contact XenBus - it is either
184+
* killed (xenstored application) or the other domain
185+
* has been killed or is unreachable.
186+
*/
187+
return ERR_PTR(-EIO);
188+
}
154189
spin_lock(&xs_state.reply_lock);
155190
}
156191

@@ -215,6 +250,9 @@ void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)
215250

216251
mutex_unlock(&xs_state.request_mutex);
217252

253+
if (IS_ERR(ret))
254+
return ret;
255+
218256
if ((msg->type == XS_TRANSACTION_END) ||
219257
((req_msg.type == XS_TRANSACTION_START) &&
220258
(msg->type == XS_ERROR)))

0 commit comments

Comments
 (0)