Skip to content

Commit d8bd2d4

Browse files
dcui authored and Sasha Levin committed
Drivers: hv: vmbus: Resume after fixing up old primary channels
When the host re-offers the primary channels upon resume, the host only guarantees the Instance GUID doesn't change, so vmbus_bus_suspend() should invalidate channel->offermsg.child_relid and figure out the number of primary channels that need to be fixed up upon resume.

Upon resume, vmbus_onoffer() finds the old channel structs, and maps the new offers to the old channels, and fixes up the old structs, and finally the resume callbacks of the VSC drivers will re-open the channels.

Signed-off-by: Dexuan Cui <decui@microsoft.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
1 parent b307b38 commit d8bd2d4

File tree

5 files changed

+101
-20
lines changed

5 files changed

+101
-20
lines changed

drivers/hv/channel_mgmt.c

Lines changed: 65 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -407,7 +407,15 @@ void hv_process_channel_removal(struct vmbus_channel *channel)
407407
cpumask_clear_cpu(channel->target_cpu,
408408
&primary_channel->alloced_cpus_in_node);
409409

410-
vmbus_release_relid(channel->offermsg.child_relid);
410+
/*
411+
* Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
412+
* the relid is invalidated; after hibernation, when the user-space app
413+
* destroys the channel, the relid is INVALID_RELID, and in this case
414+
* it's unnecessary and unsafe to release the old relid, since the same
415+
* relid can refer to a completely different channel now.
416+
*/
417+
if (channel->offermsg.child_relid != INVALID_RELID)
418+
vmbus_release_relid(channel->offermsg.child_relid);
411419

412420
free_channel(channel);
413421
}
@@ -851,6 +859,36 @@ void vmbus_initiate_unload(bool crash)
851859
vmbus_wait_for_unload();
852860
}
853861

862+
/*
 * check_ready_for_resume_event - Record that one re-offered primary channel
 * has been handled on the resume path.
 *
 * Decrements vmbus_connection.nr_chan_fixup_on_resume (which is incremented
 * in vmbus_bus_suspend()). When the counter reaches zero, completes
 * vmbus_connection.ready_for_resume_event, which vmbus_bus_resume() waits on
 * after requesting offers from the host.
 */
static void check_ready_for_resume_event(void)
863+
{
864+
/*
865+
* If all the old primary channels have been fixed up, then it's safe
866+
* to resume.
867+
*/
868+
if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume))
869+
complete(&vmbus_connection.ready_for_resume_event);
870+
}
871+
872+
/*
 * vmbus_setup_channel_state - Populate a channel's offer-derived state.
 * @channel: the channel whose state is (re)initialized
 * @offer: the offer message to take the state from
 *
 * Sets the host-signalling fields, copies the entire offer into
 * channel->offermsg, and derives monitor_grp/monitor_bit from
 * offer->monitorid. vmbus_onoffer() calls this both for a newly allocated
 * channel and to fix up an old channel whose re-offer differs on resume.
 */
static void vmbus_setup_channel_state(struct vmbus_channel *channel,
873+
struct vmbus_channel_offer_channel *offer)
874+
{
875+
/*
876+
* Setup state for signalling the host.
877+
*/
878+
channel->sig_event = VMBUS_EVENT_CONNECTION_ID;
879+
880+
/*
 * Unless the negotiated protocol is VERSION_WS2008, override the default
 * sig_event with the offer's connection_id and record whether the offer
 * asks for a dedicated interrupt.
 */
if (vmbus_proto_version != VERSION_WS2008) {
881+
channel->is_dedicated_interrupt =
882+
(offer->is_dedicated_interrupt != 0);
883+
channel->sig_event = offer->connection_id;
884+
}
885+
886+
memcpy(&channel->offermsg, offer,
887+
sizeof(struct vmbus_channel_offer_channel));
888+
/* Split monitorid into a group index (/ 32) and a bit within it (% 32). */
channel->monitor_grp = (u8)offer->monitorid / 32;
889+
channel->monitor_bit = (u8)offer->monitorid % 32;
890+
}
891+
854892
/*
855893
* find_primary_channel_by_offer - Get the channel object given the new offer.
856894
* This is only used in the resume path of hibernation.
@@ -902,21 +940,42 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
902940
atomic_dec(&vmbus_connection.offer_in_progress);
903941

904942
/*
905-
* We're resuming from hibernation: we expect the host to send
906-
* exactly the same offers that we had before the hibernation.
943+
* We're resuming from hibernation: all the sub-channel and
944+
* hv_sock channels we had before the hibernation should have
945+
* been cleaned up, and now we must be seeing a re-offered
946+
* primary channel that we had before the hibernation.
907947
*/
948+
949+
WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
950+
/* Fix up the relid. */
951+
oldchannel->offermsg.child_relid = offer->child_relid;
952+
908953
offer_sz = sizeof(*offer);
909-
if (memcmp(offer, &oldchannel->offermsg, offer_sz) == 0)
954+
if (memcmp(offer, &oldchannel->offermsg, offer_sz) == 0) {
955+
check_ready_for_resume_event();
910956
return;
957+
}
911958

912-
pr_debug("Mismatched offer from the host (relid=%d)\n",
959+
/*
960+
* This is not an error, since the host can also change the
961+
* other field(s) of the offer, e.g. on WS RS5 (Build 17763),
962+
* the offer->connection_id of the Mellanox VF vmbus device
963+
* can change when the host reoffers the device upon resume.
964+
*/
965+
pr_debug("vmbus offer changed: relid=%d\n",
913966
offer->child_relid);
914967

915968
print_hex_dump_debug("Old vmbus offer: ", DUMP_PREFIX_OFFSET,
916969
16, 4, &oldchannel->offermsg, offer_sz,
917970
false);
918971
print_hex_dump_debug("New vmbus offer: ", DUMP_PREFIX_OFFSET,
919972
16, 4, offer, offer_sz, false);
973+
974+
/* Fix up the old channel. */
975+
vmbus_setup_channel_state(oldchannel, offer);
976+
977+
check_ready_for_resume_event();
978+
920979
return;
921980
}
922981

@@ -929,21 +988,7 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
929988
return;
930989
}
931990

932-
/*
933-
* Setup state for signalling the host.
934-
*/
935-
newchannel->sig_event = VMBUS_EVENT_CONNECTION_ID;
936-
937-
if (vmbus_proto_version != VERSION_WS2008) {
938-
newchannel->is_dedicated_interrupt =
939-
(offer->is_dedicated_interrupt != 0);
940-
newchannel->sig_event = offer->connection_id;
941-
}
942-
943-
memcpy(&newchannel->offermsg, offer,
944-
sizeof(struct vmbus_channel_offer_channel));
945-
newchannel->monitor_grp = (u8)offer->monitorid / 32;
946-
newchannel->monitor_bit = (u8)offer->monitorid % 32;
991+
vmbus_setup_channel_state(newchannel, offer);
947992

948993
vmbus_process_offer(newchannel);
949994
}

drivers/hv/connection.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ struct vmbus_connection vmbus_connection = {
2929

3030
.ready_for_suspend_event= COMPLETION_INITIALIZER(
3131
vmbus_connection.ready_for_suspend_event),
32+
.ready_for_resume_event = COMPLETION_INITIALIZER(
33+
vmbus_connection.ready_for_resume_event),
3234
};
3335
EXPORT_SYMBOL_GPL(vmbus_connection);
3436

drivers/hv/hyperv_vmbus.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,20 @@ struct vmbus_connection {
272272
* drop to zero.
273273
*/
274274
struct completion ready_for_suspend_event;
275+
276+
/*
277+
* The number of primary channels that should be "fixed up"
278+
* upon resume: these channels are re-offered upon resume, and some
279+
* fields of the channel offers (e.g. child_relid and connection_id)
280+
* can change, so the old offermsg must be fixed up, before the resume
281+
* callbacks of the VSC drivers start to further touch the channels.
282+
*/
283+
atomic_t nr_chan_fixup_on_resume;
284+
/*
285+
* vmbus_bus_resume() waits for "nr_chan_fixup_on_resume" to
286+
* drop to zero.
287+
*/
288+
struct completion ready_for_resume_event;
275289
};
276290

277291

drivers/hv/vmbus_drv.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2164,9 +2164,17 @@ static int vmbus_bus_suspend(struct device *dev)
21642164
if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0)
21652165
wait_for_completion(&vmbus_connection.ready_for_suspend_event);
21662166

2167+
WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0);
2168+
21672169
mutex_lock(&vmbus_connection.channel_mutex);
21682170

21692171
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
2172+
/*
2173+
* Invalidate the field. Upon resume, vmbus_onoffer() will fix
2174+
* up the field, and the other fields (if necessary).
2175+
*/
2176+
channel->offermsg.child_relid = INVALID_RELID;
2177+
21702178
if (is_hvsock_channel(channel)) {
21712179
if (!channel->rescind) {
21722180
pr_err("hv_sock channel not rescinded!\n");
@@ -2181,6 +2189,8 @@ static int vmbus_bus_suspend(struct device *dev)
21812189
WARN_ON_ONCE(1);
21822190
}
21832191
spin_unlock_irqrestore(&channel->lock, flags);
2192+
2193+
atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume);
21842194
}
21852195

21862196
mutex_unlock(&vmbus_connection.channel_mutex);
@@ -2189,6 +2199,9 @@ static int vmbus_bus_suspend(struct device *dev)
21892199

21902200
vmbus_connection.conn_state = DISCONNECTED;
21912201

2202+
/* Reset the event for the next resume. */
2203+
reinit_completion(&vmbus_connection.ready_for_resume_event);
2204+
21922205
return 0;
21932206
}
21942207

@@ -2223,8 +2236,12 @@ static int vmbus_bus_resume(struct device *dev)
22232236
if (ret != 0)
22242237
return ret;
22252238

2239+
WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0);
2240+
22262241
vmbus_request_offers();
22272242

2243+
wait_for_completion(&vmbus_connection.ready_for_resume_event);
2244+
22282245
/* Reset the event for the next suspend. */
22292246
reinit_completion(&vmbus_connection.ready_for_suspend_event);
22302247

include/linux/hyperv.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,9 @@ enum vmbus_channel_message_type {
426426
CHANNELMSG_COUNT
427427
};
428428

429+
/* Hyper-V supports about 2048 channels, and the RELIDs start with 1. */
430+
#define INVALID_RELID U32_MAX
431+
429432
struct vmbus_channel_message_header {
430433
enum vmbus_channel_message_type msgtype;
431434
u32 padding;

0 commit comments

Comments
 (0)