@@ -319,7 +319,6 @@ static struct vmbus_channel *alloc_channel(void)
319
319
init_completion (& channel -> rescind_event );
320
320
321
321
INIT_LIST_HEAD (& channel -> sc_list );
322
- INIT_LIST_HEAD (& channel -> percpu_list );
323
322
324
323
tasklet_init (& channel -> callback_event ,
325
324
vmbus_on_event , (unsigned long )channel );
@@ -340,23 +339,49 @@ static void free_channel(struct vmbus_channel *channel)
340
339
kobject_put (& channel -> kobj );
341
340
}
342
341
343
- static void percpu_channel_enq ( void * arg )
342
+ void vmbus_channel_map_relid ( struct vmbus_channel * channel )
344
343
{
345
- struct vmbus_channel * channel = arg ;
346
- struct hv_per_cpu_context * hv_cpu
347
- = this_cpu_ptr (hv_context .cpu_context );
348
-
349
- list_add_tail_rcu (& channel -> percpu_list , & hv_cpu -> chan_list );
344
+ if (WARN_ON (channel -> offermsg .child_relid >= MAX_CHANNEL_RELIDS ))
345
+ return ;
346
+ /*
347
+ * The mapping of the channel's relid is visible from the CPUs that
348
+ * execute vmbus_chan_sched() by the time that vmbus_chan_sched() will
349
+ * execute:
350
+ *
351
+ * (a) In the "normal (i.e., not resuming from hibernation)" path,
352
+ * the full barrier in smp_store_mb() guarantees that the store
353
+ * is propagated to all CPUs before the add_channel_work work
354
+ * is queued. In turn, add_channel_work is queued before the
355
+ * channel's ring buffer is allocated/initialized and the
356
+ * OPENCHANNEL message for the channel is sent in vmbus_open().
357
+ * Hyper-V won't start sending the interrupts for the channel
358
+ * before the OPENCHANNEL message is acked. The memory barrier
359
+ * in vmbus_chan_sched() -> sync_test_and_clear_bit() ensures
360
+ * that vmbus_chan_sched() must find the channel's relid in
361
+ * recv_int_page before retrieving the channel pointer from the
362
+ * array of channels.
363
+ *
364
+ * (b) In the "resuming from hibernation" path, the smp_store_mb()
365
+ * guarantees that the store is propagated to all CPUs before
366
+ * the VMBus connection is marked as ready for the resume event
367
+ * (cf. check_ready_for_resume_event()). The interrupt handler
368
+ * of the VMBus driver and vmbus_chan_sched() cannot run before
369
+ * vmbus_bus_resume() has completed execution (cf. resume_noirq).
370
+ */
371
+ smp_store_mb (
372
+ vmbus_connection .channels [channel -> offermsg .child_relid ],
373
+ channel );
350
374
}
351
375
352
- static void percpu_channel_deq ( void * arg )
376
+ void vmbus_channel_unmap_relid ( struct vmbus_channel * channel )
353
377
{
354
- struct vmbus_channel * channel = arg ;
355
-
356
- list_del_rcu (& channel -> percpu_list );
378
+ if (WARN_ON (channel -> offermsg .child_relid >= MAX_CHANNEL_RELIDS ))
379
+ return ;
380
+ WRITE_ONCE (
381
+ vmbus_connection .channels [channel -> offermsg .child_relid ],
382
+ NULL );
357
383
}
358
384
359
-
360
385
static void vmbus_release_relid (u32 relid )
361
386
{
362
387
struct vmbus_channel_relid_released msg ;
@@ -376,17 +401,25 @@ void hv_process_channel_removal(struct vmbus_channel *channel)
376
401
struct vmbus_channel * primary_channel ;
377
402
unsigned long flags ;
378
403
379
- BUG_ON (! mutex_is_locked ( & vmbus_connection .channel_mutex ) );
404
+ lockdep_assert_held ( & vmbus_connection .channel_mutex );
380
405
BUG_ON (!channel -> rescind );
381
406
382
- if (channel -> target_cpu != get_cpu ()) {
383
- put_cpu ();
384
- smp_call_function_single (channel -> target_cpu ,
385
- percpu_channel_deq , channel , true);
386
- } else {
387
- percpu_channel_deq (channel );
388
- put_cpu ();
389
- }
407
+ /*
408
+ * hv_process_channel_removal() could find INVALID_RELID only for
409
+ * hv_sock channels. See the inline comments in vmbus_onoffer().
410
+ */
411
+ WARN_ON (channel -> offermsg .child_relid == INVALID_RELID &&
412
+ !is_hvsock_channel (channel ));
413
+
414
+ /*
415
+ * Upon suspend, an in-use hv_sock channel is removed from the array of
416
+ * channels and the relid is invalidated. After hibernation, when the
417
+ * user-space application destroys the channel, it's unnecessary and
418
+ * unsafe to remove the channel from the array of channels. See also
419
+ * the inline comments before the call of vmbus_release_relid() below.
420
+ */
421
+ if (channel -> offermsg .child_relid != INVALID_RELID )
422
+ vmbus_channel_unmap_relid (channel );
390
423
391
424
if (channel -> primary_channel == NULL ) {
392
425
list_del (& channel -> listentry );
@@ -447,16 +480,6 @@ static void vmbus_add_channel_work(struct work_struct *work)
447
480
448
481
init_vp_index (newchannel , dev_type );
449
482
450
- if (newchannel -> target_cpu != get_cpu ()) {
451
- put_cpu ();
452
- smp_call_function_single (newchannel -> target_cpu ,
453
- percpu_channel_enq ,
454
- newchannel , true);
455
- } else {
456
- percpu_channel_enq (newchannel );
457
- put_cpu ();
458
- }
459
-
460
483
/*
461
484
* This state is used to indicate a successful open
462
485
* so that when we do close the channel normally, we
@@ -523,17 +546,10 @@ static void vmbus_add_channel_work(struct work_struct *work)
523
546
spin_unlock_irqrestore (& primary_channel -> lock , flags );
524
547
}
525
548
526
- mutex_unlock (& vmbus_connection .channel_mutex );
549
+ /* vmbus_process_offer() has mapped the channel. */
550
+ vmbus_channel_unmap_relid (newchannel );
527
551
528
- if (newchannel -> target_cpu != get_cpu ()) {
529
- put_cpu ();
530
- smp_call_function_single (newchannel -> target_cpu ,
531
- percpu_channel_deq ,
532
- newchannel , true);
533
- } else {
534
- percpu_channel_deq (newchannel );
535
- put_cpu ();
536
- }
552
+ mutex_unlock (& vmbus_connection .channel_mutex );
537
553
538
554
vmbus_release_relid (newchannel -> offermsg .child_relid );
539
555
@@ -599,6 +615,8 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
599
615
spin_unlock_irqrestore (& channel -> lock , flags );
600
616
}
601
617
618
+ vmbus_channel_map_relid (newchannel );
619
+
602
620
mutex_unlock (& vmbus_connection .channel_mutex );
603
621
604
622
/*
@@ -940,45 +958,72 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
940
958
oldchannel = find_primary_channel_by_offer (offer );
941
959
942
960
if (oldchannel != NULL ) {
943
- atomic_dec (& vmbus_connection .offer_in_progress );
944
-
945
961
/*
946
962
* We're resuming from hibernation: all the sub-channel and
947
963
* hv_sock channels we had before the hibernation should have
948
964
* been cleaned up, and now we must be seeing a re-offered
949
965
* primary channel that we had before the hibernation.
950
966
*/
951
967
968
+ /*
969
+ * { Initially: channel relid = INVALID_RELID,
970
+ * channels[valid_relid] = NULL }
971
+ *
972
+ * CPU1 CPU2
973
+ *
974
+ * [vmbus_onoffer()] [vmbus_device_release()]
975
+ *
976
+ * LOCK channel_mutex LOCK channel_mutex
977
+ * STORE channel relid = valid_relid LOAD r1 = channel relid
978
+ * MAP_RELID channel if (r1 != INVALID_RELID)
979
+ * UNLOCK channel_mutex UNMAP_RELID channel
980
+ * UNLOCK channel_mutex
981
+ *
982
+ * Forbids: r1 == valid_relid &&
983
+ * channels[valid_relid] == channel
984
+ *
985
+ * Note. r1 can be INVALID_RELID only for an hv_sock channel.
986
+ * None of the hv_sock channels which were present before the
987
+ * suspend are re-offered upon the resume. See the WARN_ON()
988
+ * in hv_process_channel_removal().
989
+ */
990
+ mutex_lock (& vmbus_connection .channel_mutex );
991
+
992
+ atomic_dec (& vmbus_connection .offer_in_progress );
993
+
952
994
WARN_ON (oldchannel -> offermsg .child_relid != INVALID_RELID );
953
995
/* Fix up the relid. */
954
996
oldchannel -> offermsg .child_relid = offer -> child_relid ;
955
997
956
998
offer_sz = sizeof (* offer );
957
- if (memcmp (offer , & oldchannel -> offermsg , offer_sz ) == 0 ) {
958
- check_ready_for_resume_event ();
959
- return ;
999
+ if (memcmp (offer , & oldchannel -> offermsg , offer_sz ) != 0 ) {
1000
+ /*
1001
+ * This is not an error, since the host can also change
1002
+ * the other field(s) of the offer, e.g. on WS RS5
1003
+ * (Build 17763), the offer->connection_id of the
1004
+ * Mellanox VF vmbus device can change when the host
1005
+ * reoffers the device upon resume.
1006
+ */
1007
+ pr_debug ("vmbus offer changed: relid=%d\n" ,
1008
+ offer -> child_relid );
1009
+
1010
+ print_hex_dump_debug ("Old vmbus offer: " ,
1011
+ DUMP_PREFIX_OFFSET , 16 , 4 ,
1012
+ & oldchannel -> offermsg , offer_sz ,
1013
+ false);
1014
+ print_hex_dump_debug ("New vmbus offer: " ,
1015
+ DUMP_PREFIX_OFFSET , 16 , 4 ,
1016
+ offer , offer_sz , false);
1017
+
1018
+ /* Fix up the old channel. */
1019
+ vmbus_setup_channel_state (oldchannel , offer );
960
1020
}
961
1021
962
- /*
963
- * This is not an error, since the host can also change the
964
- * other field(s) of the offer, e.g. on WS RS5 (Build 17763),
965
- * the offer->connection_id of the Mellanox VF vmbus device
966
- * can change when the host reoffers the device upon resume.
967
- */
968
- pr_debug ("vmbus offer changed: relid=%d\n" ,
969
- offer -> child_relid );
970
-
971
- print_hex_dump_debug ("Old vmbus offer: " , DUMP_PREFIX_OFFSET ,
972
- 16 , 4 , & oldchannel -> offermsg , offer_sz ,
973
- false);
974
- print_hex_dump_debug ("New vmbus offer: " , DUMP_PREFIX_OFFSET ,
975
- 16 , 4 , offer , offer_sz , false);
976
-
977
- /* Fix up the old channel. */
978
- vmbus_setup_channel_state (oldchannel , offer );
979
-
1022
+ /* Add the channel back to the array of channels. */
1023
+ vmbus_channel_map_relid (oldchannel );
980
1024
check_ready_for_resume_event ();
981
1025
1026
+ mutex_unlock (& vmbus_connection .channel_mutex );
982
1027
return ;
983
1028
}
984
1029
@@ -1036,14 +1081,14 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
1036
1081
*
1037
1082
* CPU1 CPU2
1038
1083
*
1039
- * [vmbus_process_offer ()] [vmbus_onoffer_rescind()]
1084
+ * [vmbus_onoffer ()] [vmbus_onoffer_rescind()]
1040
1085
*
1041
1086
* LOCK channel_mutex WAIT_ON offer_in_progress == 0
1042
1087
* DECREMENT offer_in_progress LOCK channel_mutex
1043
- * INSERT chn_list SEARCH chn_list
1088
+ * STORE channels[] LOAD channels[]
1044
1089
* UNLOCK channel_mutex UNLOCK channel_mutex
1045
1090
*
1046
- * Forbids: CPU2's SEARCH from *not* seeing CPU1's INSERT
1091
+ * Forbids: CPU2's LOAD from *not* seeing CPU1's STORE
1047
1092
*/
1048
1093
1049
1094
while (atomic_read (& vmbus_connection .offer_in_progress ) != 0 ) {
0 commit comments