@@ -718,8 +718,8 @@ static int send_message_queue_full(void *cb, struct gru_message_queue_desc *mqd,
718
718
static int send_message_put_nacked (void * cb , struct gru_message_queue_desc * mqd ,
719
719
void * mesg , int lines )
720
720
{
721
- unsigned long m , * val = mesg , gpa , save ;
722
- int ret ;
721
+ unsigned long m ;
722
+ int ret , loops = 200 ; /* experimentally determined */
723
723
724
724
m = mqd -> mq_gpa + (gru_get_amo_value_head (cb ) << 6 );
725
725
if (lines == 2 ) {
@@ -735,22 +735,28 @@ static int send_message_put_nacked(void *cb, struct gru_message_queue_desc *mqd,
735
735
return MQE_OK ;
736
736
737
737
/*
738
- * Send a cross-partition interrupt to the SSI that contains the target
739
- * message queue. Normally, the interrupt is automatically delivered by
740
- * hardware but some error conditions require explicit delivery.
741
- * Use the GRU to deliver the interrupt. Otherwise partition failures
738
+ * Send a noop message in order to deliver a cross-partition interrupt
739
+ * to the SSI that contains the target message queue. Normally, the
740
+ * interrupt is automatically delivered by hardware following mesq
741
+ * operations, but some error conditions require explicit delivery.
742
+ * The noop message will trigger delivery. Otherwise partition failures
742
743
* could cause unrecovered errors.
743
744
*/
744
- gpa = uv_global_gru_mmr_address (mqd -> interrupt_pnode , UVH_IPI_INT );
745
- save = * val ;
746
- * val = uv_hub_ipi_value (mqd -> interrupt_apicid , mqd -> interrupt_vector ,
747
- dest_Fixed );
748
- gru_vstore_phys (cb , gpa , gru_get_tri (mesg ), IAA_REGISTER , IMA );
749
- ret = gru_wait (cb );
750
- * val = save ;
751
- if (ret != CBS_IDLE )
752
- return MQE_UNEXPECTED_CB_ERR ;
753
- return MQE_OK ;
745
+ do {
746
+ ret = send_noop_message (cb , mqd , mesg );
747
+ } while ((ret == MQIE_AGAIN || ret == MQE_CONGESTION ) && (loops -- > 0 ));
748
+
749
+ if (ret == MQIE_AGAIN || ret == MQE_CONGESTION ) {
750
+ /*
751
+ * Don't indicate to the app to resend the message, as it's
752
+ * already been successfully sent. We simply return MQE_OK
753
+ * (rather than fail the send with MQE_UNEXPECTED_CB_ERR),
754
+ * assuming that the other side is receiving enough
755
+ * interrupts to get this message processed anyway.
756
+ */
757
+ ret = MQE_OK ;
758
+ }
759
+ return ret ;
754
760
}
755
761
756
762
/*
0 commit comments