19
19
#include <asm/cpu.h>
20
20
#include <asm/irq_remapping.h>
21
21
#include <asm/pci-direct.h>
22
+ #include <asm/posted_intr.h>
22
23
23
24
#include "iommu.h"
24
25
#include "../irq_remapping.h"
@@ -49,6 +50,7 @@ struct irq_2_iommu {
49
50
u16 sub_handle ;
50
51
u8 irte_mask ;
51
52
enum irq_mode mode ;
53
+ bool posted_msi ;
52
54
};
53
55
54
56
struct intel_ir_data {
@@ -1118,6 +1120,14 @@ static void prepare_irte(struct irte *irte, int vector, unsigned int dest)
1118
1120
irte -> redir_hint = 1 ;
1119
1121
}
1120
1122
1123
+ static void prepare_irte_posted (struct irte * irte )
1124
+ {
1125
+ memset (irte , 0 , sizeof (* irte ));
1126
+
1127
+ irte -> present = 1 ;
1128
+ irte -> p_pst = 1 ;
1129
+ }
1130
+
1121
1131
struct irq_remap_ops intel_irq_remap_ops = {
1122
1132
.prepare = intel_prepare_irq_remapping ,
1123
1133
.enable = intel_enable_irq_remapping ,
@@ -1126,6 +1136,47 @@ struct irq_remap_ops intel_irq_remap_ops = {
1126
1136
.enable_faulting = enable_drhd_fault_handling ,
1127
1137
};
1128
1138
1139
+ #ifdef CONFIG_X86_POSTED_MSI
1140
+
1141
+ static phys_addr_t get_pi_desc_addr (struct irq_data * irqd )
1142
+ {
1143
+ int cpu = cpumask_first (irq_data_get_effective_affinity_mask (irqd ));
1144
+
1145
+ if (WARN_ON (cpu >= nr_cpu_ids ))
1146
+ return 0 ;
1147
+
1148
+ return __pa (per_cpu_ptr (& posted_msi_pi_desc , cpu ));
1149
+ }
1150
+
1151
+ static void intel_ir_reconfigure_irte_posted (struct irq_data * irqd )
1152
+ {
1153
+ struct intel_ir_data * ir_data = irqd -> chip_data ;
1154
+ struct irte * irte = & ir_data -> irte_entry ;
1155
+ struct irte irte_pi ;
1156
+ u64 pid_addr ;
1157
+
1158
+ pid_addr = get_pi_desc_addr (irqd );
1159
+
1160
+ if (!pid_addr ) {
1161
+ pr_warn ("Failed to setup IRQ %d for posted mode" , irqd -> irq );
1162
+ return ;
1163
+ }
1164
+
1165
+ memset (& irte_pi , 0 , sizeof (irte_pi ));
1166
+
1167
+ /* The shared IRTE already be set up as posted during alloc_irte */
1168
+ dmar_copy_shared_irte (& irte_pi , irte );
1169
+
1170
+ irte_pi .pda_l = (pid_addr >> (32 - PDA_LOW_BIT )) & ~(-1UL << PDA_LOW_BIT );
1171
+ irte_pi .pda_h = (pid_addr >> 32 ) & ~(-1UL << PDA_HIGH_BIT );
1172
+
1173
+ modify_irte (& ir_data -> irq_2_iommu , & irte_pi );
1174
+ }
1175
+
1176
+ #else
1177
/* Stub for builds without CONFIG_X86_POSTED_MSI: there is nothing to reconfigure. */
static inline void intel_ir_reconfigure_irte_posted(struct irq_data *irqd)
{
}
1178
+ #endif
1179
+
1129
1180
static void intel_ir_reconfigure_irte (struct irq_data * irqd , bool force )
1130
1181
{
1131
1182
struct intel_ir_data * ir_data = irqd -> chip_data ;
@@ -1139,8 +1190,9 @@ static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force)
1139
1190
irte -> vector = cfg -> vector ;
1140
1191
irte -> dest_id = IRTE_DEST (cfg -> dest_apicid );
1141
1192
1142
- /* Update the hardware only if the interrupt is in remapped mode. */
1143
- if (force || ir_data -> irq_2_iommu .mode == IRQ_REMAPPING )
1193
+ if (ir_data -> irq_2_iommu .posted_msi )
1194
+ intel_ir_reconfigure_irte_posted (irqd );
1195
+ else if (force || ir_data -> irq_2_iommu .mode == IRQ_REMAPPING )
1144
1196
modify_irte (& ir_data -> irq_2_iommu , irte );
1145
1197
}
1146
1198
@@ -1194,7 +1246,7 @@ static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info)
1194
1246
struct intel_ir_data * ir_data = data -> chip_data ;
1195
1247
struct vcpu_data * vcpu_pi_info = info ;
1196
1248
1197
- /* stop posting interrupts, back to remapping mode */
1249
+ /* stop posting interrupts, back to the default mode */
1198
1250
if (!vcpu_pi_info ) {
1199
1251
modify_irte (& ir_data -> irq_2_iommu , & ir_data -> irte_entry );
1200
1252
} else {
@@ -1233,6 +1285,49 @@ static struct irq_chip intel_ir_chip = {
1233
1285
.irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity ,
1234
1286
};
1235
1287
1288
+ /*
1289
+ * With posted MSIs, all vectors are multiplexed into a single notification
1290
+ * vector. Device MSIs are then dispatched in a demux loop where
1291
+ * EOIs can be coalesced as well.
1292
+ *
1293
+ * "INTEL-IR-POST" IRQ chip does not do EOI on ACK, thus the dummy irq_ack()
1294
+ * function. Instead EOI is performed by the posted interrupt notification
1295
+ * handler.
1296
+ *
1297
+ * For the example below, 3 MSIs are coalesced into one CPU notification. Only
1298
+ * one apic_eoi() is needed.
1299
+ *
1300
+ * __sysvec_posted_msi_notification()
1301
+ * irq_enter();
1302
+ * handle_edge_irq()
1303
+ * irq_chip_ack_parent()
1304
+ * dummy(); // No EOI
1305
+ * handle_irq_event()
1306
+ * driver_handler()
1307
+ * handle_edge_irq()
1308
+ * irq_chip_ack_parent()
1309
+ * dummy(); // No EOI
1310
+ * handle_irq_event()
1311
+ * driver_handler()
1312
+ * handle_edge_irq()
1313
+ * irq_chip_ack_parent()
1314
+ * dummy(); // No EOI
1315
+ * handle_irq_event()
1316
+ * driver_handler()
1317
+ * apic_eoi()
1318
+ * irq_exit()
1319
+ */
1320
+
1321
/*
 * Intentionally empty ack callback for the "INTEL-IR-POST" chip: no EOI
 * is issued on ACK because the posted interrupt notification handler
 * performs the EOI instead.
 */
static void dummy_ack(struct irq_data *d)
{
}
1322
+
1323
+ static struct irq_chip intel_ir_chip_post_msi = {
1324
+ .name = "INTEL-IR-POST" ,
1325
+ .irq_ack = dummy_ack ,
1326
+ .irq_set_affinity = intel_ir_set_affinity ,
1327
+ .irq_compose_msi_msg = intel_ir_compose_msi_msg ,
1328
+ .irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity ,
1329
+ };
1330
+
1236
1331
static void fill_msi_msg (struct msi_msg * msg , u32 index , u32 subhandle )
1237
1332
{
1238
1333
memset (msg , 0 , sizeof (* msg ));
@@ -1274,6 +1369,11 @@ static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data,
1274
1369
break ;
1275
1370
case X86_IRQ_ALLOC_TYPE_PCI_MSI :
1276
1371
case X86_IRQ_ALLOC_TYPE_PCI_MSIX :
1372
+ if (posted_msi_supported ()) {
1373
+ prepare_irte_posted (irte );
1374
+ data -> irq_2_iommu .posted_msi = 1 ;
1375
+ }
1376
+
1277
1377
set_msi_sid (irte ,
1278
1378
pci_real_dma_dev (msi_desc_to_pci_dev (info -> desc )));
1279
1379
break ;
@@ -1361,7 +1461,12 @@ static int intel_irq_remapping_alloc(struct irq_domain *domain,
1361
1461
1362
1462
irq_data -> hwirq = (index << 16 ) + i ;
1363
1463
irq_data -> chip_data = ird ;
1364
- irq_data -> chip = & intel_ir_chip ;
1464
+ if (posted_msi_supported () &&
1465
+ ((info -> type == X86_IRQ_ALLOC_TYPE_PCI_MSI ) ||
1466
+ (info -> type == X86_IRQ_ALLOC_TYPE_PCI_MSIX )))
1467
+ irq_data -> chip = & intel_ir_chip_post_msi ;
1468
+ else
1469
+ irq_data -> chip = & intel_ir_chip ;
1365
1470
intel_irq_remapping_prepare_irte (ird , irq_cfg , info , index , i );
1366
1471
irq_set_status_flags (virq + i , IRQ_MOVE_PCNTXT );
1367
1472
}
0 commit comments