Skip to content

Commit ed1e48e

Browse files
Jacob Pan authored and KAGA-KOKO committed
iommu/vt-d: Enable posted mode for device MSIs
With posted MSI feature enabled on the CPU side, iommu interrupt remapping table entries (IRTEs) for device MSI/x can be allocated, activated, and programmed in posted mode. This means that IRTEs are linked with their respective PIDs of the target CPU. Handlers for the posted MSI notification vector will de-multiplex device MSI handlers. CPU notifications are coalesced if interrupts arrive at a high frequency. Posted interrupts are only used for device MSI and not for legacy devices (IO/APIC, HPET). Introduce a new irq_chip for posted MSIs, which has a dummy irq_ack() callback as EOI is performed in the notification handler once. When posted MSI is enabled, MSI domain/chip hierarchy will look like this example: domain: IR-PCI-MSIX-0000:50:00.0-12 hwirq: 0x29 chip: IR-PCI-MSIX-0000:50:00.0 flags: 0x430 IRQCHIP_SKIP_SET_WAKE IRQCHIP_ONESHOT_SAFE parent: domain: INTEL-IR-10-13 hwirq: 0x2d0000 chip: INTEL-IR-POST flags: 0x0 parent: domain: VECTOR hwirq: 0x77 chip: APIC Suggested-by: Thomas Gleixner <[email protected]> Signed-off-by: Jacob Pan <[email protected]> Signed-off-by: Thomas Gleixner <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent be9be07 commit ed1e48e

File tree

1 file changed

+109
-4
lines changed

1 file changed

+109
-4
lines changed

drivers/iommu/intel/irq_remapping.c

Lines changed: 109 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include <asm/cpu.h>
2020
#include <asm/irq_remapping.h>
2121
#include <asm/pci-direct.h>
22+
#include <asm/posted_intr.h>
2223

2324
#include "iommu.h"
2425
#include "../irq_remapping.h"
@@ -49,6 +50,7 @@ struct irq_2_iommu {
4950
u16 sub_handle;
5051
u8 irte_mask;
5152
enum irq_mode mode;
53+
bool posted_msi;
5254
};
5355

5456
struct intel_ir_data {
@@ -1118,6 +1120,14 @@ static void prepare_irte(struct irte *irte, int vector, unsigned int dest)
11181120
irte->redir_hint = 1;
11191121
}
11201122

1123+
static void prepare_irte_posted(struct irte *irte)
1124+
{
1125+
memset(irte, 0, sizeof(*irte));
1126+
1127+
irte->present = 1;
1128+
irte->p_pst = 1;
1129+
}
1130+
11211131
struct irq_remap_ops intel_irq_remap_ops = {
11221132
.prepare = intel_prepare_irq_remapping,
11231133
.enable = intel_enable_irq_remapping,
@@ -1126,6 +1136,47 @@ struct irq_remap_ops intel_irq_remap_ops = {
11261136
.enable_faulting = enable_drhd_fault_handling,
11271137
};
11281138

1139+
#ifdef CONFIG_X86_POSTED_MSI
1140+
1141+
static phys_addr_t get_pi_desc_addr(struct irq_data *irqd)
1142+
{
1143+
int cpu = cpumask_first(irq_data_get_effective_affinity_mask(irqd));
1144+
1145+
if (WARN_ON(cpu >= nr_cpu_ids))
1146+
return 0;
1147+
1148+
return __pa(per_cpu_ptr(&posted_msi_pi_desc, cpu));
1149+
}
1150+
1151+
static void intel_ir_reconfigure_irte_posted(struct irq_data *irqd)
1152+
{
1153+
struct intel_ir_data *ir_data = irqd->chip_data;
1154+
struct irte *irte = &ir_data->irte_entry;
1155+
struct irte irte_pi;
1156+
u64 pid_addr;
1157+
1158+
pid_addr = get_pi_desc_addr(irqd);
1159+
1160+
if (!pid_addr) {
1161+
pr_warn("Failed to setup IRQ %d for posted mode", irqd->irq);
1162+
return;
1163+
}
1164+
1165+
memset(&irte_pi, 0, sizeof(irte_pi));
1166+
1167+
/* The shared IRTE already be set up as posted during alloc_irte */
1168+
dmar_copy_shared_irte(&irte_pi, irte);
1169+
1170+
irte_pi.pda_l = (pid_addr >> (32 - PDA_LOW_BIT)) & ~(-1UL << PDA_LOW_BIT);
1171+
irte_pi.pda_h = (pid_addr >> 32) & ~(-1UL << PDA_HIGH_BIT);
1172+
1173+
modify_irte(&ir_data->irq_2_iommu, &irte_pi);
1174+
}
1175+
1176+
#else
1177+
/* No-op variant used when CONFIG_X86_POSTED_MSI is not enabled. */
static inline void intel_ir_reconfigure_irte_posted(struct irq_data *irqd)
{
}
1178+
#endif
1179+
11291180
static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force)
11301181
{
11311182
struct intel_ir_data *ir_data = irqd->chip_data;
@@ -1139,8 +1190,9 @@ static void intel_ir_reconfigure_irte(struct irq_data *irqd, bool force)
11391190
irte->vector = cfg->vector;
11401191
irte->dest_id = IRTE_DEST(cfg->dest_apicid);
11411192

1142-
/* Update the hardware only if the interrupt is in remapped mode. */
1143-
if (force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING)
1193+
if (ir_data->irq_2_iommu.posted_msi)
1194+
intel_ir_reconfigure_irte_posted(irqd);
1195+
else if (force || ir_data->irq_2_iommu.mode == IRQ_REMAPPING)
11441196
modify_irte(&ir_data->irq_2_iommu, irte);
11451197
}
11461198

@@ -1194,7 +1246,7 @@ static int intel_ir_set_vcpu_affinity(struct irq_data *data, void *info)
11941246
struct intel_ir_data *ir_data = data->chip_data;
11951247
struct vcpu_data *vcpu_pi_info = info;
11961248

1197-
/* stop posting interrupts, back to remapping mode */
1249+
/* stop posting interrupts, back to the default mode */
11981250
if (!vcpu_pi_info) {
11991251
modify_irte(&ir_data->irq_2_iommu, &ir_data->irte_entry);
12001252
} else {
@@ -1233,6 +1285,49 @@ static struct irq_chip intel_ir_chip = {
12331285
.irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity,
12341286
};
12351287

1288+
/*
1289+
* With posted MSIs, all vectors are multiplexed into a single notification
1290+
* vector. Device MSIs are then dispatched in a demux loop where
1291+
* EOIs can be coalesced as well.
1292+
*
1293+
* "INTEL-IR-POST" IRQ chip does not do EOI on ACK, thus the dummy irq_ack()
1294+
* function. Instead EOI is performed by the posted interrupt notification
1295+
* handler.
1296+
*
1297+
* For the example below, 3 MSIs are coalesced into one CPU notification. Only
1298+
* one apic_eoi() is needed.
1299+
*
1300+
* __sysvec_posted_msi_notification()
1301+
* irq_enter();
1302+
* handle_edge_irq()
1303+
* irq_chip_ack_parent()
1304+
* dummy(); // No EOI
1305+
* handle_irq_event()
1306+
* driver_handler()
1307+
* handle_edge_irq()
1308+
* irq_chip_ack_parent()
1309+
* dummy(); // No EOI
1310+
* handle_irq_event()
1311+
* driver_handler()
1312+
* handle_edge_irq()
1313+
* irq_chip_ack_parent()
1314+
* dummy(); // No EOI
1315+
* handle_irq_event()
1316+
* driver_handler()
1317+
* apic_eoi()
1318+
* irq_exit()
1319+
*/
1320+
1321+
/* Deliberately empty irq_ack() callback: performs no action on @d. */
static void dummy_ack(struct irq_data *d)
{
}
1322+
1323+
static struct irq_chip intel_ir_chip_post_msi = {
1324+
.name = "INTEL-IR-POST",
1325+
.irq_ack = dummy_ack,
1326+
.irq_set_affinity = intel_ir_set_affinity,
1327+
.irq_compose_msi_msg = intel_ir_compose_msi_msg,
1328+
.irq_set_vcpu_affinity = intel_ir_set_vcpu_affinity,
1329+
};
1330+
12361331
static void fill_msi_msg(struct msi_msg *msg, u32 index, u32 subhandle)
12371332
{
12381333
memset(msg, 0, sizeof(*msg));
@@ -1274,6 +1369,11 @@ static void intel_irq_remapping_prepare_irte(struct intel_ir_data *data,
12741369
break;
12751370
case X86_IRQ_ALLOC_TYPE_PCI_MSI:
12761371
case X86_IRQ_ALLOC_TYPE_PCI_MSIX:
1372+
if (posted_msi_supported()) {
1373+
prepare_irte_posted(irte);
1374+
data->irq_2_iommu.posted_msi = 1;
1375+
}
1376+
12771377
set_msi_sid(irte,
12781378
pci_real_dma_dev(msi_desc_to_pci_dev(info->desc)));
12791379
break;
@@ -1361,7 +1461,12 @@ static int intel_irq_remapping_alloc(struct irq_domain *domain,
13611461

13621462
irq_data->hwirq = (index << 16) + i;
13631463
irq_data->chip_data = ird;
1364-
irq_data->chip = &intel_ir_chip;
1464+
if (posted_msi_supported() &&
1465+
((info->type == X86_IRQ_ALLOC_TYPE_PCI_MSI) ||
1466+
(info->type == X86_IRQ_ALLOC_TYPE_PCI_MSIX)))
1467+
irq_data->chip = &intel_ir_chip_post_msi;
1468+
else
1469+
irq_data->chip = &intel_ir_chip;
13651470
intel_irq_remapping_prepare_irte(ird, irq_cfg, info, index, i);
13661471
irq_set_status_flags(virq + i, IRQ_MOVE_PCNTXT);
13671472
}

0 commit comments

Comments
 (0)