
Commit 37de98f

rmurphy-arm authored and wildea01 committed
iommu/arm-smmu-v3: Use CMD_SYNC completion MSI
As an IRQ, the CMD_SYNC interrupt is not particularly useful, not least because we often need to wait for sync completion within someone else's IRQ handler anyway. However, when the SMMU is both coherent and supports MSIs, we can have a lot more fun by not using it as an interrupt at all. Following the example suggested in the architecture and using a write targeting normal memory, we can let callers wait on a status variable outside the lock instead of having to stall the entire queue or even touch MMIO registers. Since multiple sync commands are guaranteed to complete in order, a simple incrementing sequence count is all we need to unambiguously support any realistic number of overlapping waiters.

Signed-off-by: Robin Murphy <[email protected]>
Signed-off-by: Will Deacon <[email protected]>
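The wrap-safe trick behind that sequence count is standard serial-number arithmetic: subtract in unsigned 32-bit space and reinterpret the result as signed, so a counter that wraps past zero still compares correctly as long as no waiter lags by 2^31 or more. A minimal standalone sketch of the comparison (plain C; seq_reached is a hypothetical helper, not part of the commit):

#include <stdint.h>
#include <stdio.h>

/*
 * Hypothetical helper: wrap-safe "has the counter reached idx yet?"
 * check, in the style of arm_smmu_sync_poll_msi() in the diff below.
 * Valid while the distance between the two values stays under 2^31;
 * the command queue's 2^20-entry ceiling keeps it far inside that bound.
 */
static int seq_reached(uint32_t val, uint32_t idx)
{
	return (int32_t)(val - idx) >= 0;
}

int main(void)
{
	printf("%d\n", seq_reached(105, 100));       /* 1: count has passed idx */
	printf("%d\n", seq_reached(99, 100));        /* 0: not complete yet */
	printf("%d\n", seq_reached(3, 0xfffffffeu)); /* 1: correct across wrap */
	return 0;
}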
1 parent dce032a commit 37de98f

File tree

1 file changed: +49 -2 lines

drivers/iommu/arm-smmu-v3.c

Lines changed: 49 additions & 2 deletions
@@ -378,7 +378,16 @@
 
 #define CMDQ_SYNC_0_CS_SHIFT		12
 #define CMDQ_SYNC_0_CS_NONE		(0UL << CMDQ_SYNC_0_CS_SHIFT)
+#define CMDQ_SYNC_0_CS_IRQ		(1UL << CMDQ_SYNC_0_CS_SHIFT)
 #define CMDQ_SYNC_0_CS_SEV		(2UL << CMDQ_SYNC_0_CS_SHIFT)
+#define CMDQ_SYNC_0_MSH_SHIFT		22
+#define CMDQ_SYNC_0_MSH_ISH		(3UL << CMDQ_SYNC_0_MSH_SHIFT)
+#define CMDQ_SYNC_0_MSIATTR_SHIFT	24
+#define CMDQ_SYNC_0_MSIATTR_OIWB	(0xfUL << CMDQ_SYNC_0_MSIATTR_SHIFT)
+#define CMDQ_SYNC_0_MSIDATA_SHIFT	32
+#define CMDQ_SYNC_0_MSIDATA_MASK	0xffffffffUL
+#define CMDQ_SYNC_1_MSIADDR_SHIFT	0
+#define CMDQ_SYNC_1_MSIADDR_MASK	0xffffffffffffcUL
 
 /* Event queue */
 #define EVTQ_ENT_DWORDS			4
@@ -410,6 +419,7 @@
 /* High-level queue structures */
 #define ARM_SMMU_POLL_TIMEOUT_US	100
 #define ARM_SMMU_CMDQ_DRAIN_TIMEOUT_US	1000000 /* 1s! */
+#define ARM_SMMU_SYNC_TIMEOUT_US	1000000 /* 1s! */
 
 #define MSI_IOVA_BASE			0x8000000
 #define MSI_IOVA_LENGTH			0x100000
@@ -496,6 +506,10 @@ struct arm_smmu_cmdq_ent {
 		} pri;
 
 		#define CMDQ_OP_CMD_SYNC	0x46
+		struct {
+			u32			msidata;
+			u64			msiaddr;
+		} sync;
 	};
 };
 
@@ -609,6 +623,7 @@ struct arm_smmu_device {
 
 	int				gerr_irq;
 	int				combined_irq;
+	atomic_t			sync_nr;
 
 	unsigned long			ias; /* IPA */
 	unsigned long			oas; /* PA */
@@ -627,6 +642,8 @@ struct arm_smmu_device {
 
 	struct arm_smmu_strtab_cfg	strtab_cfg;
 
+	u32				sync_count;
+
 	/* IOMMU core code handle */
 	struct iommu_device		iommu;
 };
@@ -871,7 +888,13 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
 		}
 		break;
 	case CMDQ_OP_CMD_SYNC:
-		cmd[0] |= CMDQ_SYNC_0_CS_SEV;
+		if (ent->sync.msiaddr)
+			cmd[0] |= CMDQ_SYNC_0_CS_IRQ;
+		else
+			cmd[0] |= CMDQ_SYNC_0_CS_SEV;
+		cmd[0] |= CMDQ_SYNC_0_MSH_ISH | CMDQ_SYNC_0_MSIATTR_OIWB;
+		cmd[0] |= (u64)ent->sync.msidata << CMDQ_SYNC_0_MSIDATA_SHIFT;
+		cmd[1] |= ent->sync.msiaddr & CMDQ_SYNC_1_MSIADDR_MASK;
 		break;
 	default:
 		return -ENOENT;
@@ -957,21 +980,44 @@ static void arm_smmu_cmdq_issue_cmd(struct arm_smmu_device *smmu,
 	spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
 }
 
+/*
+ * The difference between val and sync_idx is bounded by the maximum size of
+ * a queue at 2^20 entries, so 32 bits is plenty for wrap-safe arithmetic.
+ */
+static int arm_smmu_sync_poll_msi(struct arm_smmu_device *smmu, u32 sync_idx)
+{
+	ktime_t timeout = ktime_add_us(ktime_get(), ARM_SMMU_SYNC_TIMEOUT_US);
+	u32 val = smp_cond_load_acquire(&smmu->sync_count,
+					(int)(VAL - sync_idx) >= 0 ||
+					!ktime_before(ktime_get(), timeout));
+
+	return (int)(val - sync_idx) < 0 ? -ETIMEDOUT : 0;
+}
+
 static void arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
 {
 	u64 cmd[CMDQ_ENT_DWORDS];
 	unsigned long flags;
 	bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
+	bool msi = (smmu->features & ARM_SMMU_FEAT_MSI) &&
+		   (smmu->features & ARM_SMMU_FEAT_COHERENCY);
 	struct arm_smmu_cmdq_ent ent = { .opcode = CMDQ_OP_CMD_SYNC };
 	int ret;
 
+	if (msi) {
+		ent.sync.msidata = atomic_inc_return_relaxed(&smmu->sync_nr);
+		ent.sync.msiaddr = virt_to_phys(&smmu->sync_count);
+	}
 	arm_smmu_cmdq_build_cmd(cmd, &ent);
 
 	spin_lock_irqsave(&smmu->cmdq.lock, flags);
 	arm_smmu_cmdq_insert_cmd(smmu, cmd);
-	ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
+	if (!msi)
+		ret = queue_poll_cons(&smmu->cmdq.q, true, wfe);
 	spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
 
+	if (msi)
+		ret = arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
 	if (ret)
 		dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout\n");
 }
@@ -2159,6 +2205,7 @@ static int arm_smmu_init_structures(struct arm_smmu_device *smmu)
 {
 	int ret;
 
+	atomic_set(&smmu->sync_nr, 0);
 	ret = arm_smmu_init_queues(smmu);
 	if (ret)
 		return ret;
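For reference, the multi-waiter protocol this patch implements can be mimicked in userspace. The sketch below is a rough analogue only: C11 atomics, hypothetical names (issue_sync, complete_sync, wait_sync), a bare spin loop standing in for smp_cond_load_acquire(), and the timeout omitted. It shows why overlapping waiters on one incrementing count stay unambiguous: since sync commands complete in order, each waiter only needs the count to reach its own ticket.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint32_t sync_nr;    /* next sequence number to issue */
static _Atomic uint32_t sync_count; /* last completed sequence number */

/* Analogue of atomic_inc_return_relaxed(&smmu->sync_nr): take a ticket. */
static uint32_t issue_sync(void)
{
	return atomic_fetch_add_explicit(&sync_nr, 1, memory_order_relaxed) + 1;
}

/* Analogue of the MSI write: the "device" stores the ticket to sync_count. */
static void complete_sync(uint32_t idx)
{
	atomic_store_explicit(&sync_count, idx, memory_order_release);
}

/* Spin analogue of smp_cond_load_acquire(); wrap-safe signed comparison. */
static void wait_sync(uint32_t idx)
{
	while ((int32_t)(atomic_load_explicit(&sync_count,
					      memory_order_acquire) - idx) < 0)
		;
}

int main(void)
{
	uint32_t a = issue_sync();	/* ticket 1 */
	uint32_t b = issue_sync();	/* ticket 2 */

	complete_sync(a);
	wait_sync(a);			/* satisfied immediately */
	complete_sync(b);
	wait_sync(a);			/* still satisfied: 2 - 1 >= 0 */
	wait_sync(b);
	printf("both syncs observed complete\n");
	return 0;
}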
