Skip to content

Commit 2442b74

Browse files
davejiang authored and vinodkoul committed
dmaengine: idxd: process batch descriptor completion record faults
Add event log processing for faults on user batch descriptor completion records.

When encountering an event log entry for a page fault on a completion record, the driver is expected to do the following:
1. If the "first error in batch" bit in the event log entry error info is set, discard any previously recorded errors associated with the "batch identifier".
2. Fix the page fault according to the fault address in the event log. If successful, write the completion record to the fault address in user space.
3. If an error is encountered while writing the completion record and it is associated with a descriptor in the batch, the driver associates the error with the batch identifier of the event log entry and tracks it until the event log entry for the corresponding batch descriptor is encountered.

While processing an event log entry for a batch descriptor with an error indicating that one or more descriptors in the batch had event log entries, the driver will do the following before writing the batch completion record:
1. If the status field of the completion record is 0x1, the driver will change it to error code 0x5 (one or more operations in batch completed with status not successful) and change the result field to 1.
2. If the status is error code 0x6 (page fault on batch descriptor list address), change the result field to 1.
3. If the status is any other value, the completion record is not changed.
4. Clear the recorded error in preparation for the next batch with the same batch identifier.

The result field is for user software to determine whether to set the "Batch Error" flag bit in the descriptor for continuation of partial batch descriptor completion. See DSA spec 2.0 for additional information.

If no error has been recorded for the batch, the batch completion record is written to user space as is.
Tested-by: Tony Zhu <[email protected]> Signed-off-by: Dave Jiang <[email protected]> Co-developed-by: Fenghua Yu <[email protected]> Signed-off-by: Fenghua Yu <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Vinod Koul <[email protected]>
1 parent 6926987 commit 2442b74

File tree

5 files changed

+78
-25
lines changed

5 files changed

+78
-25
lines changed

drivers/dma/idxd/idxd.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,8 @@ struct idxd_driver_data {
265265
int compl_size;
266266
int align;
267267
int evl_cr_off;
268+
int cr_status_off;
269+
int cr_result_off;
268270
};
269271

270272
struct idxd_evl {
@@ -278,6 +280,7 @@ struct idxd_evl {
278280
u16 size;
279281
u16 head;
280282
unsigned long *bmap;
283+
bool batch_fail[IDXD_MAX_BATCH_IDENT];
281284
};
282285

283286
struct idxd_evl_fault {

drivers/dma/idxd/init.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ static struct idxd_driver_data idxd_driver_data[] = {
4747
.align = 32,
4848
.dev_type = &dsa_device_type,
4949
.evl_cr_off = offsetof(struct dsa_evl_entry, cr),
50+
.cr_status_off = offsetof(struct dsa_completion_record, status),
51+
.cr_result_off = offsetof(struct dsa_completion_record, result),
5052
},
5153
[IDXD_TYPE_IAX] = {
5254
.name_prefix = "iax",
@@ -55,6 +57,8 @@ static struct idxd_driver_data idxd_driver_data[] = {
5557
.align = 64,
5658
.dev_type = &iax_device_type,
5759
.evl_cr_off = offsetof(struct iax_evl_entry, cr),
60+
.cr_status_off = offsetof(struct iax_completion_record, status),
61+
.cr_result_off = offsetof(struct iax_completion_record, error_code),
5862
},
5963
};
6064

drivers/dma/idxd/irq.c

Lines changed: 67 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -225,37 +225,79 @@ static void idxd_evl_fault_work(struct work_struct *work)
225225
struct idxd_wq *wq = fault->wq;
226226
struct idxd_device *idxd = wq->idxd;
227227
struct device *dev = &idxd->pdev->dev;
228+
struct idxd_evl *evl = idxd->evl;
228229
struct __evl_entry *entry_head = fault->entry;
229230
void *cr = (void *)entry_head + idxd->data->evl_cr_off;
230-
int cr_size = idxd->data->compl_size, copied;
231+
int cr_size = idxd->data->compl_size;
232+
u8 *status = (u8 *)cr + idxd->data->cr_status_off;
233+
u8 *result = (u8 *)cr + idxd->data->cr_result_off;
234+
int copied, copy_size;
235+
bool *bf;
231236

232237
switch (fault->status) {
233238
case DSA_COMP_CRA_XLAT:
234-
case DSA_COMP_DRAIN_EVL:
235-
/*
236-
* Copy completion record to fault_addr in user address space
237-
* that is found by wq and PASID.
238-
*/
239-
copied = idxd_copy_cr(wq, entry_head->pasid,
240-
entry_head->fault_addr,
241-
cr, cr_size);
242-
/*
243-
* The task that triggered the page fault is unknown currently
244-
* because multiple threads may share the user address
245-
* space or the task exits already before this fault.
246-
* So if the copy fails, SIGSEGV can not be sent to the task.
247-
* Just print an error for the failure. The user application
248-
* waiting for the completion record will time out on this
249-
* failure.
250-
*/
251-
if (copied != cr_size) {
252-
dev_dbg_ratelimited(dev, "Failed to write to completion record. (%d:%d)\n",
253-
cr_size, copied);
239+
if (entry_head->batch && entry_head->first_err_in_batch)
240+
evl->batch_fail[entry_head->batch_id] = false;
241+
242+
copy_size = cr_size;
243+
break;
244+
case DSA_COMP_BATCH_EVL_ERR:
245+
bf = &evl->batch_fail[entry_head->batch_id];
246+
247+
copy_size = entry_head->rcr || *bf ? cr_size : 0;
248+
if (*bf) {
249+
if (*status == DSA_COMP_SUCCESS)
250+
*status = DSA_COMP_BATCH_FAIL;
251+
*result = 1;
252+
*bf = false;
254253
}
255254
break;
255+
case DSA_COMP_DRAIN_EVL:
256+
copy_size = cr_size;
257+
break;
256258
default:
257-
dev_dbg_ratelimited(dev, "Unrecognized error code: %#x\n",
258-
DSA_COMP_STATUS(entry_head->error));
259+
copy_size = 0;
260+
dev_dbg_ratelimited(dev, "Unrecognized error code: %#x\n", fault->status);
261+
break;
262+
}
263+
264+
if (copy_size == 0)
265+
return;
266+
267+
/*
268+
* Copy completion record to fault_addr in user address space
269+
* that is found by wq and PASID.
270+
*/
271+
copied = idxd_copy_cr(wq, entry_head->pasid, entry_head->fault_addr,
272+
cr, copy_size);
273+
/*
274+
* The task that triggered the page fault is unknown currently
275+
* because multiple threads may share the user address
276+
* space or the task exits already before this fault.
277+
* So if the copy fails, SIGSEGV can not be sent to the task.
278+
* Just print an error for the failure. The user application
279+
* waiting for the completion record will time out on this
280+
* failure.
281+
*/
282+
switch (fault->status) {
283+
case DSA_COMP_CRA_XLAT:
284+
if (copied != copy_size) {
285+
dev_dbg_ratelimited(dev, "Failed to write to completion record: (%d:%d)\n",
286+
copy_size, copied);
287+
if (entry_head->batch)
288+
evl->batch_fail[entry_head->batch_id] = true;
289+
}
290+
break;
291+
case DSA_COMP_BATCH_EVL_ERR:
292+
if (copied != copy_size) {
293+
dev_dbg_ratelimited(dev, "Failed to write to batch completion record: (%d:%d)\n",
294+
copy_size, copied);
295+
}
296+
break;
297+
case DSA_COMP_DRAIN_EVL:
298+
if (copied != copy_size)
299+
dev_dbg_ratelimited(dev, "Failed to write to drain completion record: (%d:%d)\n",
300+
copy_size, copied);
259301
break;
260302
}
261303

@@ -274,7 +316,8 @@ static void process_evl_entry(struct idxd_device *idxd,
274316
} else {
275317
status = DSA_COMP_STATUS(entry_head->error);
276318

277-
if (status == DSA_COMP_CRA_XLAT || status == DSA_COMP_DRAIN_EVL) {
319+
if (status == DSA_COMP_CRA_XLAT || status == DSA_COMP_DRAIN_EVL ||
320+
status == DSA_COMP_BATCH_EVL_ERR) {
278321
struct idxd_evl_fault *fault;
279322
int ent_size = evl_ent_size(idxd);
280323

drivers/dma/idxd/registers.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ union gen_cap_reg {
3535
u64 drain_readback:1;
3636
u64 rsvd2:3;
3737
u64 evl_support:2;
38-
u64 rsvd4:1;
38+
u64 batch_continuation:1;
3939
u64 max_xfer_shift:5;
4040
u64 max_batch_shift:4;
4141
u64 max_ims_mult:6;
@@ -577,6 +577,8 @@ union evl_status_reg {
577577
u64 bits;
578578
} __packed;
579579

580+
#define IDXD_MAX_BATCH_IDENT 256
581+
580582
struct __evl_entry {
581583
u64 rsvd:2;
582584
u64 desc_valid:1;

include/uapi/linux/idxd.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ enum dsa_completion_status {
136136
DSA_COMP_HW_ERR_DRB,
137137
DSA_COMP_TRANSLATION_FAIL,
138138
DSA_COMP_DRAIN_EVL = 0x26,
139+
DSA_COMP_BATCH_EVL_ERR,
139140
};
140141

141142
enum iax_completion_status {

0 commit comments

Comments
 (0)