Skip to content

Commit 70b565b

Browse files
vaibhav92mpe
authored and committed
cxl: Prevent adapter reset if an active context exists
This patch prevents resetting the cxl adapter via sysfs in the presence of one or more active cxl_context instances on it. This protects against an unrecoverable error caused by the PSL still owning a dirty cache line after reset while the host tries to touch the same cache line. In case a force reset of the card is required irrespective of any active contexts, the int value -1 can be stored in the 'reset' sysfs attribute of the card. The patch introduces a new atomic_t member named contexts_num inside struct cxl that holds the number of active contexts attached to the card, which is checked against '0' before proceeding with the reset. To guard against a race condition where a context is activated just after the reset check is performed, contexts_num is atomically set to '-1' after the reset check to indicate that no more contexts can be activated on the card. Before activating a context we atomically test whether contexts_num is non-negative and, if so, increment its value by one. A negative value of contexts_num indicates that the card is about to be reset, and context activation fails with an error at that point. Fixes: 62fa19d ("cxl: Add ability to reset the card") Cc: [email protected] # v4.0+ Acked-by: Frederic Barrat <[email protected]> Reviewed-by: Andrew Donnellan <[email protected]> Signed-off-by: Vaibhav Jain <[email protected]> Signed-off-by: Michael Ellerman <[email protected]>
1 parent 65bc3ec commit 70b565b

File tree

9 files changed

+121
-7
lines changed

9 files changed

+121
-7
lines changed

Documentation/ABI/testing/sysfs-class-cxl

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -220,8 +220,11 @@ What: /sys/class/cxl/<card>/reset
220220
Date: October 2014
221221
222222
Description: write only
223-
Writing 1 will issue a PERST to card which may cause the card
224-
to reload the FPGA depending on load_image_on_perst.
223+
Writing 1 will issue a PERST to card provided there are no
224+
contexts active on any one of the card AFUs. This may cause
225+
the card to reload the FPGA depending on load_image_on_perst.
226+
Writing -1 will do a force PERST irrespective of any active
227+
contexts on the card AFUs.
225228
Users: https://github.com/ibm-capi/libcxl
226229

227230
What: /sys/class/cxl/<card>/perst_reloads_same_image (not in a guest)

drivers/misc/cxl/api.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,14 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed,
229229
if (ctx->status == STARTED)
230230
goto out; /* already started */
231231

232+
/*
233+
* Increment the mapped context count for adapter. This also checks
234+
* if adapter_context_lock is taken.
235+
*/
236+
rc = cxl_adapter_context_get(ctx->afu->adapter);
237+
if (rc)
238+
goto out;
239+
232240
if (task) {
233241
ctx->pid = get_task_pid(task, PIDTYPE_PID);
234242
ctx->glpid = get_task_pid(task->group_leader, PIDTYPE_PID);
@@ -240,6 +248,7 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed,
240248

241249
if ((rc = cxl_ops->attach_process(ctx, kernel, wed, 0))) {
242250
put_pid(ctx->pid);
251+
cxl_adapter_context_put(ctx->afu->adapter);
243252
cxl_ctx_put();
244253
goto out;
245254
}

drivers/misc/cxl/context.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,9 @@ int __detach_context(struct cxl_context *ctx)
238238
put_pid(ctx->glpid);
239239

240240
cxl_ctx_put();
241+
242+
/* Decrease the attached context count on the adapter */
243+
cxl_adapter_context_put(ctx->afu->adapter);
241244
return 0;
242245
}
243246

drivers/misc/cxl/cxl.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -618,6 +618,14 @@ struct cxl {
618618
bool perst_select_user;
619619
bool perst_same_image;
620620
bool psl_timebase_synced;
621+
622+
/*
623+
* number of contexts mapped on to this card. Possible values are:
624+
* >0: Number of contexts mapped and new one can be mapped.
625+
* 0: No active contexts and new ones can be mapped.
626+
* -1: No contexts mapped and new ones cannot be mapped.
627+
*/
628+
atomic_t contexts_num;
621629
};
622630

623631
int cxl_pci_alloc_one_irq(struct cxl *adapter);
@@ -944,4 +952,20 @@ bool cxl_pci_is_vphb_device(struct pci_dev *dev);
944952

945953
/* decode AFU error bits in the PSL register PSL_SERR_An */
946954
void cxl_afu_decode_psl_serr(struct cxl_afu *afu, u64 serr);
955+
956+
/*
957+
* Increments the number of attached contexts on an adapter.
958+
* In case an adapter_context_lock is taken the return -EBUSY.
959+
*/
960+
int cxl_adapter_context_get(struct cxl *adapter);
961+
962+
/* Decrements the number of attached contexts on an adapter */
963+
void cxl_adapter_context_put(struct cxl *adapter);
964+
965+
/* If no active contexts then prevents contexts from being attached */
966+
int cxl_adapter_context_lock(struct cxl *adapter);
967+
968+
/* Unlock the contexts-lock if taken. Warn and force unlock otherwise */
969+
void cxl_adapter_context_unlock(struct cxl *adapter);
970+
947971
#endif

drivers/misc/cxl/file.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,11 +205,22 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
205205
ctx->pid = get_task_pid(current, PIDTYPE_PID);
206206
ctx->glpid = get_task_pid(current->group_leader, PIDTYPE_PID);
207207

208+
/*
209+
* Increment the mapped context count for adapter. This also checks
210+
* if adapter_context_lock is taken.
211+
*/
212+
rc = cxl_adapter_context_get(ctx->afu->adapter);
213+
if (rc) {
214+
afu_release_irqs(ctx, ctx);
215+
goto out;
216+
}
217+
208218
trace_cxl_attach(ctx, work.work_element_descriptor, work.num_interrupts, amr);
209219

210220
if ((rc = cxl_ops->attach_process(ctx, false, work.work_element_descriptor,
211221
amr))) {
212222
afu_release_irqs(ctx, ctx);
223+
cxl_adapter_context_put(ctx->afu->adapter);
213224
goto out;
214225
}
215226

drivers/misc/cxl/guest.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1152,6 +1152,9 @@ struct cxl *cxl_guest_init_adapter(struct device_node *np, struct platform_devic
11521152
if ((rc = cxl_sysfs_adapter_add(adapter)))
11531153
goto err_put1;
11541154

1155+
/* release the context lock as the adapter is configured */
1156+
cxl_adapter_context_unlock(adapter);
1157+
11551158
return adapter;
11561159

11571160
err_put1:

drivers/misc/cxl/main.c

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -243,8 +243,10 @@ struct cxl *cxl_alloc_adapter(void)
243243
if (dev_set_name(&adapter->dev, "card%i", adapter->adapter_num))
244244
goto err2;
245245

246-
return adapter;
246+
/* start with context lock taken */
247+
atomic_set(&adapter->contexts_num, -1);
247248

249+
return adapter;
248250
err2:
249251
cxl_remove_adapter_nr(adapter);
250252
err1:
@@ -286,6 +288,44 @@ int cxl_afu_select_best_mode(struct cxl_afu *afu)
286288
return 0;
287289
}
288290

291+
int cxl_adapter_context_get(struct cxl *adapter)
292+
{
293+
int rc;
294+
295+
rc = atomic_inc_unless_negative(&adapter->contexts_num);
296+
return rc >= 0 ? 0 : -EBUSY;
297+
}
298+
299+
void cxl_adapter_context_put(struct cxl *adapter)
300+
{
301+
atomic_dec_if_positive(&adapter->contexts_num);
302+
}
303+
304+
int cxl_adapter_context_lock(struct cxl *adapter)
305+
{
306+
int rc;
307+
/* no active contexts -> contexts_num == 0 */
308+
rc = atomic_cmpxchg(&adapter->contexts_num, 0, -1);
309+
return rc ? -EBUSY : 0;
310+
}
311+
312+
void cxl_adapter_context_unlock(struct cxl *adapter)
313+
{
314+
int val = atomic_cmpxchg(&adapter->contexts_num, -1, 0);
315+
316+
/*
317+
* contexts lock taken -> contexts_num == -1
318+
* If not true then show a warning and force reset the lock.
319+
* This will happen when context_unlock was requested without
320+
* doing a context_lock.
321+
*/
322+
if (val != -1) {
323+
atomic_set(&adapter->contexts_num, 0);
324+
WARN(1, "Adapter context unlocked with %d active contexts",
325+
val);
326+
}
327+
}
328+
289329
static int __init init_cxl(void)
290330
{
291331
int rc = 0;

drivers/misc/cxl/pci.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1487,6 +1487,8 @@ static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev)
14871487
if ((rc = cxl_native_register_psl_err_irq(adapter)))
14881488
goto err;
14891489

1490+
/* Release the context lock as adapter is configured */
1491+
cxl_adapter_context_unlock(adapter);
14901492
return 0;
14911493

14921494
err:

drivers/misc/cxl/sysfs.c

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,12 +75,31 @@ static ssize_t reset_adapter_store(struct device *device,
7575
int val;
7676

7777
rc = sscanf(buf, "%i", &val);
78-
if ((rc != 1) || (val != 1))
78+
if ((rc != 1) || (val != 1 && val != -1))
7979
return -EINVAL;
8080

81-
if ((rc = cxl_ops->adapter_reset(adapter)))
82-
return rc;
83-
return count;
81+
/*
82+
* See if we can lock the context mapping that's only allowed
83+
* when there are no contexts attached to the adapter. Once
84+
* taken this will also prevent any context from getting activated.
85+
*/
86+
if (val == 1) {
87+
rc = cxl_adapter_context_lock(adapter);
88+
if (rc)
89+
goto out;
90+
91+
rc = cxl_ops->adapter_reset(adapter);
92+
/* In case reset failed release context lock */
93+
if (rc)
94+
cxl_adapter_context_unlock(adapter);
95+
96+
} else if (val == -1) {
97+
/* Perform a forced adapter reset */
98+
rc = cxl_ops->adapter_reset(adapter);
99+
}
100+
101+
out:
102+
return rc ? rc : count;
84103
}
85104

86105
static ssize_t load_image_on_perst_show(struct device *device,

0 commit comments

Comments
 (0)