@@ -282,3 +282,168 @@ body: |
282
282
%13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
283
283
FLAT_STORE_DWORD %13, %0.sub1, 0, 0, implicit $exec, implicit $flat_scr
284
284
...
285
+
286
+ ---
287
+ # GCN-LABEL: name: diffoporder_add_global_atomic_cmpswap
288
+ # GFX9: GLOBAL_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 1000, 0,
289
+ # GFX9: GLOBAL_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
290
+
291
+ # GFX8: GLOBAL_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
292
+ # GFX8: GLOBAL_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
293
+
294
+ name : diffoporder_add_global_atomic_cmpswap
295
+ body : |
296
+ bb.0.entry:
297
+
298
+ %0:vreg_64 = COPY $vgpr0_vgpr1
299
+
300
+ %1:sgpr_32 = S_MOV_B32 4000
301
+ %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
302
+ %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
303
+ %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
304
+ GLOBAL_ATOMIC_CMPSWAP %6:vreg_64, %0:vreg_64, 0, 0, implicit $exec
305
+
306
+ %8:sgpr_32 = S_MOV_B32 3000
307
+ %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
308
+ %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
309
+ %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
310
+ GLOBAL_ATOMIC_CMPSWAP %13:vreg_64, %0:vreg_64, 0, 0, implicit $exec
311
+ ...
312
+
313
+ ---
314
+ # GCN-LABEL: name: diffoporder_add_flat_atomic_cmpswap
315
+ # GFX9: FLAT_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 1000, 0,
316
+ # GFX9: FLAT_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
317
+
318
+ # GFX8: FLAT_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
319
+ # GFX8: FLAT_ATOMIC_CMPSWAP %{{[0-9]+}}, %0, 0, 0,
320
+
321
+ name : diffoporder_add_flat_atomic_cmpswap
322
+ body : |
323
+ bb.0.entry:
324
+
325
+ %0:vreg_64 = COPY $vgpr0_vgpr1
326
+
327
+ %1:sgpr_32 = S_MOV_B32 4000
328
+ %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
329
+ %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
330
+ %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
331
+ FLAT_ATOMIC_CMPSWAP %6:vreg_64, %0:vreg_64, 0, 0, implicit $exec, implicit $flat_scr
332
+
333
+
334
+ %8:sgpr_32 = S_MOV_B32 3000
335
+ %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
336
+ %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
337
+ %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
338
+ FLAT_ATOMIC_CMPSWAP %13:vreg_64, %0:vreg_64, 0, 0, implicit $exec, implicit $flat_scr
339
+ ...
340
+
341
+ ---
342
+ # GCN-LABEL: name: diffoporder_add_global_atomic_add
343
+ # GFX9: GLOBAL_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 1000, 0,
344
+ # GFX9: GLOBAL_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
345
+
346
+ # GFX8: GLOBAL_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
347
+ # GFX8: GLOBAL_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
348
+
349
+ name : diffoporder_add_global_atomic_add
350
+ body : |
351
+ bb.0.entry:
352
+
353
+ %0:vreg_64 = COPY $vgpr0_vgpr1
354
+
355
+ %1:sgpr_32 = S_MOV_B32 4000
356
+ %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
357
+ %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
358
+ %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
359
+ GLOBAL_ATOMIC_ADD %6:vreg_64, %0.sub0, 0, 0, implicit $exec
360
+
361
+ %8:sgpr_32 = S_MOV_B32 3000
362
+ %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
363
+ %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
364
+ %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
365
+ GLOBAL_ATOMIC_ADD %13:vreg_64, %0.sub0, 0, 0, implicit $exec
366
+ ...
367
+
368
+ ---
369
+ # GCN-LABEL: name: diffoporder_add_flat_atomic_add
370
+ # GFX9: FLAT_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 1000, 0,
371
+ # GFX9: FLAT_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
372
+
373
+ # GFX8: FLAT_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
374
+ # GFX8: FLAT_ATOMIC_ADD %{{[0-9]+}}, %0.sub0, 0, 0,
375
+
376
+ name : diffoporder_add_flat_atomic_add
377
+ body : |
378
+ bb.0.entry:
379
+
380
+ %0:vreg_64 = COPY $vgpr0_vgpr1
381
+
382
+ %1:sgpr_32 = S_MOV_B32 4000
383
+ %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
384
+ %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
385
+ %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
386
+ FLAT_ATOMIC_ADD %6:vreg_64, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
387
+
388
+
389
+ %8:sgpr_32 = S_MOV_B32 3000
390
+ %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
391
+ %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
392
+ %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
393
+ FLAT_ATOMIC_ADD %13:vreg_64, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
394
+ ...
395
+
396
+ ---
397
+ # GCN-LABEL: name: diffoporder_add_global_atomic_add_rtn
398
+ # GFX9: GLOBAL_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 1000, 0,
399
+ # GFX9: GLOBAL_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
400
+
401
+ # GFX8: GLOBAL_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
402
+ # GFX8: GLOBAL_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
403
+
404
+ name : diffoporder_add_global_atomic_add_rtn
405
+ body : |
406
+ bb.0.entry:
407
+
408
+ %0:vreg_64 = COPY $vgpr0_vgpr1
409
+
410
+ %1:sgpr_32 = S_MOV_B32 4000
411
+ %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
412
+ %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
413
+ %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
414
+ %14:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN %6:vreg_64, %0.sub0, 0, 0, implicit $exec
415
+
416
+ %8:sgpr_32 = S_MOV_B32 3000
417
+ %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
418
+ %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
419
+ %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
420
+ %15:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN %13:vreg_64, %0.sub0, 0, 0, implicit $exec
421
+ ...
422
+
423
+ ---
424
+ # GCN-LABEL: name: diffoporder_add_flat_atomic_add_rtn
425
+ # GFX9: FLAT_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 1000, 0,
426
+ # GFX9: FLAT_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
427
+
428
+ # GFX8: FLAT_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
429
+ # GFX8: FLAT_ATOMIC_ADD_RTN %{{[0-9]+}}, %0.sub0, 0, 0,
430
+
431
+ name : diffoporder_add_flat_atomic_add_rtn
432
+ body : |
433
+ bb.0.entry:
434
+
435
+ %0:vreg_64 = COPY $vgpr0_vgpr1
436
+
437
+ %1:sgpr_32 = S_MOV_B32 4000
438
+ %2:vgpr_32, %3:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %1, 0, implicit $exec
439
+ %4:vgpr_32, dead %5:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %3, 0, implicit $exec
440
+ %6:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %4, %subreg.sub1
441
+ %14:vgpr_32 = FLAT_ATOMIC_ADD_RTN %6:vreg_64, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
442
+
443
+
444
+ %8:sgpr_32 = S_MOV_B32 3000
445
+ %9:vgpr_32, %10:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub0, %8, 0, implicit $exec
446
+ %11:vgpr_32, dead %12:sreg_64_xexec = V_ADDC_U32_e64 %0.sub1, 0, %10, 0, implicit $exec
447
+ %13:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %11, %subreg.sub1
448
+ %15:vgpr_32 = FLAT_ATOMIC_ADD_RTN %13:vreg_64, %0.sub0, 0, 0, implicit $exec, implicit $flat_scr
449
+ ...
0 commit comments