@@ -1334,92 +1334,82 @@ main_body:
1334
1334
ret void
1335
1335
}
1336
1336
; Diff hunk for @flat_atomic_fadd_f64_noret_pat: the definition is switched
; from `define amdgpu_kernel void` to a plain `define void`. The IR body is
; untouched: a seq_cst `atomicrmw fadd` of double 4.0 through the flat pointer
; %ptr, written without a syncscope.
; Effect on the expected output for both check prefixes shown below:
;   * the s_load_dwordx2 of the pointer, the s_waitcnt lgkmcnt(0) that followed
;     it, and the copy of s[0:1] into v[0:1] are dropped; the atomic still
;     addresses through v[0:1]
;   * an s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) is now expected as the first
;     instruction
;   * the final s_endpgm becomes s_setpc_b64 s[30:31]
; NOTE(review): the check lines look autogenerated; presumably they should be
; regenerated by script rather than edited by hand - confirm.
1337
- define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat (ptr %ptr ) #1 {
1337
+ define void @flat_atomic_fadd_f64_noret_pat (ptr %ptr ) #1 {
1338
1338
; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat:
1339
1339
; GFX90A: ; %bb.0: ; %main_body
1340
- ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1340
+ ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1341
1341
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1342
1342
; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000
1343
- ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1344
- ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
1345
1343
; GFX90A-NEXT: buffer_wbl2
1346
1344
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
1347
1345
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1348
1346
; GFX90A-NEXT: buffer_invl2
1349
1347
; GFX90A-NEXT: buffer_wbinvl1_vol
1350
- ; GFX90A-NEXT: s_endpgm
1348
+ ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1351
1349
;
1352
1350
; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat:
1353
1351
; GFX942: ; %bb.0: ; %main_body
1354
- ; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1352
+ ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1355
1353
; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0
1356
- ; GFX942-NEXT: s_waitcnt lgkmcnt(0)
1357
- ; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
1358
1354
; GFX942-NEXT: buffer_wbl2 sc0 sc1
1359
1355
; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] sc1
1360
1356
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1361
1357
; GFX942-NEXT: buffer_inv sc0 sc1
1362
- ; GFX942-NEXT: s_endpgm
1358
+ ; GFX942-NEXT: s_setpc_b64 s[30:31]
1363
1359
main_body:
; %ret is not referenced again before `ret void`, so only the memory effect of
; the atomic is exercised here.
1364
1360
%ret = atomicrmw fadd ptr %ptr , double 4 .0 seq_cst , !noalias.addrspace !1 , !amdgpu.no.fine.grained.memory !0
1365
1361
ret void
1366
1362
}
1367
1363
; Diff hunk for @flat_atomic_fadd_f64_noret_pat_agent: the definition is
; switched from `define amdgpu_kernel void` to a plain `define void`. The IR
; body is untouched: a seq_cst `atomicrmw fadd` of double 4.0 through the flat
; pointer %ptr with syncscope("agent").
; Effect on the expected output for both check prefixes shown below:
;   * the s_load_dwordx2 of the pointer, the s_waitcnt lgkmcnt(0) that followed
;     it, and the copy of s[0:1] into v[0:1] are dropped; the atomic still
;     addresses through v[0:1]
;   * an s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) is now expected as the first
;     instruction
;   * the final s_endpgm becomes s_setpc_b64 s[30:31]
; The cache-maintenance expectations around the atomic are unchanged context
; lines: the first prefix expects only buffer_wbinvl1_vol after the wait, the
; second expects buffer_wbl2 sc1 before and buffer_inv sc1 after the atomic.
; NOTE(review): the check lines look autogenerated; presumably they should be
; regenerated by script rather than edited by hand - confirm.
1368
- define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent (ptr %ptr ) #1 {
1364
+ define void @flat_atomic_fadd_f64_noret_pat_agent (ptr %ptr ) #1 {
1369
1365
; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_agent:
1370
1366
; GFX90A: ; %bb.0: ; %main_body
1371
- ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1367
+ ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1372
1368
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1373
1369
; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000
1374
- ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1375
- ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
1376
1370
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
1377
1371
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1378
1372
; GFX90A-NEXT: buffer_wbinvl1_vol
1379
- ; GFX90A-NEXT: s_endpgm
1373
+ ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1380
1374
;
1381
1375
; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat_agent:
1382
1376
; GFX942: ; %bb.0: ; %main_body
1383
- ; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1377
+ ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1384
1378
; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0
1385
- ; GFX942-NEXT: s_waitcnt lgkmcnt(0)
1386
- ; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
1387
1379
; GFX942-NEXT: buffer_wbl2 sc1
1388
1380
; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
1389
1381
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1390
1382
; GFX942-NEXT: buffer_inv sc1
1391
- ; GFX942-NEXT: s_endpgm
1383
+ ; GFX942-NEXT: s_setpc_b64 s[30:31]
1392
1384
main_body:
; %ret is not referenced again before `ret void`, so only the memory effect of
; the atomic is exercised here.
1393
1385
%ret = atomicrmw fadd ptr %ptr , double 4 .0 syncscope("agent" ) seq_cst , !noalias.addrspace !1 , !amdgpu.no.fine.grained.memory !0
1394
1386
ret void
1395
1387
}
1396
1388
1397
- define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system (ptr %ptr ) #1 {
1389
+ define void @flat_atomic_fadd_f64_noret_pat_system (ptr %ptr ) #1 {
1398
1390
; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_system:
1399
1391
; GFX90A: ; %bb.0: ; %main_body
1400
- ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1392
+ ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1401
1393
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1402
1394
; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000
1403
- ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1404
- ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
1405
1395
; GFX90A-NEXT: buffer_wbl2
1406
1396
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
1407
1397
; GFX90A-NEXT: s_waitcnt vmcnt(0)
1408
1398
; GFX90A-NEXT: buffer_invl2
1409
1399
; GFX90A-NEXT: buffer_wbinvl1_vol
1410
- ; GFX90A-NEXT: s_endpgm
1400
+ ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1401
+ ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1411
1402
;
1412
1403
; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat_system:
1413
1404
; GFX942: ; %bb.0: ; %main_body
1414
- ; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1405
+ ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1415
1406
; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0
1416
- ; GFX942-NEXT: s_waitcnt lgkmcnt(0)
1417
- ; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
1418
1407
; GFX942-NEXT: buffer_wbl2 sc0 sc1
1419
1408
; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[2:3] sc1
1420
1409
; GFX942-NEXT: s_waitcnt vmcnt(0)
1421
1410
; GFX942-NEXT: buffer_inv sc0 sc1
1422
- ; GFX942-NEXT: s_endpgm
1411
+ ; GFX942-NEXT: s_waitcnt lgkmcnt(0)
1412
+ ; GFX942-NEXT: s_setpc_b64 s[30:31]
1423
1413
main_body:
1424
1414
%ret = atomicrmw fadd ptr %ptr , double 4 .0 syncscope("one-as" ) seq_cst , !noalias.addrspace !1 , !amdgpu.no.fine.grained.memory !0
1425
1415
ret void
@@ -1506,30 +1496,26 @@ main_body:
1506
1496
ret double %ret
1507
1497
}
1508
1498
1509
- define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe (ptr %ptr ) {
1499
+ define void @flat_atomic_fadd_f64_noret_pat_agent_safe (ptr %ptr ) {
1510
1500
; GFX90A-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe:
1511
1501
; GFX90A: ; %bb.0: ; %main_body
1512
- ; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1502
+ ; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1513
1503
; GFX90A-NEXT: v_mov_b32_e32 v2, 0
1514
1504
; GFX90A-NEXT: v_mov_b32_e32 v3, 0x40100000
1515
- ; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
1516
- ; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
1517
1505
; GFX90A-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
1518
1506
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1519
1507
; GFX90A-NEXT: buffer_wbinvl1_vol
1520
- ; GFX90A-NEXT: s_endpgm
1508
+ ; GFX90A-NEXT: s_setpc_b64 s[30:31]
1521
1509
;
1522
1510
; GFX942-LABEL: flat_atomic_fadd_f64_noret_pat_agent_safe:
1523
1511
; GFX942: ; %bb.0: ; %main_body
1524
- ; GFX942-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
1512
+ ; GFX942-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1525
1513
; GFX942-NEXT: v_mov_b64_e32 v[2:3], 4.0
1526
- ; GFX942-NEXT: s_waitcnt lgkmcnt(0)
1527
- ; GFX942-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
1528
1514
; GFX942-NEXT: buffer_wbl2 sc1
1529
1515
; GFX942-NEXT: flat_atomic_add_f64 v[0:1], v[2:3]
1530
1516
; GFX942-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1531
1517
; GFX942-NEXT: buffer_inv sc1
1532
- ; GFX942-NEXT: s_endpgm
1518
+ ; GFX942-NEXT: s_setpc_b64 s[30:31]
1533
1519
main_body:
1534
1520
%ret = atomicrmw fadd ptr %ptr , double 4 .0 syncscope("agent" ) seq_cst , !noalias.addrspace !1 , !amdgpu.no.fine.grained.memory !0
1535
1521
ret void
0 commit comments