Skip to content

Commit 89c8cee

Browse files
arsenm authored and pravinjagtap committed
AMDGPU: Handle v_mfma_f64_16x16x4_f64 srcc write VGPR hazard change for gfx950 (llvm#117283)
On gfx950, the number of wait states required before an sgemm/dgemm reads, as srcC, a VGPR written by v_mfma_f64_16x16x4_f64 increases from 9 to 17.
1 parent 3343485 commit 89c8cee

File tree

2 files changed

+38
-13
lines changed

2 files changed

+38
-13
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2298,6 +2298,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
22982298
const int SMFMA16x16WritesVGPROverlappedDMFMASrcCWaitStates = 9;
22992299
const int SMFMA32x32WritesVGPROverlappedDMFMASrcCWaitStates = 17;
23002300
const int DMFMA16x16WritesVGPROverlappedSrcCWaitStates = 9;
2301+
const int GFX950_DMFMA16x16WritesVGPROverlappedSrcCWaitStates = 17;
23012302
const int DMFMA4x4WritesVGPROverlappedSrcCWaitStates = 4;
23022303
const int SMFMA4x4WritesVGPROverlappedSrcABWaitStates = 5;
23032304
const int SMFMA16x16WritesVGPROverlappedSrcABWaitStates = 11;
@@ -2355,7 +2356,10 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
23552356
case AMDGPU::V_MFMA_F64_16X16X4F64_mac_e64:
23562357
case AMDGPU::V_MFMA_F64_16X16X4F64_mac_vgprcd_e64:
23572358
if (!isXDL(ST, *MI))
2358-
NeedWaitStates = DMFMA16x16WritesVGPROverlappedSrcCWaitStates;
2359+
NeedWaitStates =
2360+
ST.hasGFX950Insts()
2361+
? GFX950_DMFMA16x16WritesVGPROverlappedSrcCWaitStates
2362+
: DMFMA16x16WritesVGPROverlappedSrcCWaitStates;
23592363
break;
23602364
case AMDGPU::V_MFMA_F64_4X4X4F64_e64:
23612365
case AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64:

llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir

Lines changed: 33 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -298,8 +298,12 @@ body: |
298298
...
299299
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_mfma_read_overlap
300300
# GCN: V_MFMA
301-
# GCN-NEXT: S_NOP 7
302-
# GCN-NEXT: S_NOP 0
301+
# GFX940-NEXT: S_NOP 7
302+
# GFX940-NEXT: S_NOP 0
303+
304+
# GFX950-NEXT: S_NOP 7
305+
# GFX950-NEXT: S_NOP 7
306+
# GFX950-NEXT: S_NOP 0
303307
# GCN-NEXT: V_MFMA
304308
name: dgemm16x16_mfma_write_vgpr_mfma_read_overlap
305309
body: |
@@ -319,8 +323,12 @@ body: |
319323
...
320324
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_read_overlap
321325
# GCN: V_MFMA
322-
# GCN-NEXT: S_NOP 7
323-
# GCN-NEXT: S_NOP 0
326+
# GFX940-NEXT: S_NOP 7
327+
# GFX940-NEXT: S_NOP 0
328+
329+
# GFX950-NEXT: S_NOP 7
330+
# GFX950-NEXT: S_NOP 7
331+
# GFX950-NEXT: S_NOP 0
324332
# GCN-NEXT: V_MFMA
325333
name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_read_overlap
326334
body: |
@@ -549,8 +557,12 @@ body: |
549557
...
550558
# GCN-LABEL: name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap
551559
# GCN: V_MFMA
552-
# GCN-NEXT: S_NOP 7
553-
# GCN-NEXT: S_NOP 2
560+
# GFX940-NEXT: S_NOP 7
561+
# GFX940-NEXT: S_NOP 2
562+
563+
# GFX950-NEXT: S_NOP 7
564+
# GFX950-NEXT: S_NOP 7
565+
# GFX950-NEXT: S_NOP 0
554566
# GCN-NEXT: V_MFMA
555567
name: dgemm16x16_mfma_write_vgpr_sgemm_mfma_srca_read_overlap
556568
body: |
@@ -1333,8 +1345,12 @@ body: |
13331345
...
13341346
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_mfma_read_overlap
13351347
# GCN: V_MFMA
1336-
# GCN-NEXT: S_NOP 7
1337-
# GCN-NEXT: S_NOP 0
1348+
# GFX940-NEXT: S_NOP 7
1349+
# GFX940-NEXT: S_NOP 0
1350+
1351+
# GFX950-NEXT: S_NOP 7
1352+
# GFX950-NEXT: S_NOP 7
1353+
# GFX950-NEXT: S_NOP 0
13381354
# GCN-NEXT: V_MFMA
13391355
name: dgemm16x16_mfma_write_agpr_mfma_read_overlap
13401356
body: |
@@ -1354,8 +1370,13 @@ body: |
13541370
...
13551371
# GCN-LABEL: name: dgemm16x16_mfma_write_agpr_sgemm_mfma_read_overlap
13561372
# GCN: V_MFMA
1357-
# GCN-NEXT: S_NOP 7
1358-
# GCN-NEXT: S_NOP 0
1373+
# GFX940-NEXT: S_NOP 7
1374+
# GFX940-NEXT: S_NOP 0
1375+
1376+
# GFX950-NEXT: S_NOP 7
1377+
# GFX950-NEXT: S_NOP 7
1378+
# GFX950-NEXT: S_NOP 0
1379+
13591380
# GCN-NEXT: V_MFMA
13601381
name: dgemm16x16_mfma_write_agpr_sgemm_mfma_read_overlap
13611382
body: |
@@ -2502,8 +2523,8 @@ body: |
25022523
...
25032524
# GCN-LABEL: name: xdl_4pass_mfma_write_agpr_smfmac_read_overlap_srcc
25042525
# GCN: V_MFMA
2505-
# GFX940: S_NOP 4
2506-
# GFX950: S_NOP 5
2526+
# GFX940-NEXT: S_NOP 4
2527+
# GFX950-NEXT: S_NOP 5
25072528
# GCN-NEXT: V_SMFMAC_
25082529
name: xdl_4pass_mfma_write_agpr_smfmac_read_overlap_srcc
25092530
body: |

0 commit comments

Comments
 (0)