Skip to content

Commit 3db4f5b

Browse files
authored
AMDGPU: Refine gfx950 xdl-write-vgpr hazard cases (#117285)
The case of a 2-pass XDL instruction writing a VGPR that is then read by a non-XDL SGEMM/DGEMM was overly conservative by 1 wait state. Previously, for gfx940, the XDL and non-XDL consumer cases happened to have the same number of cycles in all cases. Now, for gfx950, the XDL consumer case requires one additional wait state for 2-pass sources.
1 parent 20bd029 commit 3db4f5b

File tree

2 files changed

+23
-14
lines changed

2 files changed

+23
-14
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2232,8 +2232,8 @@ int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) {
22322232
}
22332233

22342234
static int
2235-
GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses,
2236-
bool IsGFX950) {
2235+
GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates(int NumPasses,
2236+
bool IsGFX950) {
22372237
// xdl def cycles | gfx940 | gfx950
22382238
// 2 pass | 3 4
22392239
// 4 pass | 5 6
@@ -2242,6 +2242,17 @@ GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses,
22422242
return NumPasses + 1 + IsGFX950;
22432243
}
22442244

2245+
static int
2246+
GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates(int NumPasses,
2247+
bool IsGFX950) {
2248+
// xdl def cycles | gfx940 | gfx950
2249+
// 2 pass | 3 3
2250+
// 4 pass | 5 6
2251+
// 8 pass | 9 10
2252+
// 16 pass | 17 18
2253+
return NumPasses + 1 + (NumPasses != 2 && IsGFX950);
2254+
}
2255+
22452256
static int
22462257
GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses) {
22472258
// 2 pass -> 2
@@ -2379,8 +2390,11 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
23792390

23802391
NeedWaitStates =
23812392
isXDL(ST, *MI1)
2382-
? GFX940_XDL_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(
2383-
NumPasses, ST.hasGFX950Insts())
2393+
? (isXDL(ST, *MI)
2394+
? GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates(
2395+
NumPasses, ST.hasGFX950Insts())
2396+
: GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates(
2397+
NumPasses, ST.hasGFX950Insts()))
23842398
: GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(
23852399
NumPasses);
23862400
break;

llvm/test/CodeGen/AMDGPU/mai-hazards-gfx940.mir

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,7 @@ body: |
156156
...
157157
# GCN-LABEL: name: sgemm4x4_mfma_write_vgpr_mfma_read_overlap
158158
# GCN: V_MFMA
159-
# GFX940-NEXT: S_NOP 2
160-
# GFX950-NEXT: S_NOP 3
159+
# GCN-NEXT: S_NOP 2
161160
# GCN-NEXT: V_MFMA
162161
name: sgemm4x4_mfma_write_vgpr_mfma_read_overlap
163162
body: |
@@ -348,8 +347,7 @@ body: |
348347
...
349348
# GCN-LABEL: name: sgemm4x4_mfma_write_vgpr_dgemm_mfma_read_overlap
350349
# GCN: V_MFMA
351-
# GFX940-NEXT: S_NOP 2
352-
# GFX950-NEXT: S_NOP 3
350+
# GCN-NEXT: S_NOP 2
353351
# GCN-NEXT: V_MFMA
354352
name: sgemm4x4_mfma_write_vgpr_dgemm_mfma_read_overlap
355353
body: |
@@ -1403,8 +1401,7 @@ body: |
14031401
...
14041402
# GCN-LABEL: name: sgemm4x4_mfma_write_agpr_dgemm_mfma_read_overlap
14051403
# GCN: V_MFMA
1406-
# GFX940-NEXT: S_NOP 2
1407-
# GFX950-NEXT: S_NOP 3
1404+
# GCN-NEXT: S_NOP 2
14081405
# GCN-NEXT: V_MFMA
14091406
name: sgemm4x4_mfma_write_agpr_dgemm_mfma_read_overlap
14101407
body: |
@@ -1885,8 +1882,7 @@ body: |
18851882
...
18861883
# GCN-LABEL: name: xdl_sgemm4x4_mfma_write_agpr_mfma_read_overlap
18871884
# GCN: V_MFMA
1888-
# GFX940-NEXT: S_NOP 2
1889-
# GFX950-NEXT: S_NOP 3
1885+
# GCN-NEXT: S_NOP 2
18901886
# GCN-NEXT: V_MFMA
18911887
name: xdl_sgemm4x4_mfma_write_agpr_mfma_read_overlap
18921888
body: |
@@ -2220,8 +2216,7 @@ body: |
22202216
# 2 pass source
22212217
# GCN-LABEL: name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srcc
22222218
# GCN: V_MFMA
2223-
# GFX940-NEXT: S_NOP 2
2224-
# GFX950-NEXT: S_NOP 3
2219+
# GCN-NEXT: S_NOP 2
22252220
# GCN-NEXT: V_MFMA
22262221
name: xdl_mfma_2pass_write_vgpr_sgemm_mfma_read_overlap_srcc
22272222
body: |

0 commit comments

Comments
 (0)