Skip to content

Commit 9d39bc2

Browse files
VigneshwarJ authored and llvmbot committed
AMDGPU: Handle gfx950 XDL-write-VGPR-VALU-Mem-Exp wait state change (#126727)
(cherry picked from commit a2263eb)
1 parent 4c4ed5e commit 9d39bc2

7 files changed

+389
-374
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2611,12 +2611,14 @@ static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses) {
26112611
return NumPasses + 3;
26122612
}
26132613

2614-
static int GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses) {
2615-
// 2 pass -> 5
2616-
// 4 pass -> 7
2617-
// 8 pass -> 11
2618-
// 16 pass -> 19
2619-
return NumPasses + 3;
2614+
static int GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses,
2615+
bool IsGFX950) {
2616+
// xdl def cycles | gfx940 | gfx950
2617+
// 2 pass | 5 5
2618+
// 4 pass | 7 8
2619+
// 8 pass | 11 12
2620+
// 16 pass | 19 20
2621+
return NumPasses + 3 + (NumPasses != 2 && IsGFX950);
26202622
}
26212623

26222624
static int GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses) {
@@ -2767,7 +2769,8 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
27672769
} else if (ST.hasGFX940Insts()) {
27682770
NeedWaitStates =
27692771
isXDL(ST, *MFMA)
2770-
? GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(NumPasses)
2772+
? GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(
2773+
NumPasses, ST.hasGFX950Insts())
27712774
: GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates(
27722775
NumPasses);
27732776
} else {

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16(<8 x bfloat> %arg0, <8 x
4949
; GCN-NEXT: v_mov_b32_e32 v9, s17
5050
; GCN-NEXT: v_mov_b32_e32 v10, s18
5151
; GCN-NEXT: v_mov_b32_e32 v11, s19
52-
; GCN-NEXT: s_nop 3
52+
; GCN-NEXT: s_nop 4
5353
; GCN-NEXT: global_store_dwordx4 v[12:13], a[28:31], off sc0 sc1
5454
; GCN-NEXT: s_waitcnt vmcnt(0)
5555
; GCN-NEXT: global_store_dwordx4 v[14:15], a[24:27], off sc0 sc1
@@ -122,7 +122,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__flags(<8 x bfloat> %arg0
122122
; GCN-NEXT: v_mov_b32_e32 v9, s17
123123
; GCN-NEXT: v_mov_b32_e32 v10, s18
124124
; GCN-NEXT: v_mov_b32_e32 v11, s19
125-
; GCN-NEXT: s_nop 3
125+
; GCN-NEXT: s_nop 4
126126
; GCN-NEXT: global_store_dwordx4 v[12:13], a[28:31], off sc0 sc1
127127
; GCN-NEXT: s_waitcnt vmcnt(0)
128128
; GCN-NEXT: global_store_dwordx4 v[14:15], a[24:27], off sc0 sc1
@@ -179,7 +179,7 @@ define <16 x float> @test_mfma_f32_32x32x16_bf16__mac(<8 x bfloat> %arg0, <8 x b
179179
; GCN-NEXT: s_nop 1
180180
; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15]
181181
; GCN-NEXT: s_nop 7
182-
; GCN-NEXT: s_nop 2
182+
; GCN-NEXT: s_nop 3
183183
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
184184
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
185185
; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -224,7 +224,7 @@ define <16 x float> @test_mfma_f32_32x32x16_bf16__mac__flags(<8 x bfloat> %arg0,
224224
; GCN-NEXT: s_nop 1
225225
; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:1 abid:1 blgp:1
226226
; GCN-NEXT: s_nop 7
227-
; GCN-NEXT: s_nop 2
227+
; GCN-NEXT: s_nop 3
228228
; GCN-NEXT: v_accvgpr_read_b32 v0, a0
229229
; GCN-NEXT: v_accvgpr_read_b32 v1, a1
230230
; GCN-NEXT: v_accvgpr_read_b32 v2, a2
@@ -417,7 +417,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd_mac(<8 x bfloat>
417417
; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15]
418418
; GCN-NEXT: v_mov_b32_e32 v0, 0
419419
; GCN-NEXT: s_nop 7
420-
; GCN-NEXT: s_nop 1
420+
; GCN-NEXT: s_nop 2
421421
; GCN-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
422422
; GCN-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
423423
; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
@@ -459,7 +459,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd_mac_flags(<8 x bf
459459
; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1
460460
; GCN-NEXT: v_mov_b32_e32 v0, 0
461461
; GCN-NEXT: s_nop 7
462-
; GCN-NEXT: s_nop 1
462+
; GCN-NEXT: s_nop 2
463463
; GCN-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
464464
; GCN-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
465465
; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16

0 commit comments

Comments
 (0)