Skip to content

Commit d898c95

Browse files
committed
[AMDGPU] Add gfx940 run line to gfx90a mfma test. NFC.
1 parent 40c6bab commit d898c95

File tree

1 file changed

+21
-0
lines changed

1 file changed

+21
-0
lines changed

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX90A %s
2+
; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX940 %s
23

34
declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16.1k(<4 x i16>, <4 x i16>, <32 x float>, i32, i32, i32)
45
declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x4bf16.1k(<4 x i16>, <4 x i16>, <16 x float>, i32, i32, i32)
@@ -44,9 +45,13 @@ declare i32 @llvm.amdgcn.workitem.id.x()
4445
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
4546
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
4647
; GFX90A-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
48+
; GFX940-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
49+
; GFX940-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
50+
; GFX940-COUNT-32: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
4751
; GFX90A-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
4852
; GFX90A-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
4953
; GFX90A: v_mfma_f32_32x32x4bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
54+
; GFX940: v_mfma_f32_32x32x4_2b_bf16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
5055
; GCN-NOT: v_accvgpr_read_b32
5156
; GCN-COUNT-8: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
5257
define amdgpu_kernel void @test_mfma_f32_32x32x4bf16_1k(<32 x float> addrspace(1)* %arg) #0 {
@@ -64,7 +69,9 @@ bb:
6469
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
6570
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
6671
; GFX90A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
72+
; GFX940-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
6773
; GFX90A: v_mfma_f32_16x16x4bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
74+
; GFX940: v_mfma_f32_16x16x4_4b_bf16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
6875
; GCN-NOT: v_accvgpr_read_b32
6976
; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
7077
define amdgpu_kernel void @test_mfma_f32_16x16x4bf16_1k(<16 x float> addrspace(1)* %arg) #0 {
@@ -82,7 +89,9 @@ bb:
8289
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
8390
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
8491
; GFX90A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
92+
; GFX940-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
8593
; GFX90A: v_mfma_f32_4x4x4bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
94+
; GFX940: v_mfma_f32_4x4x4_16b_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
8695
; GCN-NOT: v_accvgpr_read_b32
8796
; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]],
8897
define amdgpu_kernel void @test_mfma_f32_4x4x4bf16_1k(<4 x float> addrspace(1)* %arg) #0 {
@@ -100,7 +109,9 @@ bb:
100109
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
101110
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
102111
; GFX90A-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
112+
; GFX940-COUNT-16: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
103113
; GFX90A: v_mfma_f32_32x32x8bf16_1k a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
114+
; GFX940: v_mfma_f32_32x32x8_bf16 a[{{[0-9]+:[0-9]+}}], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
104115
; GCN-NOT: v_accvgpr_read_b32
105116
; GCN-COUNT-4: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
106117
define amdgpu_kernel void @test_mfma_f32_32x32x8bf16_1k(<16 x float> addrspace(1)* %arg) #0 {
@@ -118,7 +129,9 @@ bb:
118129
; GCN-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
119130
; GCN-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
120131
; GFX90A-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, v{{[0-9]+}}
132+
; GFX940-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
121133
; GFX90A: v_mfma_f32_16x16x16bf16_1k [[RES:a\[[0-9]+:[0-9]+\]]], v[[[ONE]]:{{[0-9]+}}], v[[[TWO]]:{{[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
134+
; GFX940: v_mfma_f32_16x16x16_bf16 [[RES:a\[[0-9]+:[0-9]+\]]], v{{\[}}[[ONE]]:{{[0-9+]}}], v{{\[}}[[TWO]]:{{[0-9+]}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
122135
; GCN-NOT: v_accvgpr_read_b32
123136
; GCN: global_store_dwordx4 v{{[0-9]+}}, [[RES]],
124137
define amdgpu_kernel void @test_mfma_f32_16x16x16bf16_1k(<4 x float> addrspace(1)* %arg) #0 {
@@ -134,6 +147,8 @@ bb:
134147
; GCN-LABEL: {{^}}test_mfma_f64_4x4x4f64:
135148
; GFX90A: v_mfma_f64_4x4x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 0{{$}}
136149
; GFX90A: v_mfma_f64_4x4x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3
150+
; GFX940: v_mfma_f64_4x4x4_4b_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 0{{$}}
151+
; GFX940: v_mfma_f64_4x4x4_4b_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0]
137152
; GCN: global_store_dwordx2
138153
define amdgpu_kernel void @test_mfma_f64_4x4x4f64(double addrspace(1)* %arg, double %a, double %b) #0 {
139154
bb:
@@ -146,6 +161,7 @@ bb:
146161
; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64:
147162
; GCN: s_load_dwordx8
148163
; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
164+
; GFX940: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 neg:[1,1,0]
149165
; GCN: global_store_dwordx4
150166
; GCN: global_store_dwordx4
151167
define amdgpu_kernel void @test_mfma_f64_16x16x4f64(<4 x double> addrspace(1)* %arg, double %a, double %b) #0 {
@@ -159,6 +175,8 @@ bb:
159175
; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_imm:
160176
; GFX90A: v_mfma_f64_16x16x4f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 0{{$}}
161177
; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 blgp:3
178+
; GFX940: v_mfma_f64_16x16x4_f64 [[M1:a\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], 0{{$}}
179+
; GFX940: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], [[M1]] cbsz:1 abid:2 neg:[1,1,0]
162180
; GCN: global_store_dwordx4
163181
; GCN: global_store_dwordx4
164182
define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_imm(<4 x double> addrspace(1)* %arg, double %a, double %b) #0 {
@@ -171,6 +189,7 @@ bb:
171189

172190
; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_imm:
173191
; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}}
192+
; GFX940: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}}
174193
; GCN: global_store_dwordx4
175194
; GCN: global_store_dwordx4
176195
define amdgpu_kernel void @test_mfma_f64_16x16x4f64_imm(<4 x double> addrspace(1)* %arg, double %a, double %b) #0 {
@@ -183,7 +202,9 @@ bb:
183202
; GCN-LABEL: {{^}}test_mfma_f64_16x16x4f64_splat_lit:
184203
; GCN-DAG: v_accvgpr_write_b32 a{{[0-9]+}}, 0{{$}}
185204
; GFX90A-DAG: v_mov_b32_e32 v{{[0-9]+}}, 0x405ec000
205+
; GFX940-DAG: s_mov_b32 s{{[0-9]+}}, 0x405ec000
186206
; GFX90A: v_mfma_f64_16x16x4f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}}
207+
; GFX940: v_mfma_f64_16x16x4_f64 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}]{{$}}
187208
; GCN: global_store_dwordx4
188209
; GCN: global_store_dwordx4
189210
define amdgpu_kernel void @test_mfma_f64_16x16x4f64_splat_lit(<4 x double> addrspace(1)* %arg, double %a, double %b) #0 {

0 commit comments

Comments
 (0)