Skip to content

Commit 7ebf0df

Browse files
committed
AMDGPU: Test gfx940 mfma intrinsics on gfx950
This requires splitting the xf32 cases into a separate file, which is run only with -mcpu=gfx942.
1 parent 920c589 commit 7ebf0df

File tree

2 files changed

+50
-38
lines changed

2 files changed

+50
-38
lines changed

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx940.ll

Lines changed: 5 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -3,10 +3,13 @@
33
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX940,AGPRCD %s
44
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 -global-isel -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GISEL,AGPRCD %s
55

6+
; RUN: llc -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX940,VGPRCD %s
7+
; RUN: llc -mtriple=amdgcn -mcpu=gfx950 -global-isel -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GISEL,VGPRCD %s
8+
; RUN: llc -mtriple=amdgcn -mcpu=gfx950 -stress-regalloc=10 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX940,AGPRCD %s
9+
; RUN: llc -mtriple=amdgcn -mcpu=gfx950 -stress-regalloc=10 -global-isel -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GISEL,AGPRCD %s
10+
611
declare <4 x i32> @llvm.amdgcn.mfma.i32.16x16x32.i8(i64, i64, <4 x i32>, i32, i32, i32)
712
declare <16 x i32> @llvm.amdgcn.mfma.i32.32x32x16.i8(i64, i64, <16 x i32>, i32, i32, i32)
8-
declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x8.xf32(<2 x float>, <2 x float>, <4 x float>, i32, i32, i32)
9-
declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x4.xf32(<2 x float>, <2 x float>, <16 x float>, i32, i32, i32)
1013
declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf8.bf8(i64, i64, <4 x float>, i32, i32, i32)
1114
declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf8.fp8(i64, i64, <4 x float>, i32, i32, i32)
1215
declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.fp8.bf8(i64, i64, <4 x float>, i32, i32, i32)
@@ -66,42 +69,6 @@ bb:
6669
ret void
6770
}
6871

69-
; GCN-LABEL: {{^}}test_mfma_f32_16x16x8xf32:
70-
; GFX940-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1.0
71-
; GFX940-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2.0
72-
; GFX940-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 0x40400000
73-
; GFX940-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4.0
74-
; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
75-
; GFX940: v_mfma_f32_16x16x8_xf32 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:[[TWO]]], v[[[THREE]]:[[FOUR]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
76-
; GISEL: v_mfma_f32_16x16x8_xf32 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
77-
; GCN-NOT: v_accvgpr_read_b32
78-
; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
79-
define amdgpu_kernel void @test_mfma_f32_16x16x8xf32(ptr addrspace(1) %arg) #0 {
80-
bb:
81-
%in.1 = load <4 x float>, ptr addrspace(1) %arg
82-
%mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8.xf32(<2 x float> <float 1.0, float 2.0>, <2 x float> <float 3.0, float 4.0>, <4 x float> %in.1, i32 1, i32 2, i32 3)
83-
store <4 x float> %mai.1, ptr addrspace(1) %arg
84-
ret void
85-
}
86-
87-
; GCN-LABEL: {{^}}test_mfma_f32_32x32x4xf32:
88-
; GFX940-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1.0
89-
; GFX940-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2.0
90-
; GFX940-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 0x40400000
91-
; GFX940-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4.0
92-
; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
93-
; GFX940: v_mfma_f32_32x32x4_xf32 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:[[TWO]]], v[[[THREE]]:[[FOUR]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
94-
; GISEL: v_mfma_f32_32x32x4_xf32 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
95-
; GCN-NOT: v_accvgpr_read_b32
96-
; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
97-
define amdgpu_kernel void @test_mfma_f32_32x32x4xf32(ptr addrspace(1) %arg) #0 {
98-
bb:
99-
%in.1 = load <16 x float>, ptr addrspace(1) %arg
100-
%mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4.xf32(<2 x float> <float 1.0, float 2.0>, <2 x float> <float 3.0, float 4.0>, <16 x float> %in.1, i32 1, i32 2, i32 3)
101-
store <16 x float> %mai.1, ptr addrspace(1) %arg
102-
ret void
103-
}
104-
10572
; GCN-LABEL: {{^}}test_mfma_f32_16x16x32_bf8_bf8:
10673
; GFX940-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1
10774
; GFX940-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2
(Second changed file: a newly added file; its path was not captured in this extract.)
Lines changed: 45 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,45 @@
1+
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX940 %s
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -global-isel -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GISEL %s
3+
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX940 %s
4+
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 -global-isel -verify-machineinstrs < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GISEL %s
5+
6+
declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x8.xf32(<2 x float>, <2 x float>, <4 x float>, i32, i32, i32)
7+
declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x4.xf32(<2 x float>, <2 x float>, <16 x float>, i32, i32, i32)
8+
9+
; GCN-LABEL: {{^}}test_mfma_f32_16x16x8xf32:
10+
; GFX940-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1.0
11+
; GFX940-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2.0
12+
; GFX940-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 0x40400000
13+
; GFX940-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4.0
14+
; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
15+
; GFX940: v_mfma_f32_16x16x8_xf32 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:[[TWO]]], v[[[THREE]]:[[FOUR]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
16+
; GISEL: v_mfma_f32_16x16x8_xf32 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
17+
; GCN-NOT: v_accvgpr_read_b32
18+
; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
19+
define amdgpu_kernel void @test_mfma_f32_16x16x8xf32(ptr addrspace(1) %arg) #0 {
20+
bb:
21+
%in.1 = load <4 x float>, ptr addrspace(1) %arg
22+
%mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8.xf32(<2 x float> <float 1.0, float 2.0>, <2 x float> <float 3.0, float 4.0>, <4 x float> %in.1, i32 1, i32 2, i32 3)
23+
store <4 x float> %mai.1, ptr addrspace(1) %arg
24+
ret void
25+
}
26+
27+
; GCN-LABEL: {{^}}test_mfma_f32_32x32x4xf32:
28+
; GFX940-DAG: v_mov_b32_e32 v[[ONE:[0-9]+]], 1.0
29+
; GFX940-DAG: v_mov_b32_e32 v[[TWO:[0-9]+]], 2.0
30+
; GFX940-DAG: v_mov_b32_e32 v[[THREE:[0-9]+]], 0x40400000
31+
; GFX940-DAG: v_mov_b32_e32 v[[FOUR:[0-9]+]], 4.0
32+
; GCN-COUNT-4: v_accvgpr_write_b32 a{{[0-9]+}}, s{{[0-9]+}}
33+
; GFX940: v_mfma_f32_32x32x4_xf32 a[{{[0-9]+:[0-9]+}}], v[[[ONE]]:[[TWO]]], v[[[THREE]]:[[FOUR]]], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
34+
; GISEL: v_mfma_f32_32x32x4_xf32 a[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], a[{{[0-9]+:[0-9]+}}] cbsz:1 abid:2 blgp:3
35+
; GCN-NOT: v_accvgpr_read_b32
36+
; GCN: global_store_dwordx4 v{{[0-9]+}}, a[{{[0-9:]+}}]
37+
define amdgpu_kernel void @test_mfma_f32_32x32x4xf32(ptr addrspace(1) %arg) #0 {
38+
bb:
39+
%in.1 = load <16 x float>, ptr addrspace(1) %arg
40+
%mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4.xf32(<2 x float> <float 1.0, float 2.0>, <2 x float> <float 3.0, float 4.0>, <16 x float> %in.1, i32 1, i32 2, i32 3)
41+
store <16 x float> %mai.1, ptr addrspace(1) %arg
42+
ret void
43+
}
44+
45+
attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }

0 commit comments

Comments
 (0)